% bioRxiv preprint by Fulcher, Arnatkeviciute and Fornito (2020), stored as an
% article entry with journal = bioRxiv.
% Notes for maintainers:
%   - elocation-id and competing-interests are non-standard fields; standard
%     styles ignore them. competing-interests holds the statement that the
%     exporter had appended to the end of the abstract.
%   - eprint holds the full-text PDF link exactly as exported by bioRxiv.
@article{Fulcher2020.04.24.058958,
  author              = {Fulcher, Ben D. and Arnatkevi{\v{c}}iute, Aurina and Fornito, Alex},
  title               = {Overcoming bias in gene-set enrichment analyses of brain-wide transcriptomic data},
  journal             = {bioRxiv},
  publisher           = {Cold Spring Harbor Laboratory},
  year                = {2020},
  elocation-id        = {2020.04.24.058958},
  doi                 = {10.1101/2020.04.24.058958},
  url                 = {https://www.biorxiv.org/content/early/2020/12/07/2020.04.24.058958},
  eprint              = {https://www.biorxiv.org/content/early/2020/12/07/2020.04.24.058958.full.pdf},
  abstract            = {The recent availability of whole-brain atlases of gene expression, which
    quantify the transcriptional activity of thousands of genes across many different brain
    regions, has opened new opportunities to understand how gene-expression patterns relate to
    spatially varying properties of brain structure and function. To aid interpretation of a
    given neural phenotype, gene-set enrichment analysis (GSEA) has become a standard
    statistical methodology to identify functionally related groups of genes, annotated using
    systems such as the Gene Ontology (GO), that are associated with a given phenotype. While
    GSEA has identified groups of genes related to diverse aspects of brain structure and
    function in mouse and human, here we show that these results are affected by substantial
    statistical biases. Quantifying the false-positive rates of individual GO categories across
    an ensemble of random phenotypic maps, we found an average 875-fold inflation of significant
    findings relative to expectation in mouse, and a 582-fold inflation in human, with some
    categories being judged as significant for over 20\% of random phenotypes. Concerningly, the
    probability of a GO category being reported as significant in the extant literature
    increases with its estimated false-positive rate, suggesting that published reports are
    strongly affected by the reporting of false-positive bias. We show that the bias is
    primarily driven by within-category gene{\textendash}gene coexpression and spatial
    autocorrelation, which are not accounted for in conventional GSEA nulls, and we introduce
    flexible ensemble-based null models that can account for these effects. Testing a range of
    structural connectivity and cell density phenotypes in mouse and human, we demonstrate that
    many GO categories that would conventionally be judged as highly significant are in fact
    consistent with ensembles of random phenotypes. Our results highlight major pitfalls with
    applying standard GSEA to brain-wide transcriptomic data and outline solutions to this
    pervasive problem, which is made available as an open toolbox.},
  competing-interests = {The authors have declared no competing interest.},
}