% bioRxiv preprint describing CellR (cell-specific expression inference and
% deconvolution of bulk-tissue RNA-seq). Cleaned from the site's auto-export:
%   - eprint now holds the bare preprint identifier; the PDF link lives in the
%     (style-ignored) pdf field.
%   - The competing-interest boilerplate that the export had fused onto the end
%     of the abstract is kept separately in the (style-ignored)
%     competing-interests field.
%   - NOTE(review): the first author's surname split is kept exactly as the
%     original "First Last" form parsed it (Last = Torshizi); confirm whether
%     "Doostparast Torshizi" should be treated as a compound surname.
@article{Torshizi2020.05.28.121483,
  author              = {Torshizi, Abolfazl Doostparast and Duan, Jubao and Wang, Kai},
  title               = {A computational tool for direct inference of cell-specific expression profiles and cellular composition from bulk-tissue {RNA-seq} in brain disorders},
  journal             = {bioRxiv},
  year                = {2020},
  publisher           = {Cold Spring Harbor Laboratory},
  doi                 = {10.1101/2020.05.28.121483},
  url                 = {https://www.biorxiv.org/content/early/2020/05/31/2020.05.28.121483},
  eprint              = {2020.05.28.121483},
  eprinttype          = {bioRxiv},
  elocation-id        = {2020.05.28.121483},
  pdf                 = {https://www.biorxiv.org/content/early/2020/05/31/2020.05.28.121483.full.pdf},
  abstract            = {The importance of cell type-specific gene expression in disease-relevant tissues is increasingly recognized in genetic studies of complex diseases. However, the vast majority of gene expression studies are conducted in bulk tissues, necessitating computational approaches to infer novel biological insights on cell type-specific contribution to diseases. We introduce CellR, a novel computational method that uses external single cell RNA-seq (scRNA-seq) data to infer cell-specific expression profiles from bulk RNA-seq data as well as deconvolving bulk-tissue RNA-Seq data to estimate their cellular compositions. CellR addresses cross-individual gene expression variations by employing genome-wide tissue-wise expression signatures from GTEx to adjust the weights of cell-specific gene markers. It then transforms the deconvolution problem into a linear programming model while taking into account inter/intra cellular correlations. We have developed a multi-variate stochastic search algorithm to estimate the expression level of each gene in each cell type. Extensive analyses on several complex diseases such as schizophrenia, Alzheimer{\textquoteright}s disease, Huntington disease, and type-2 diabetes validated efficiency of CellR, while revealing how specific cell types contribute to different diseases. We conducted numerical simulations on human cerebellum to generate pseudo-bulk RNA-seq data and demonstrated efficiency of CellR in inferring cell-specific expression profiles directly from bulk data. Moreover, we inferred cell-specific expression levels from bulk RNA-seq data on schizophrenia and computed differentially expressed genes within certain cell-types. Next, using predicted gene expression profile on excitatory neurons, we were able to reproduce our recently published findings on TCF4 being a master regulator in schizophrenia via bulk tissue analysis, but with higher statistical significance, suggesting that cell-type-specific re-analysis of bulk RNA-Seq data can greatly improve understanding of complex brain disorders. In summary, CellR compares favorably (both accuracy and stability of inference) against competing approaches to infer cellular composition from bulk RNA-seq data, and allows direct inference of cell type-specific gene expression, opening new doors to re-analyze vast amounts of gene expression data sets on bulk tissues in many complex diseases.},
  competing-interests = {The authors have declared no competing interest.},
}