% bioRxiv preprint (2019): Personalized Annotation-based Networks (PAN) for
% breast cancer relapse prediction. Citation key kept as exported.
% NOTE(review): the eprint field holds the full-text PDF link rather than an
% archive identifier, as exported -- confirm how the target style renders it.
@article{Nguyen534628,
  author        = {Nguyen, Thin and Lee, Samuel C. and Quinn, Thomas P. and Truong, Buu and Li, Xiaomei and Tran, Truyen and Venkatesh, Svetha and Le, Thuc Duy},
  title         = {Personalized Annotation-based Networks ({PAN}) for the Prediction of Breast Cancer Relapse},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2019},
  elocation-id  = {534628},
  doi           = {10.1101/534628},
  url           = {https://www.biorxiv.org/content/early/2019/01/29/534628},
  eprint        = {https://www.biorxiv.org/content/early/2019/01/29/534628.full.pdf},
  abstract      = {The classification of clinical samples based on gene expression data is an important part of precision medicine. However, it is not a trivial task and it is difficult to accurately predict survival outcomes and treatment responses despite advancements in the field. In this manuscript, we show how transforming gene expression data into a set of personalized (sample-specific) networks can allow us to harness existing graph-based methods to improve classifier performance. Existing approaches to personalized gene networks, based on protein-protein interactions (PPI) or population-level models, all have the limitation that they depend on other samples in the data and must get re-computed whenever a new sample is introduced. Here, we propose a novel method, called Personalized Annotation-based Networks (PAN), that avoids this limitation by using curated annotation databases to transform gene expression data into a graph. These databases organize genes into overlapping gene sets, called annotations, that we use to build a network where nodes represent functional terms and edges represent the similarity between them. Unlike competing methods, PANs are calculated for each sample independent of the population, making it a more general solution to the single-sample network problem. Using two breast cancer datasets as a case study (METABRIC and a super-set of GEO studies), we show that PAN classifiers not only predict cancer relapse better than gene features alone, but also outperform PPI and population-level graph-based classifiers. This work demonstrates the practical advantages of graph-based classification for high-dimensional genomic data, while offering a new approach to making sample-specific networks.},
}