% bioRxiv preprint (Cold Spring Harbor Laboratory), DOI 10.1101/098673.
% Authors are stored in "Last, First" / "Last, Jr., First" form so that the
% "Jr." suffix of the final author is parsed as a suffix rather than as a
% given name (a single comma would make BibTeX read "Jr." as the first name).
@article{Saladi098673,
  author        = {Saladi, Shyam M. and Javed, Nauman and M{\"u}ller, Axel and Clemons, Jr., William M.},
  title         = {Decoding sequence-level information to predict membrane protein expression},
  journal       = {bioRxiv},
  year          = {2017},
  publisher     = {Cold Spring Harbor Laboratory},
  elocation-id  = {098673},
  doi           = {10.1101/098673},
  url           = {https://www.biorxiv.org/content/early/2017/11/06/098673},
  eprint        = {https://www.biorxiv.org/content/early/2017/11/06/098673.full.pdf},
  abstract      = {The expression of integral membrane proteins (IMPs) remains a major bottleneck in the characterization of this important protein class. IMP expression levels are currently unpredictable, which renders the pursuit of IMPs for structural and biophysical characterization challenging and inefficient. Experimental evidence demonstrates that changes within the nucleotide or amino-acid sequence for a given IMP can dramatically affect expression; yet these observations have not resulted in generalizable approaches to improved expression. Here, we develop a data-driven statistical predictor named IMProve, that, using only sequence information, increases the likelihood of selecting an IMP that expresses in E. coli. The IMProve model, trained on experimental data, combines a set of sequence-derived features resulting in an IMProve score, where higher values have a higher probability of success. The model is rigorously validated against a variety of independent datasets that contain a wide range of experimental outcomes from various IMP expression trials. The results demonstrate that use of the model can more than double the number of successfully expressed targets at any experimental scale. IMProve can immediately be used to identify favorable targets for characterization.},
}