% bioRxiv preprint, 2017 (publisher: Cold Spring Harbor Laboratory).
% NOTE(review): the eprint field below holds the full-text PDF link (a URL),
% not a repository identifier; kept as found, confirm how the target style prints it.
@article{Davydov174839,
  author        = {Davydov, Iakov I. and Salamin, Nicolas and Robinson-Rechavi, Marc},
  title         = {Modeling Codon Rate Variation Improves Protein Positive Selection Inference and Detects Nucleotide Selection},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2017},
  elocation-id  = {174839},
  doi           = {10.1101/174839},
  url           = {https://www.biorxiv.org/content/early/2017/08/11/174839},
  eprint        = {https://www.biorxiv.org/content/early/2017/08/11/174839.full.pdf},
  abstract      = {There are numerous sources of variation in the rate of synonymous substitutions inside genes, such as direct selection on the nucleotide sequence, or mutation rate variation. However the majority of the codon models which are developed and widely used today still incorporate an assumption of effectively neutral synonymous substitution rate, constant between sites of each gene. Here we propose a simple yet effective extension to codon models, which incorporates codon substitution rate variation along the gene sequence. We assess the performance of our approach in simulations and on real data. We find strong effects of nucleotide rate variation on positive selection inference, both under models with variation of protein selection and with branch-site variation of protein selection. We also demonstrate that the computational load of our approach remains tractable, and therefore we are able to apply it to genome scale positive selection scans. We apply our new method to two datasets: 767 vertebrate orthologs and 8,606 orthologs from twelve Drosophila species. We demonstrate that our new model is strongly favored by the data, and the support of the model increases with the amount of information. Moreover, it is able to capture signatures of nucleotide level selection acting on translation initiation and on splicing sites within the coding region. Finally, we show that rate variation is highest in the highly recombining regions, and we hypothesize that recombination and mutation rate variation, such as high CpG mutation rate, are the two main sources of nucleotide rate variation.
Overall, nucleotide rate variation in substitutions is an important feature to capture, both to detect positive selection and to understand gene evolution, and the approach that we propose allows to do this in genome-wide scans.},
}