% bioRxiv preprint (2016) describing the `Leapfrog' pipeline for finding hidden orthologs.
% Cleaned from a publisher auto-export: author names are in "Last, First" form,
% a duplicated middle name was removed, and eprint carries the bioRxiv identifier
% (the direct PDF link is preserved in the ignored pdf-url field).
@article{Martin-Duran050724,
  author       = {Martin-Duran, Jose M. and Ryan, Joseph F. and Vellutini, Bruno Cossermelli and Pang, Kevin and Hejnol, Andreas},
  title        = {A novel approach using increased taxon sampling reveals thousands of hidden orthologs in flatworms},
  journal      = {bioRxiv},
  publisher    = {Cold Spring Harbor Laboratory},
  year         = {2016},
  month        = apr,
  elocation-id = {050724},
  doi          = {10.1101/050724},
  url          = {https://www.biorxiv.org/content/early/2016/04/28/050724},
  eprint       = {050724},
  eprinttype   = {bioRxiv},
  pdf-url      = {https://www.biorxiv.org/content/early/2016/04/28/050724.full.pdf},
  abstract     = {Gene gains and losses shape the gene complement of animal lineages and are a fundamental aspect of genomic evolution. Acquiring a comprehensive view of the evolution of gene repertoires is however limited by the intrinsic limitations of common sequence similarity searches and available databases. Thus, a subset of the complement of an organism consists of hidden orthologs, those with no apparent homology with common sequenced animal lineages --mistakenly considered new genes-- but actually representing fast evolving orthologs of presumably lost proteins. Here, we describe `Leapfrog', an automated pipeline that uses increased taxon sampling to overcome long evolutionary distances and identify hidden orthologs in large transcriptomic databases. As a case study, we used 35 transcriptomes of 29 flatworm lineages to recover 3,597 hidden orthologs. Unexpectedly, we do not observe a correlation between the number of hidden orthologs in a lineage and its `average' evolutionary rate. Hidden orthologs do not show unusual sequence composition biases (e.g. GC content, average length, domain composition), but do appear to be more common in genes with binding or catalytic activity. By using `Leapfrog', we identify key centrosome-related genes and homeodomain classes previously reported as absent in free-living flatworms, e.g. planarians. Altogether, our findings demonstrate that hidden orthologs comprise a significant proportion of the gene repertoire, qualifying the impact of gene losses and gains in gene complement evolution.},
}