% bioRxiv preprint record for the CoCo read-assignment pipeline.
% The eprint field keeps the full-text PDF link exactly as exported by bioRxiv.
@article{Deschamps-Francoeur477869,
  author        = {Deschamps-Francoeur, Gabrielle and Boivin, Vincent and Abou Elela, Sherif and Scott, Michelle S.},
  title         = {{CoCo}: {RNA-seq} Read Assignment Correction for Nested Genes and Multimapped Reads},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2018},
  elocation-id  = {477869},
  doi           = {10.1101/477869},
  url           = {https://www.biorxiv.org/content/early/2018/11/29/477869},
  eprint        = {https://www.biorxiv.org/content/early/2018/11/29/477869.full.pdf},
  abstract      = {Motivation: Next generation sequencing techniques revolutionized the study of RNA expression by permitting whole transcriptome analysis. However, sequencing reads generated from nested and multi-copy genes are often either misassigned or discarded, which greatly reduces both quantification accuracy and gene coverage. Results: Here we present CoCo, a read assignment pipeline that takes into account the multitude of overlapping and repetitive genes in the transcriptome of higher eukaryotes. CoCo uses a modified annotation file that highlights nested genes and proportionally distributes multimapped reads between repeated sequences. CoCo salvages over 15\% of discarded aligned RNA-seq reads and significantly changes the abundance estimates for both coding and non-coding RNA as validated by PCR and bed-graph comparisons. Availability: The CoCo software is an open source package written in Python and available from http://gitlabscottgroup.med.usherbrooke.ca/scott-group/coco. Contact: michelle.scott{at}usherbrooke.ca},
}