% bioRxiv preprint (posted 2017-03-24) describing STAG-CNS, a tool for
% discovering conserved non-coding sequences across three or more species.
% Notes for maintainers:
%   - The acronym in the title is brace-protected so sentence-casing styles
%     do not lowercase it.
%   - Comparison signs in the abstract are written as LaTeX math so the entry
%     stays pure ASCII and works with classic 8-bit BibTeX.
%   - The eprint field carries the full-text PDF link as exported by bioRxiv.
@article{Lai120428,
  author        = {Lai, Xianjun and Behera, Sairam and Liang, Zhikai and Lu, Yanli and Deogun, Jitender S. and Schnable, James C.},
  title         = {{STAG-CNS}: An Order-Aware Conserved Non-coding Sequences Discovery Tool For Arbitrary Numbers of Species},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2017},
  elocation-id  = {120428},
  doi           = {10.1101/120428},
  url           = {https://www.biorxiv.org/content/early/2017/03/24/120428},
  eprint        = {https://www.biorxiv.org/content/early/2017/03/24/120428.full.pdf},
  abstract      = {One method for identifying noncoding regulatory regions of a genome is to quantify rates of divergence between related species, as functional sequence will generally diverge more slowly. Most approaches to identifying these conserved noncoding sequences (CNS) based on alignment have had relatively large minimum sequence lengths ($\geq$15 base pair) compared to the average length of known transcription factor binding sites. To circumvent this constraint, STAG-CNS integrates data from the promoters of conserved orthologous genes in three or more species simultaneously. Using data from up to six grass species made it possible to identify conserved sequences as short at 9 base pairs with FDP $\leq$ 0.05. These CNS exhibit greater overlap with open chromatin regions identified using DNase I hypersensitivity, and are enriched in the promoters of genes involved in transcriptional regulation. STAG-CNS was further employed to characterize loss of conserved noncoding sequences associated with retained duplicate genes from the ancient maize polyploidy. Genes with fewer retained CNS show lower overall expression, although this bias is more apparent in samples of complex organ systems containing many cell types, suggesting CNS loss may correspond to a reduced number of expression contexts rather than lower expression levels across the entire ancestral expression domain.},
}