% bioRxiv preprint describing the pyBedGraph Python package (2019).
% Notes for maintainers:
%   - Braces in the title protect words whose capitalisation must survive
%     styles that apply sentence casing.
%   - "pdf" and "elocation-id" are non-standard fields; styles that do not
%     know them ignore them. "pdf" holds the full-text link that the
%     original export had stored in "eprint".
%   - In the abstract, $\sim$ is used for "approximately" because a bare
%     tilde is a non-breaking space in LaTeX.
@article{Zhang709683,
  author        = {Zhang, Henry B. and Kim, Minji and Chuang, Jeffrey H. and Ruan, Yijun},
  title         = {{pyBedGraph}: a {Python} package for fast operations on 1-dimensional genomic signal tracks},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2019},
  elocation-id  = {709683},
  doi           = {10.1101/709683},
  url           = {https://www.biorxiv.org/content/early/2019/07/20/709683},
  pdf           = {https://www.biorxiv.org/content/early/2019/07/20/709683.full.pdf},
  abstract      = {Motivation: Modern genomic research relies heavily on next-generation sequencing experiments such as ChIP-seq and ChIA-PET that generate coverage files for transcription factor binding, as well as DHS and ATAC-seq that yield coverage files for chromatin accessibility. Such files are in a bedGraph text format or a bigWig binary format. Obtaining summary statistics in a given region is a fundamental task in analyzing protein binding intensity or chromatin accessibility. However, the existing Python package for operating on coverage files is not optimized for speed. Results: We developed pyBedGraph, a Python package to quickly obtain summary statistics for a given interval in a bedGraph file. When tested on 8 ChIP-seq and ATAC-seq datasets, pyBedGraph is on average 245 times faster than the existing program. Notably, pyBedGraph can look up the exact mean signal of 1 million regions in $\sim$0.26 second on a conventional laptop. An approximate mean for 10,000 regions can be computed in $\sim$0.0012 second with an error rate of less than 5 percent. Availability: pyBedGraph is publicly available at https://github.com/TheJacksonLaboratory/pyBedGraph under the MIT license.},
}