% bioRxiv preprint describing SPsimSeq (DOI 10.1101/677740).
% Title braces protect "SPsimSeq" and "RNA" from style-driven lowercasing.
@article{Assefa677740,
  author        = {Assefa, Alemu Takele and Vandesompele, Jo and Thas, Olivier},
  title         = {{SPsimSeq}: semi-parametric simulation of bulk and single cell {RNA} sequencing data},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2019},
  elocation-id  = {677740},
  doi           = {10.1101/677740},
  url           = {https://www.biorxiv.org/content/early/2019/06/21/677740},
  eprint        = {https://www.biorxiv.org/content/early/2019/06/21/677740.full.pdf},
  abstract      = {SPsimSeq is a semi-parametric simulation method for bulk and single cell RNA sequencing data. It simulates data from a good estimate of the actual distribution of a given real RNA-seq dataset. In contrast to existing approaches that assume a particular data distribution, our method constructs an empirical distribution of gene expression data from a given source RNA-seq experiment to faithfully capture the data characteristics of real data. Importantly, our method can be used to simulate a wide range of scenarios, such as single or multiple biological groups, systematic variations (e.g. confounding batch effects), and different sample sizes. It can also be used to simulate different gene expression units resulting from different library preparation protocols, such as read counts or UMI counts. Availability and implementation: The R package and associated documentation is available from https://github.com/CenterForStatistics-UGent/SPsimSeq. Supplementary information: Supplementary data are available at bioRxiv online.},
}