% bioRxiv preprint introducing the Rabix Executor workflow engine (DOI 10.1101/074708).
% Author names are stored in "Last, First" form so name parsing is unambiguous.
% NOTE(review): the publisher export listed the final author as "Kural Deniz"
% (given name and surname swapped); corrected to surname Kural, given name Deniz.
% Confirm against the preprint landing page.
@article{Kaushik074708,
  author       = {Kaushik, Gaurav and Ivkovic, Sinisa and Simonovic, Janko and Tijanic, Nebojsa and Davis-Dusenbery, Brandi and Kural, Deniz},
  title        = {Graph Theory Approaches for Optimizing Biomedical Data Analysis Using Reproducible Workflows},
  journal      = {bioRxiv},
  publisher    = {Cold Spring Harbor Laboratory},
  year         = {2016},
  elocation-id = {074708},
  doi          = {10.1101/074708},
  url          = {https://www.biorxiv.org/content/early/2016/09/12/074708},
  eprint       = {https://www.biorxiv.org/content/early/2016/09/12/074708.full.pdf},
  abstract     = {As biomedical data becomes increasingly easy to generate in large quantities, the methods used to analyze it have proliferated rapidly. However, for the insights gained from these analyses to be meaningful, the analysis methods themselves must be transparent and reproducible. To address this issue, numerous groups have developed workflow standards or specifications that provide a common framework with which to describe a given analysis method. The diversity of methods demands that the specification be robust and flexible to accurately describe a specific biomedical data analysis. However, a powerful specification alone is insufficient to drive reproducible analysis {\textendash} even the most completely described workflow must also be {\textquoteleft}runnable{\textquoteright} on diverse architectures. Thus the complete reproducible workflow ecosystem includes one or more well defined workflow definitions or specifications as well as the software components needed to implement these specifications. Such implementations allow adaptation to diverse environments, provide optimizations to workflow execution, improve computing efficiency, and support reproducibility through task logging and provenance.
    To meet these requirements, we have developed the Rabix Executor, an open-source workflow engine which utilizes graph theory approaches to enable computational optimization of complex, dynamic workflows, and supports reproducibility in biomedical data analysis.},
}