% bioRxiv preprint (2019) describing the BiobankRead Python package for UKBiobank data extraction.
@article{Schneider-Luftman569715,
  author        = {Schneider-Luftman, D. and Crum, W. R.},
  title         = {{BioBankRead}: Data pre-processing in {Python} for {UKBiobank} clinical data},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2019},
  elocation-id  = {569715},
  doi           = {10.1101/569715},
  url           = {https://www.biorxiv.org/content/early/2019/04/26/569715},
  eprint        = {https://www.biorxiv.org/content/early/2019/04/26/569715.full.pdf},
  abstract      = {Motivation: UKBiobank collects health-related data from 500,000 volunteers and is widely used by medical researchers. However, the data is supplied in a custom compressed format and its preparation for analysis is cumbersome and time-consuming. This Python package automates the extraction of selected UKBiobank data, for easy integration in an analysis pipeline. Features: The functions provided within this Python package reduce the number of steps, as well as human and computational time, required for extraction and preparation of the data for analysis. It is executable through command line, is easily installed on any platform and requires no prior knowledge of Python. Application: BiobankRead is used for an analysis of dietary lifestyles and cardio-vascular outcomes. A large range of dietary, phenotypical, lifestyle and vascular outcomes is extracted and pre-processed. Significant associations are found between non-meat-eating and lower blood pressure / reduced risk of hypertension. Availability: The Python package BiobankRead is freely available under the GNU General Public License (version 3 or later). It can be downloaded from GitHub (https://github.com/saphir746/BiobankRead-Bash), where example scripts and detailed instructions are also available.},
}