% McIntyre et al. (2017), bioRxiv preprint, DOI 10.1101/156919.
% Structured abstract: Background / Results / Conclusions.
% NOTE(review): the eprint field holds a full-text PDF URL rather than an
% archive identifier, as exported by the publisher site; kept unchanged.
@article{McIntyre156919,
  author       = {McIntyre, Alexa B. R. and
                  Ounit, Rachid and
                  Afshinnekoo, Ebrahim and
                  Prill, Robert J. and
                  H{\'e}naff, Elizabeth and
                  Alexander, Noah and
                  Minot, Sam and
                  Danko, David and
                  Foox, Jonathan and
                  Ahsanuddin, Sofia and
                  Tighe, Scott and
                  Hasan, Nur A. and
                  Subramanian, Poorani and
                  Moffat, Kelly and
                  Levy, Shawn and
                  Lonardi, Stefano and
                  Greenfield, Nick and
                  Colwell, Rita R. and
                  Rosen, Gail L. and
                  Mason, Christopher E.},
  title        = {Comprehensive Benchmarking and Ensemble Approaches for Metagenomic Classifiers},
  journal      = {bioRxiv},
  publisher    = {Cold Spring Harbor Laboratory},
  year         = {2017},
  elocation-id = {156919},
  doi          = {10.1101/156919},
  url          = {https://www.biorxiv.org/content/early/2017/06/28/156919.1},
  eprint       = {https://www.biorxiv.org/content/early/2017/06/28/156919.1.full.pdf},
  abstract     = {Background: One of the main challenges in metagenomics is the
                  identification of microorganisms in clinical and environmental
                  samples. While an extensive and heterogeneous set of
                  computational tools is available to classify microorganisms
                  using whole genome shotgun sequencing data, comprehensive
                  comparisons of these methods are limited. In this study, we use
                  the largest (n=35) to date set of laboratory-generated and
                  simulated controls across 846 species to evaluate the
                  performance of eleven metagenomics classifiers. We also assess
                  the effects of filtering and combining tools to reduce the
                  number of false positives. Results: Tools were characterized on
                  the basis of their ability to (1) identify taxa at the genus,
                  species, and strain levels, (2) quantify relative abundance
                  measures of taxa, and (3) classify individual reads to the
                  species level. Strikingly, the number of species identified by
                  the eleven tools can differ by over three orders of magnitude
                  on the same datasets. However, various strategies can
                  ameliorate taxonomic misclassification, including abundance
                  filtering, ensemble approaches, and tool intersection. Indeed,
                  leveraging tools with different heuristics is beneficial for
                  improved precision. Nevertheless, these strategies were often
                  insufficient to completely eliminate false positives from
                  environmental samples, which are especially important where
                  they concern medically relevant species and where customized
                  tools may be required. Conclusions: The results of this study
                  provide positive controls, titrated standards, and a guide for
                  selecting tools for metagenomic analyses by comparing ranges of
                  precision and recall. We show that proper experimental design
                  and analysis parameters, including depth of sequencing, choice
                  of classifier or classifiers, database size, and filtering, can
                  reduce false positives, provide greater resolution of species
                  in complex metagenomic samples, and improve the interpretation
                  of results.},
}