% Shah et al. (2016), bioRxiv preprint, DOI 10.1101/075416.
% The citation key is unchanged so existing \cite commands keep working.
% `elocation-id` and `pdf-url` are non-standard fields: styles ignore them and
% they are kept only so the exported metadata and the PDF link are not lost.
@article{Shah075416,
  author       = {Shah, Naisha and Hou, Ying-Chen Claire and Yu, Hung-Chun and Sainger, Rachana and Dec, Eric and Perkins, Brad and Caskey, C. Thomas and Venter, J. Craig and Telenti, Amalio},
  title        = {Identification of misclassified {ClinVar} variants using disease population prevalence},
  journal      = {bioRxiv},
  publisher    = {Cold Spring Harbor Laboratory},
  year         = {2016},
  elocation-id = {075416},
  doi          = {10.1101/075416},
  eprint       = {075416},
  eprinttype   = {bioRxiv},
  url          = {https://www.biorxiv.org/content/early/2016/09/15/075416},
  pdf-url      = {https://www.biorxiv.org/content/early/2016/09/15/075416.full.pdf},
  abstract     = {There is a significant interest in the standardized classification of human genetic variants. The availability of new large datasets generated through genome sequencing initiatives provides a ground for the computational evaluation of the supporting evidence. We used whole genome sequence data from 8,102 unrelated individuals to analyze the adequacy of estimated rates of disease on the basis of genetic risk and the expected population prevalence of the disease. Analyses included the ACMG recommended 56 gene-condition sets for incidental findings and 631 genes associated with 348 OrphaNet conditions. A total of 21,004 variants were used to identify patterns of inflation (i.e. excess genetic risk). Inflation, i.e., misclassification, increases as the level of evidence in ClinVar supporting the pathogenic nature of the variant decreases. The burden of rare variants was a main contributing factor of the observed inflation indicating misclassified benign private mutations. We also analyzed the dynamics of re-classification of variant pathogenicity in ClinVar over time. The study strongly suggests that ClinVar includes a significant proportion of wrongly ascertained variants, and underscores the critical role of ClinVar to contrast claims, and foster validation across submitters.},
}