@comment{
  Naslavsky et al. 2020: bioRxiv preprint record (Cold Spring Harbor Laboratory).
  Notes for maintainers:
  - journal is kept as bioRxiv so classic article styles have their required
    field; eprint and eprinttype carry the preprint identifier for styles
    that understand them.
  - competing-interests and pdf are non-standard fields that styles ignore;
    they preserve text and links that came with the publisher export.
  - In the abstract, approximate quantities use math-mode sim because a bare
    tilde is a non-breaking space in LaTeX and would not be printed.
}
@article{Naslavsky2020.09.15.298026,
  author        = {Michel S. Naslavsky and
                   Marilia O. Scliar and
                   Guilherme L. Yamamoto and
                   Jaqueline Yu Ting Wang and
                   Stepanka Zverinova and
                   Tatiana Karp and
                   Kelly Nunes and
                   Jos{\'e} Ricardo Magliocco Ceroni and
                   Diego Lima de Carvalho and
                   Carlos Eduardo da Silva Sim{\~o}es and
                   Daniel Bozoklian and
                   Ricardo Nonaka and
                   Nayane dos Santos Brito Silva and
                   Andreia da Silva Souza and
                   Helo{\'\i}sa de Souza Andrade and
                   Mar{\'\i}lia Rodrigues Silva Passos and
                   Camila Ferreira Bannwart Castro and
                   Celso T. Mendes-Junior and
                   Rafael L. V. Mercuri and
                   Thiago L. A. Miller and
                   Jose Leonel Buzzo and
                   Fernanda O. Rego and
                   Nathalia M. Ara{\'u}jo and
                   Wagner C. S. Magalh{\~a}es and
                   Regina C{\'e}lia Mingroni-Netto and
                   Victor Borda and
                   Heinner Guio and
                   Mauricio L. Barreto and
                   Maria Fernanda Lima-Costa and
                   Bernardo L. Horta and
                   Eduardo Tarazona-Santos and
                   Diogo Meyer and
                   Pedro A. F. Galante and
                   Victor Guryev and
                   Erick C. Castelli and
                   Yeda A. O. Duarte and
                   Maria Rita Passos-Bueno and
                   Mayana Zatz},
  title         = {Whole-genome sequencing of 1,171 elderly admixed individuals from the largest {Latin American} metropolis ({S{\~a}o Paulo}, {Brazil})},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2020},
  elocation-id  = {2020.09.15.298026},
  doi           = {10.1101/2020.09.15.298026},
  eprint        = {2020.09.15.298026},
  eprinttype    = {bioRxiv},
  url           = {https://www.biorxiv.org/content/early/2020/09/16/2020.09.15.298026},
  pdf           = {https://www.biorxiv.org/content/early/2020/09/16/2020.09.15.298026.full.pdf},
  abstract      = {As whole-genome sequencing (WGS) becomes the gold standard tool for studying population genomics and medical applications, data on diverse non-European and admixed individuals are still scarce. Here, we present a high-coverage WGS dataset of 1,171 highly admixed elderly Brazilians from a census-based cohort, providing over 76 million variants, of which $\sim$2 million are absent from large public databases. WGS enabled identifying $\sim$2,000 novel mobile element insertions, nearly 5Mb of genomic segments absent from human genome reference, and over 140 novel alleles from HLA genes. We reclassified and curated nearly four hundred variant{\textquoteright}s pathogenicity assertions in genes associated with dominantly inherited Mendelian disorders and calculated the incidence for selected recessive disorders, demonstrating the clinical usefulness of the present study. Finally, we observed that whole-genome and HLA imputation could be significantly improved compared to available datasets since rare variation represents the largest proportion of input from WGS. These results demonstrate that even smaller sample sizes of underrepresented populations bring relevant data for genomic studies, especially when exploring analyses allowed only by WGS.},
  competing-interests = {The authors have declared no competing interest.},
}