% bioRxiv preprint record (auto-exported, then cleaned up by hand).
% - The consortium author is double-braced so BibTeX treats it as one
%   indivisible name; personal names use the unambiguous "Last, First" form
%   with the same first/last split BibTeX derived from the exported order.
% - NOTE(review): "Gupta, Arati-Khanna" keeps the hyphenation exactly as
%   exported; it may really be "Khanna-Gupta, Arati" -- confirm against the
%   preprint before relying on sorting or labels.
% - Abstract: missing spaces between sentences restored, stray superscript
%   citation markers (fused onto words by the export) removed, and the
%   competing-interest statement and abbreviation list separated from the
%   running text. "C$>$A" replaces the export's backslash-greater-than
%   sequence, which is a spacing macro and fails in text mode.
% - eprint still carries the PDF link as exported by bioRxiv.
@article{Patell2020.06.05.124891,
  author        = {Patell, Villoo Morawala and Pasha, Naseer and Krishnasamy, Kashyap and Mittal, Bharti and Gopalakrishnan, Chellappa and Mugasimangalam, Raja and Sharma, Naveen and Gupta, Arati-Khanna and Bhote-Patell, Perviz and Rao, Sudha and Jain, Renuka and {The Avestagenome Project{\textregistered}}},
  title         = {The First complete {Zoroastrian-Parsi} Mitochondria Reference Genome: Implications of mitochondrial signatures in an endogamous, non-smoking population},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2020},
  elocation-id  = {2020.06.05.124891},
  doi           = {10.1101/2020.06.05.124891},
  url           = {https://www.biorxiv.org/content/early/2020/06/08/2020.06.05.124891},
  eprint        = {https://www.biorxiv.org/content/early/2020/06/08/2020.06.05.124891.full.pdf},
  abstract      = {The present-day Zoroastrian-Parsis have roots in ancient pastoralist migrations from circumpolar regions leading to their settlement on the Eurasian Steppes and later, as Indo Iranians in the Fertile Crescent. From then, the Achaemenids (550 - 331 BC), and later the Sassanids (224 BC - 642 AD) established the mighty Persian Empires. The Arab invasion of Persia in 642 AD necessitated the migration of Zoroastrians from Pars to India where they settled as Parsis and practiced their faith, Zoroastrianism. Endogamy became a dogma, and the community has maintained the practice since their arrival in India. Fire is the medium of worship as it is considered pure and sacrosanct; Social ostracism practiced against smokers resulted in a non-smoking community, thus forming a unique basis for our study. In order to gain a clearer understanding of the historically recorded migration of the Zoroastrian-Parsis, decipher their phylogenetic relationships and understand disease association to their individual mitochondrial genomes, we generated the first complete de novo Zoroastrian-Parsi Mitochondrial Reference Genome, AGENOME-ZPMS-HV2a-1. Phylogenetic analysis of additional 100 Parsi mitochondrial genome sequences, showed their distribution into 7 major haplogroups and 25 sub-haplogroups and a largely Persian origin for the Parsi community. We have generated individual reference genomes for each major haplogroup and assembled the Zoroastrian Parsi Mitochondrial Consensus Genome (AGENOME-ZPMCG V1.0) for the first time in the world. We report 420 variants, specifically 12 unique mitochondrial variants in the 100 mitochondrial genome sequences compared with the revised Cambridge Reference Sequence (rCRS) standard. Disease association mapping showed 217 unique variants linked to longevity and 41 longevity associated disease phenotypes across most haplogroups. Our results indicate none of the variants are linked to lung cancer. Mutational signatures, C$>$A, G$>$T transitions, linked to tobacco carcinogens were found at extremely low frequencies in the Zoroastrian-Parsi cohort. Our analysis of gene-coding, tRNA and the D-Loop regions revealed haplogroup specific disease associations for Parkinson{\textquoteright}s, Alzheimer{\textquoteright}s, Cancers, and Rare diseases. These disease signatures investigated in the backdrop of generations of endogamy, in the rapidly declining, endangered Zoroastrian-Parsi community of India, provides exceptional universal opportunity to understand and mitigate disease. Competing Interest Statement: The authors have declared no competing interest. Abbreviations: mtDNA, Mitochondrial DNA; rCRS, revised Cambridge Reference Sequence; NGS, Next Generation Sequencing; ZPMS, Zoroastrian Parsi Mitochondrial Sequence; ZPMRG, Zoroastrian Parsi Mitochondrial Reference Genome; ZPMCG, Zoroastrian Parsi Mitochondrial Consensus Genome; AD, Alzheimer{\textquoteright}s Disease; PD, Parkinson{\textquoteright}s Disease.},
}