% bioRxiv preprint (Cold Spring Harbor Laboratory), posted 2020.
% Notes for maintainers:
%   - Proper nouns and acronyms in the title are brace-protected so that
%     sentence-casing styles do not lowercase them.
%   - `pdf-url` and `coi-statement` are non-standard fields that styles ignore;
%     they preserve data the exporter had placed in `eprint` and at the tail of
%     `abstract` respectively.
%   - The abstract uses LaTeX escapes instead of raw non-ASCII characters so
%     the file stays usable with classic 8-bit BibTeX.
@article{Bhattacharjee2020.05.28.122143,
  author        = {Bhattacharjee, Bornali and Pandit, Bhaswati},
  title         = {Phylogenetic clustering of the {Indian} {SARS-CoV-2} genomes reveals the presence of distinct clades of viral haplotypes among states},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  year          = {2020},
  elocation-id  = {2020.05.28.122143},
  doi           = {10.1101/2020.05.28.122143},
  url           = {https://www.biorxiv.org/content/early/2020/05/28/2020.05.28.122143},
  pdf-url       = {https://www.biorxiv.org/content/early/2020/05/28/2020.05.28.122143.full.pdf},
  abstract      = {The first Indian cases of COVID-19 caused by SARS-Cov-2 were
                   reported in February 29, 2020 with a history of travel from
                   Wuhan, China and so far above 4500 deaths have been
                   attributed to this pandemic. The objectives of this study
                   were to characterize Indian SARS-CoV-2 genome-wide
                   nucleotide variations, trace ancestries using phylogenetic
                   networks and correlate state-wise distribution of viral
                   haplotypes with differences in mortality rates. A total of
                   305 whole genome sequences from 19 Indian states were
                   downloaded from GISAID. Sequences were aligned using the
                   ancestral Wuhan-Hu genome sequence (NC\_045512.2). A total
                   of 633 variants resulting in 388 amino acid substitutions
                   were identified. Allele frequency spectrum, and nucleotide
                   diversity ($\pi$) values revealed the presence of higher
                   proportions of low frequency variants and negative
                   Tajima{\textquoteright}s D values across ORFs indicated the
                   presence of population expansion. Network analysis
                   highlighted the presence of two major clusters of viral
                   haplotypes, namely, clade G with the S:D614G, RdRp: P323L
                   variants and a variant of clade L [Lv] having the RdRp:A97V
                   variant. Clade G genomes were found to be evolving more
                   rapidly into multiple sub-clusters including clade GH and
                   GR and were also found in higher proportions in three
                   states with highest mortality rates namely, Gujarat, Madhya
                   Pradesh and West Bengal.},
  coi-statement = {The authors have declared no competing interest.},
}