% Jones et al. (2020): bioRxiv preprint describing the CyanoMetDB database of
% cyanobacterial secondary metabolites.
%
% Notes for maintainers:
%   - The citation key is the one produced by the bioRxiv export; it is kept
%     unchanged so existing \cite commands keep resolving.
%   - Author names use the "Last, First" form so compound and hyphenated names
%     are parsed unambiguously.
%   - eprint carries only the preprint identifier (the DOI suffix); the link to
%     the full-text PDF lives in the non-standard field fulltext-url.
%   - elocation-id, fulltext-url and competing-interests are not standard
%     fields; standard styles are expected to ignore them, so they serve as
%     annotations only.
%   - The competing-interest statement was fused onto the end of the abstract
%     by the export and has been moved to its own field.
@article{Jones2020.04.16.038703,
  author              = {Jones, Martin R. and
                         Pinto, Ernani and
                         Torres, Mariana A. and
                         D{\"o}rr, Fabiane and
                         Mazur-Marzec, Hanna and
                         Szubert, Karolina and
                         Tartaglione, Luciana and
                         Dell'Aversano, Carmela and
                         Miles, Christopher O. and
                         Beach, Daniel G. and
                         McCarron, Pearse and
                         Sivonen, Kaarina and
                         Fewer, David P. and
                         Jokela, Jouni and
                         Janssen, Elisabeth M.-L.},
  title               = {Comprehensive database of secondary metabolites from cyanobacteria},
  journal             = {bioRxiv},
  publisher           = {Cold Spring Harbor Laboratory},
  year                = {2020},
  doi                 = {10.1101/2020.04.16.038703},
  url                 = {https://www.biorxiv.org/content/early/2020/04/16/2020.04.16.038703},
  eprint              = {2020.04.16.038703},
  eprinttype          = {bioRxiv},
  elocation-id        = {2020.04.16.038703},
  fulltext-url        = {https://www.biorxiv.org/content/early/2020/04/16/2020.04.16.038703.full.pdf},
  abstract            = {Cyanobacteria form harmful mass blooms in freshwater and marine environments
                         around the world. A range of secondary metabolites has been identified from
                         cultures of cyanobacteria and biomass collected from cyanobacterial bloom
                         events. A comprehensive database is necessary to correctly identify
                         cyanobacterial metabolites and advance research on their abundance,
                         persistence and toxicity in natural environments. We consolidated open access
                         databases and manually curated missing information from the literature
                         published between 1970 and March 2020. The result is the database CyanoMetDB,
                         which includes more than 2000 entries based on more than 750 literature
                         references. This effort has more than doubled the total number of entries
                         with complete literature metadata and structural composition (SMILES codes)
                         compared to publicly available databases to this date. Over the past decade,
                         more than one hundred additional secondary metabolites have been identified
                         yearly. We organized all entries into structural classes and conducted
                         substructure searches of the provided SMILES codes. This approach
                         demonstrated, for example, that 65\% of the compounds carry at least one
                         peptide bond, 57\% are cyclic compounds, and 30\% carry at least one halogen
                         atom. Structural searches by SMILES code can be further specified to identify
                         structural motifs that are relevant for analytical approaches, research on
                         biosynthetic pathways, bioactivity-guided analysis, or to facilitate
                         predictive science and modeling efforts on cyanobacterial metabolites. This
                         database facilitates rapid identification of cyanobacterial metabolites from
                         toxic blooms, research on the biosynthesis of cyanobacterial natural
                         products, and the identification of novel natural products from
                         cyanobacteria.},
  competing-interests = {The authors have declared no competing interest.},
}