% bioRxiv preprint entry. The citation key is kept exactly as exported so
% existing citations continue to resolve. Fields not known to standard
% styles (elocation-id, pdf-url, abstract) are ignored when typesetting.
@article{Bankevich2020.12.10.420448,
  author        = {Bankevich, Anton and Bzikadze, Andrey and Kolmogorov, Mikhail and Pevzner, Pavel A.},
  title         = {Assembling Long Accurate Reads Using de {Bruijn} Graphs},
  journal       = {bioRxiv},
  year          = {2020},
  publisher     = {Cold Spring Harbor Laboratory},
  elocation-id  = {2020.12.10.420448},
  doi           = {10.1101/2020.12.10.420448},
  url           = {https://www.biorxiv.org/content/early/2020/12/11/2020.12.10.420448},
  eprint        = {2020.12.10.420448},
  eprinttype    = {bioRxiv},
  pdf-url       = {https://www.biorxiv.org/content/early/2020/12/11/2020.12.10.420448.full.pdf},
  abstract      = {Although the de Bruijn graphs represent the basis of many genome assemblers, it remains unclear how to construct these graphs for large genomes and large k-mer sizes. This algorithmic challenge has become particularly important with the emergence of long and accurate high-fidelity (HiFi) reads that were recently utilized to generate a semi-manual telomere-to-telomere assembly of the human genome using the alternative string graph assembly approach. To enable fully automated high-quality HiFi assemblies of various genomes, we developed an efficient jumboDB algorithm for constructing the de Bruijn graph for large genomes and large k-mer sizes and the LJA genome assembler that error-corrects HiFi reads and uses jumboDB to construct the de Bruijn graph on the error-corrected reads. Since the de Bruijn graph constructed for a fixed k-mer size is typically either too tangled or too fragmented, LJA uses a new concept of a multiplex de Bruijn graph with varying k-mer sizes. We demonstrate that LJA produces contiguous assemblies of complex repetitive regions in genomes including automated assemblies of various highly-repetitive human centromeres. Competing Interest Statement: The authors have declared no competing interest.},
}