% bioRxiv preprint 110437 by O'Hagan and Kell (2017).
% The exporter's key contained braces and a LaTeX macro, which BibTeX cannot
% parse as a citation key; it is normalised here to plain ASCII (OHagan110437).
% NOTE(review): the exported title was truncated ("substra..."); the ending
% "substrates" is restored here -- confirm against the DOI landing page.
% The elocation-id field is not used by standard styles and is kept as metadata.
@article{OHagan110437,
  author         = {O'Hagan, Steve and Kell, Douglas B.},
  title          = {Consensus rank orderings of molecular fingerprints illustrate the `most genuine' similarities between marketed drugs and small endogenous human metabolites, but highlight exogenous natural products as the most important `natural' drug transporter substrates},
  journal        = {bioRxiv},
  publisher      = {Cold Spring Harbor Laboratory},
  year           = {2017},
  elocation-id   = {110437},
  doi            = {10.1101/110437},
  url            = {https://www.biorxiv.org/content/early/2017/02/21/110437},
  eprint         = {https://www.biorxiv.org/content/early/2017/02/21/110437.full.pdf},
  abstract       = {We compare several molecular fingerprint encodings for marketed, small molecule drugs, and assess how their rank order varies with the fingerprint in terms of the Tanimoto similarity to the most similar endogenous human metabolite as taken from Recon2.
    For the great majority of drugs, the rank order varies very greatly depending on the encoding used, and also somewhat when the Tanimoto similarity (TS) is replaced by the Tversky similarity.
    However, for a subset of such drugs, amounting to some 10\% of the set and a Tanimoto similarity of $\sim$0.8 or greater, the similarity coefficient is relatively robust to the encoding used.
    This leads to a metric that, while arbitrary, suggests that a Tanimoto similarity of 0.75--0.8 or greater genuinely does imply a considerable structural similarity of two molecules in the drug-endogenite space.
    Although comparatively few ($<$10\% of) marketed drugs are, in this sense, robustly similar to an endogenite, there is often at least one encoding with which they are genuinely similar (e.g. TS $>$ 0.75).
    This is referred to as the Take Your Pick Improved Cheminformatic Analytical Likeness or TYPICAL encoding, and on this basis some 66\% of drugs are within a TS of 0.75 to an endogenite.

    We next explicitly recognise that natural evolution will have selected for the ability to transport dietary substances, including plant, animal and microbial `secondary' metabolites, that are of benefit to the host.
    These should also be explored in terms of their closeness to marketed drugs.
    We thus compared the TS of marketed drugs with the contents of various databases of natural products.
    When this is done, we find that some 80\% of marketed drugs are within a TS of 0.7 to a natural product, even using just the MACCS encoding.
    For patterned and TYPICAL encodings, 80\% and 98\% of drugs are within a TS of 0.8 to (an endogenite or) an exogenous natural product.
    This implies strongly that it is these exogeneous (dietary and medicinal) natural products that are more to be seen as the `natural' substrates of drug transporters (as is recognised, for instance, for the solute carrier SLC22A4 and ergothioneine).
    This novel analysis casts an entirely different light on the kinds of natural molecules that are to be seen as most like marketed drugs, and hence potential transporter substrates, and further suggests that a renewed exploitation of natural products as drug scaffolds would be amply rewarded.},
}