<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Pharmacol.</journal-id>
<journal-title>Frontiers in Pharmacology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Pharmacol.</abbrev-journal-title>
<issn pub-type="epub">1663-9812</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1085665</article-id>
<article-id pub-id-type="doi">10.3389/fphar.2022.1085665</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Pharmacology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>3CLpro inhibitors: DEL-based molecular generation</article-title>
<alt-title alt-title-type="left-running-head">Xiong et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fphar.2022.1085665">10.3389/fphar.2022.1085665</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Xiong</surname>
<given-names>Feng</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xu</surname>
<given-names>Honggui</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2078185/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yu</surname>
<given-names>Mingao</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1899321/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Xingyu</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhong</surname>
<given-names>Zhenmin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Guo</surname>
<given-names>Yuhan</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Meihong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ou</surname>
<given-names>Huanfang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wu</surname>
<given-names>Jiaqi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xie</surname>
<given-names>Anhua</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xiong</surname>
<given-names>Jiaqi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xu</surname>
<given-names>Linlin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Lanmei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhong</surname>
<given-names>Qijian</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Huang</surname>
<given-names>Liye</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Zhenwei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Tianyuan</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Jin</surname>
<given-names>Feng</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1892530/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>He</surname>
<given-names>Xun</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Shenzhen Innovation Center for Small Molecule Drug Discovery Co., Ltd.</institution>, <addr-line>Shenzhen</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Shenzhen NewDEL Biotech Co., Ltd.</institution>, <addr-line>Shenzhen</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/633893/overview">Dan Gao</ext-link>, Tsinghua University, China</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2079832/overview">Feng Ni</ext-link>, Ningbo University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1721010/overview">Yizhou Li</ext-link>, Chongqing University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1732759/overview">Xiaoni Ai</ext-link>, Peking University, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Feng Xiong, <email>fxiong@innosmd.com</email>; Feng Jin, <email>jinfeng@newdel.com.cn</email>; Xun He, <email>xhe@innosmd.com</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Experimental Pharmacology and Drug Discovery, a section of the journal Frontiers in Pharmacology</p>
</fn>
<fn fn-type="equal" id="fn1">
<label>
<sup>&#x2020;</sup>
</label>
<p>These authors have contributed equally to this work</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>07</day>
<month>12</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>1085665</elocation-id>
<history>
<date date-type="received">
<day>31</day>
<month>10</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>23</day>
<month>11</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Xiong, Xu, Yu, Chen, Zhong, Guo, Chen, Ou, Wu, Xie, Xiong, Xu, Zhang, Zhong, Huang, Li, Zhang, Jin and He.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Xiong, Xu, Yu, Chen, Zhong, Guo, Chen, Ou, Wu, Xie, Xiong, Xu, Zhang, Zhong, Huang, Li, Zhang, Jin and He</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Molecular generation (MG) <italic>via</italic> machine learning (ML) has accelerated drug structural optimization, especially for targets with a large amount of reported bioactivity data. However, molecular generation for structural optimization is often powerless for new targets. DNA-encoded library (DEL) technology can generate systematic, target-specific activity data, including for novel targets with little or no known activity data. Therefore, this study aims to overcome the limitation of molecular generation in structural optimization for new targets. Firstly, we generated molecules using the structure-affinity data (2.96 million samples) for 3C-like protease (3CLpro) from our self-built DEL platform, avoiding reliance on public databases (e.g., CHEMBL and ZINC). Subsequently, to analyze the effect of transfer learning on the positive rate of the molecule generation model, molecular docking and an affinity model based on DEL data were applied to explore how transfer learning enhances molecule generation. In addition, the generated molecules were subjected to multiple filtering steps, including physicochemical properties, drug-like properties, pharmacophore evaluation, and molecular docking, to determine the molecules for further study, which were then verified by molecular dynamics simulation.</p>
</abstract>
<kwd-group>
<kwd>DEL</kwd>
<kwd>machine learning</kwd>
<kwd>molecule generation</kwd>
<kwd>3C-like protease</kwd>
<kwd>transfer learning</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Drug structural optimization (<xref ref-type="bibr" rid="B43">Stokes et al., 2020</xref>; <xref ref-type="bibr" rid="B47">Wang Z.-Y. et al., 2022</xref>) aims to design new molecules with better specific properties, either to increase desired bioactivities or decrease side effects. In the early stages, the conception and evaluation of newly proposed molecules relied on medicinal chemists&#x2019; experience and knowledge of basic chemistry and biology. Later, as computer-aided drug design improved, the ligand-based quantitative structure-activity relationship (QSAR) model (<xref ref-type="bibr" rid="B11">Cherkasov et al., 2014</xref>), combined with molecular docking and molecular dynamics simulation, was used to screen large numbers of molecules efficiently. With the substantial increase in data and the continuous improvement of computing resources, deep learning (DL) has developed rapidly. This new tool facilitated drug development, especially structural optimization. Zhavoronkov et al. (<xref ref-type="bibr" rid="B56">2019</xref>) discovered a kinase inhibitor of DDR1 in 41&#xa0;days by building a deep-learning molecular generation architecture, GENTRL. Then, applying deep learning in drug design became one of the top 10 breakthrough technologies in MIT Technology Review 2020 (<xref ref-type="bibr" rid="B1">MIT Technology Review, 2020</xref>). Full use of this tool can explore more expansive chemical space and generate molecules of desired physicochemical and pharmacological properties, accelerating drug development (<xref ref-type="bibr" rid="B50">Xu et al., 2019</xref>; <xref ref-type="bibr" rid="B3">Ar&#xfa;s-Pous et al., 2020</xref>; <xref ref-type="bibr" rid="B22">Kotsias et al., 2020</xref>).</p>
<p>The strategies for molecular generative models through deep learning can be divided into ligand-based and structure-based (also called receptor-based) approaches. Ligand-based molecule generation requires a set of experimentally validated active compounds. ML generates molecules by learning the common features of the active compounds (<xref ref-type="bibr" rid="B27">Liu et al., 2021</xref>; <xref ref-type="bibr" rid="B46">Wang M.-Y. et al., 2022</xref>). Structure-based molecular generation considers ligand and receptor interactions. Traditional structure-based molecular generation is a fragment-based approach that adds, deletes, or replaces chemical fragments of ligands in pockets (<xref ref-type="bibr" rid="B5">Batool et al., 2019</xref>; <xref ref-type="bibr" rid="B23">Krishnan et al., 2022</xref>). The algorithm using the protein&#x2019;s structural information to design new molecules has not been widely validated due to the limitation of high computational resource consumption (<xref ref-type="bibr" rid="B40">Skalic et al., 2019</xref>; <xref ref-type="bibr" rid="B8">Born et al., 2021</xref>; <xref ref-type="bibr" rid="B18">Grechishnikova, 2021</xref>). Structure-based molecular generation often requires three-dimensional information on the binding pockets within ligands (<xref ref-type="bibr" rid="B46">Wang M.-Y. et al., 2022</xref>; <xref ref-type="bibr" rid="B28">Long et al., 2022</xref>). For new targets, there are often neither revealed binding pockets nor experimentally validated ligands. The datasets used for ligand-based molecule generation usually come from public databases (such as CHEMBL, ZINC, etc.), and target-specific sub-datasets are generally needed to guide the structural optimization. Such a procedure has an unavoidable limitation because of its heavy dependence on public experimental data. For new targets, such datasets are severely lacking. Machine learning cannot be developed without an available dataset. 
This is the main reason why molecular generation, and even AIDD, is still challenging to apply to the structure optimization of hit compounds for new targets. DNA-encoded library (DEL) (<xref ref-type="bibr" rid="B14">Dickson et al., 2019</xref>; <xref ref-type="bibr" rid="B25">Li et al., 2022</xref>; <xref ref-type="bibr" rid="B31">Nie et al., 2022</xref>; <xref ref-type="bibr" rid="B41">Song et al., 2020</xref>; <xref ref-type="bibr" rid="B51">Yang et al., 2022</xref>; <xref ref-type="bibr" rid="B53">Zhao et al., 2019</xref>; <xref ref-type="bibr" rid="B54">Zhao et al., 2022</xref>) is a powerful tool from combinatorial screening and DNA-encoded technology. Compared with traditional high-throughput screening (HTS), DEL technology can efficiently and economically generate a large amount of affinity data for specific targets, including new target data (hundreds of billions scale) (<xref ref-type="bibr" rid="B10">Buller et al., 2010</xref>; <xref ref-type="bibr" rid="B20">Kalliokoski, 2015</xref>). Therefore, using the DEL dataset, mainly the structure-affinity relationship, for molecular generation could be a reasonable solution to the problem of efficient structural optimization for new target drug development.</p>
<p>Deep learning generative algorithms have been explored for aided drug design. Generally, standard inputs in generative models are linear notations such as the simplified molecular-input line-entry system (SMILES), and molecular graphs. Common generative model architectures include recurrent neural networks (RNNs) (<xref ref-type="bibr" rid="B7">Bjerrum and Threlfall, 2017</xref>; <xref ref-type="bibr" rid="B38">Segler et al., 2018</xref>; <xref ref-type="bibr" rid="B22">Kotsias et al., 2020</xref>), autoencoders [AE, VAE (variational AE), AAE (adversarial AE)] (<xref ref-type="bibr" rid="B21">Kingma and Welling, 2013</xref>; <xref ref-type="bibr" rid="B36">Rezende et al., 2014</xref>; <xref ref-type="bibr" rid="B29">Makhzani et al., 2015</xref>), and generative adversarial networks (<xref ref-type="bibr" rid="B17">Goodfellow et al., 2020</xref>). Optimization strategies for generative models include transfer learning (<xref ref-type="bibr" rid="B38">Segler et al., 2018</xref>), Bayesian optimization (<xref ref-type="bibr" rid="B16">G&#xf3;mez-Bombarelli et al., 2018</xref>), reinforcement learning (<xref ref-type="bibr" rid="B45">Wang et al., 2021</xref>), and conditional generation (<xref ref-type="bibr" rid="B26">Li et al., 2018</xref>). Transfer learning is a strategy for transferring knowledge from pre-learned tasks to improve learning performance. Public datasets are usually needed for pre-training until a greater probability of generating valid molecules is obtained. Subsequently, the pre-trained model is retrained using known active molecules. Generally, the overall distribution of the large CHEMBL or ZINC dataset used for pre-training is quite different from that of specific target active molecules, negatively affecting transfer learning (<xref ref-type="bibr" rid="B55">Zhao et al., 2014</xref>). Transfer learning using the DEL dataset is expected to address this obstacle effectively. The DEL dataset herein is composed of three groups of building blocks. 
We used the high-affinity molecules from DEL (which appeared as compounds with high count values in DEL) to reduce the distribution inconsistency between the pre-trained model and transfer learning. The beneficial effect of transfer learning herein is confirmed and consistent with the DEL dataset. In this experiment, we used the molecular dataset with higher counts in DEL, molecules with more potent binding force to the target 3CLpro, for transfer learning, thereby increasing the probability of generating active molecules.</p>
<p>In this study, we set out to solve the following challenges: using DEL technology to construct two DEL libraries for 3CLpro and performing data analysis combined with chemical synthesis. Active hit compounds <bold>H1</bold> and <bold>H2</bold> (<xref ref-type="scheme" rid="sch1">Scheme 1</xref>) were found <italic>via</italic> bioactivity assay. Subsequently, the own-built DEL dataset was used to establish a molecular generative model to obtain a dataset with broad chemical space distribution. The obtained molecule dataset was directly applied with several subsequent filtering steps. On the other hand, molecules with high count values in the DEL dataset were defined as positive samples for transfer learning to obtain another dataset. The above two datasets were filtered by the druggability and pharmacophore model. Finally, the obtained molecules were verified by molecular docking and dynamics simulation, which confirmed the potential bioactivity of the newly designed molecule (<xref ref-type="fig" rid="F1">Figure 1</xref>).</p>
<fig id="sch1" position="float">
<label>SCHEME 1</label>
<caption>
<p>Design information for DNA-encoded library 1 and library 2 <bold>(A,B)</bold>; structures and corresponding inhibition activity against 3CLpro (IC<sub>50</sub>/&#xb5;M) of <bold>H1</bold> and <bold>H2 (C,D)</bold>.</p>
</caption>
<graphic xlink:href="FPHAR_fphar-2022-1085665_wc_sch1.tif"/>
</fig>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Flow chart of the current study.</p>
</caption>
<graphic xlink:href="fphar-13-1085665-g001.tif"/>
</fig>
</sec>
<sec id="s2">
<title>2 Materials and experiments</title>
<sec id="s2-1">
<title>2.1 DNA-encoded libraries screening, chemical synthesis, and bio experiments</title>
<p>Supporting information describes DEL screening, chemical synthesis, bio-activity experiments, and compound characterization.</p>
</sec>
<sec id="s2-2">
<title>2.2 Machine learning modeling</title>
<sec id="s2-2-1">
<title>2.2.1 Data preparation</title>
<p>The two DEL libraries were combined into a dataset containing 1.04 billion molecules with corresponding count and enrichment fold (EF) values. Information on the DEL is provided in the Supplementary Material. After removing the data with very low counts, 3,702,672 molecules remained. This dataset was then divided into a training set and a test set (0.8/0.2) containing 2,962,138 and 740,534 molecules, respectively. First, the training set was used to train the molecule generation model. Then, molecules (18,129) with higher count values in the DEL were selected as the positive samples for transfer learning to fine-tune the pre-trained model.</p>
</sec>
<sec id="s2-2-2">
<title>2.2.2 Molecular generation</title>
<p>SMILES or molecular graphs are commonly used for molecular representation. SMILES is a text representation of molecular structures (<xref ref-type="bibr" rid="B48">Weininger, 1988</xref>). Molecular graphs represent the structure of molecules as graphs, where the edges of the graph represent bonds and the nodes represent atoms (<xref ref-type="bibr" rid="B44">Sun et al., 2020</xref>). Molecules were represented as SMILES in three models, RNN, VAE, and AAE, to perform molecule generation. We used pytorch, sklearn, numpy, rdkit, umap-learn, and the MOSES benchmark platform (<xref ref-type="bibr" rid="B33">Polykovskiy et al., 2020</xref>) to complete the experiments.</p>
<p>Recurrent neural networks (RNNs) (<xref ref-type="bibr" rid="B38">Segler et al., 2018</xref>) are designed to learn sequential data such as text or speech. The SMILES in the DEL are made into a corpus. An RNN can learn the grammatical information of the SMILES to know which parts of molecules tend to be connected. An RNN can generate sequences through forward propagation (<xref ref-type="bibr" rid="B7">Bjerrum and Threlfall, 2017</xref>). By treating the molecule generation process as a series of steps and sampling the network at each step, the model is highly likely to generate valid molecules that are structurally similar to the training molecules. The RNN architecture used here consists of an embedding layer, three LSTM layers, and a linear layer.</p>
<p>Variational autoencoder (VAE) (<xref ref-type="bibr" rid="B9">Bowman et al., 2015</xref>) consists of an encoder and a decoder. The former encodes the input data into a latent vector, which obeys the Gaussian Distribution. The decoder restores the latent vector result to the target sample. SMILES are used as the model&#x2019;s input and output to establish a VAE model. The VAE architecture consists of an embedding layer, an encoder layer, and a decoder layer. The encoder and decoder layers consist of a GRU layer and two linear layers.</p>
<p>Adversarial Autoencoder (AAE) (<xref ref-type="bibr" rid="B29">Makhzani et al., 2015</xref>) is similar to VAE principally. The difference is that based on the encoder and the decoder, a discriminator is introduced, which is responsible for distinguishing the calculated latent vector in the encoder from the real sample. The encoder and decoder are still accountable for encoding and reconstructing the data. AAE also uses SMILES as input and output. The AAE&#x2019;s encoder part includes an embedding layer, an LSTM layer, and a linear layer. The decoder consists of two linear layers, one embedding layer, and one LSTM layer. The discriminator consists of two linear layers where the activation function is ELU (<xref ref-type="bibr" rid="B12">Clevert et al., 2015</xref>).</p>
</sec>
<sec id="s2-2-3">
<title>2.2.3 Transfer learning</title>
<p>Transfer learning (<xref ref-type="bibr" rid="B2">Amabilino et al., 2020</xref>) is a model fine-tuning technique that fixes some of the original model&#x2019;s parameters while the others continue to be trained and updated. This technique aims to narrow the expansive chemical space of the generative model so that target molecules are searched for in a relatively small chemical space. The model is retrained on molecules with high count values so that it generates molecules whose distribution is more similar to that of the high-count molecules. For the AAE, we fine-tune the decoder&#x2019;s last linear layer and the discriminator&#x2019;s last linear layer. For the VAE, the last two linear layers of the decoder are fine-tuned. For the RNN, we also fine-tune the last linear layer.</p>
</sec>
<sec id="s2-2-4">
<title>2.2.4 Evaluation metrics</title>
<p>Each model generated 10,000 molecules, which were evaluated using the evaluation metrics provided in MOSES (<xref ref-type="bibr" rid="B33">Polykovskiy et al., 2020</xref>), including validity, uniqueness, novelty, internal diversity (IntDiv), and scaffold similarity (Scaff).<list list-type="simple">
<list-item>
<p>Validity is the proportion of valid molecules in the generated molecules.</p>
</list-item>
<list-item>
<p>Uniqueness is the proportion of molecules not duplicated in the generated molecules.</p>
</list-item>
<list-item>
<p>Novelty is the proportion of molecules that do not exist in the training set.</p>
</list-item>
<list-item>
<p>Internal diversity (IntDiv) (<xref ref-type="bibr" rid="B6">Benhenda, 2018</xref>) is a metric to assess the chemical variety of generated molecules. The value range is [0,1]. The higher value means higher diversity of the generated molecules.</p>
</list-item>
<list-item>
<p>Scaffold similarity (Scaff) represents the similarity between scaffolds in the generated set and reference dataset. The value range is [0,1]. The higher the value of Scaff, the more similar the two are.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s2-2-5">
<title>2.2.5 Filtering</title>
<p>Each model generated a dataset of 1,000,000 molecules. First, each dataset was filtered for validity and uniqueness, followed by drug-likeness: 250 &#x2264; MW &#x2264; 750, logP &#x2264; 5, HBD &#x2264; 5, HBA &#x2264; 10, RB &#x3c; 10, and 0.5 &#x3c; QED. The next step was improving druggability by applying medicinal chemistry filters (MCFs) (<xref ref-type="bibr" rid="B19">Kalgutkar et al., 2005</xref>) and Pan Assay Interference Compounds (PAINS) filtering (<xref ref-type="bibr" rid="B4">Baell and Holloway, 2010</xref>). Without transfer learning, the RNN, AAE, and VAE showed no significant differences in performance or predicted affinity distribution. Therefore, these datasets were merged and divided into groups depending on whether transfer learning was applied or not. A total of 5,000 molecules were obtained after pharmacophore filtration. Finally, molecular docking was employed, and the molecules with higher docking scores were selected for molecular dynamics simulation.</p>
</sec>
<sec id="s2-2-6">
<title>2.2.6 Chemical space visualization</title>
<p>The SMILES remaining after validity and repeatability filtration and the original 3,702,672 SMILES from the DEL dataset were transformed into Morgan fingerprints with 1024 dimensions and a radius of 2 (<xref ref-type="bibr" rid="B37">Rogers and Hahn, 2010</xref>). These fingerprints were then used to build a uniform manifold approximation and projection (UMAP) model (metric &#x3d; &#x201c;jaccard&#x201d;, n_components &#x3d; 2) for dimensionality reduction and visualization (<xref ref-type="bibr" rid="B30">McInnes et al., 2018</xref>).</p>
</sec>
<sec id="s2-2-7">
<title>2.2.7 Affinity modeling</title>
<p>The model for affinity prediction was established according to our previous study (<xref ref-type="bibr" rid="B49">Xiong et al., 2022</xref>). First, we sorted the molecules in the DEL dataset by the count value, then oversampled the top 10,000 ranked molecules by ten times. The step was set as 0, and every other step of the remaining molecules was sampled to form a training set.</p>
</sec>
<sec id="s2-2-8">
<title>2.2.8 3D conformation and pharmacophore-based screening</title>
<p>The 3D molecular similarity was calculated through the shape and color similarity score (SC score), which represents the pharmacophoric feature similarity (<xref ref-type="bibr" rid="B24">Landrum et al., 2006</xref>) and the shape similarity (<xref ref-type="bibr" rid="B34">Putta et al., 2005</xref>). This score was applied to the previously generated dataset. The 3D similarity score is a floating point value in the range of [0, 1], with a higher value indicating higher similarity between candidate and reference molecules. The native ligand in PDB:7L13 from the RCSB PDB database was used as a reference structure (<xref ref-type="bibr" rid="B52">Zhang et al., 2021</xref>). One hundred conformations were generated for each molecule from the dataset using the Universal Force Field (UFF) in RDKit. The lowest energy conformation was used for the next step.</p>
</sec>
<sec id="s2-2-9">
<title>2.2.9 Molecular docking</title>
<p>The A-chain of the complex PDB:7L13 (resolution 2.17&#xa0;&#xc5;) of 3CLpro protein was split as a docking template to obtain accurate docking results. Subsequently, the complex was preprocessed using the Protein Preparation Wizard module of the Maestro suite (version: 13.1.141, Schr&#xf6;dinger Inc.) with the default setting, including the addition of hydrogen and side chains, removal of water molecules, and calculation of partial charges and protonation states using the OPLS4 force field (<xref ref-type="bibr" rid="B32">Poltev et al., 1996</xref>). Followed by a grid generation module, a similar-sized grid box centered on the native ligand was made to determine the binding pocket. All molecules were preprocessed by the LigPrep module. The ionization states were calculated using Epik (<xref ref-type="bibr" rid="B39">Shelley et al., 2007</xref>) at pH &#x3d; 7.0 &#xb1; 2.0. Finally, all molecules were docked into the binding pocket within the grid and evaluated using the standard precision (SP) of Glide-v9.4. The scale factor and partial charge intercept are set to 0.8 and 0.15, respectively. 1000 poses per ligand were generated for docking evaluation. Post-docking binding site analysis and generation of interaction graphs were finished using Maestro.</p>
</sec>
<sec id="s2-2-10">
<title>2.2.10 Molecular dynamics simulations</title>
<p>A molecular dynamics simulation was carried out to analyze further the dynamic interaction process between protein and ligand and the stability of binding status. Molecular dynamics simulation is a popular technique to study protein motion by tracking its conformational changes over time (<xref ref-type="bibr" rid="B13">Collier et al., 2020</xref>). Based on molecular interaction and visualization analysis of the SP docking results, the top-ranked molecules were used for the molecular dynamics simulation (MD-simulation) study. MD-simulation was performed using the GROMACS software package (version 2021.5) (<xref ref-type="bibr" rid="B35">Rakhshani et al., 2019</xref>). The AMBER14SB force field parameter was used for the protein. The ligand atomic charge was calculated at the B3LYP/6-31G&#x2a; level of theory. The ligand topology was computed using the GAFF2 force field parameter. The system was solvated with the TIP3P water model, and Na<sup>&#x2b;</sup> and Cl<sup>&#x2212;</sup> ions were added to neutralize the charge. Electrostatic interactions are handled separately using the Particle Mesh Ewald (PME) and Verlet algorithms. The heavy atoms of the protein are constrained, and the energy minimization is carried out through 50,000 steps using the steepest descent method. The simulated system was equilibrated for 100 ps using a canonical ensemble (NVT) and an isothermal-isobaric ensemble (NPT). Both van der Waals and Coulomb interactions were calculated using a cutoff of 1.4&#xa0;nm. Afterward, the system was run at constant temperature (300&#xa0;K) and constant pressure (1&#xa0;bar) for 100 ns molecular dynamics simulations with a time step of 2 fs and trajectory data saved every 5 ps. Finally, the ligand and protein complex&#x2019;s root mean square deviation (RMSD, &#xc5;) at 100 ns was measured. By examining the interaction of the ligand with active site residues and the structural changes of the complex, the complexes were considered stable.</p>
</sec>
</sec>
</sec>
<sec sec-type="results|discussion" id="s3">
<title>3 Results and discussion</title>
<sec id="s3-1">
<title>3.1 Machine learning modeling</title>
<sec id="s3-1-1">
<title>3.1.1 Evaluation of molecular generative models</title>
<p>We evaluated each model&#x2019;s validity, uniqueness, novelty, IntDiv, and Scaff metrics (<xref ref-type="table" rid="T1">Tables 1</xref>, <xref ref-type="table" rid="T2">2</xref>). The validity and uniqueness indicators of all models perform satisfactorily, indicating that the models can learn the grammatical information of the SMILES structure. The performance of the novelty and IntDiv indicators is relatively poor, meaning that the models&#x2019; generalization ability may not be strong enough. VAE and RNN have higher Scaff values, meaning that these models can generate the same scaffolds as the training set but are weak at generating new scaffolds, while AAE is the opposite. In other words, the generated dataset and the training dataset had an apparent overlap. After transfer learning in particular, the novelty metric dropped further. A more complex model may be beneficial to address such a problem, so the multi-constraint molecular generation (MCMG) model was also established (<xref ref-type="bibr" rid="B45">Wang et al., 2021</xref>). Unfortunately, MCMG performed relatively poorly in affinity prediction, so we decided not to analyze it further (<xref ref-type="sec" rid="s10">Supplementary Figure S13</xref>).</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Performance of each model without transfer learning.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Model</th>
<th align="left">Validity</th>
<th align="left">Uniqueness</th>
<th align="left">Novelty</th>
<th align="left">IntDiv</th>
<th align="left">Scaff/Test</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">VAE</td>
<td align="char" char=".">0.9480</td>
<td align="char" char=".">
<bold>0.9990</bold>
</td>
<td align="char" char=".">0.6421</td>
<td align="char" char=".">0.7496</td>
<td align="char" char=".">
<bold>0.8998</bold>
</td>
</tr>
<tr>
<td align="left">AAE</td>
<td align="char" char=".">0.9343</td>
<td align="char" char=".">0.9981</td>
<td align="char" char=".">0.6394</td>
<td align="char" char=".">0.7397</td>
<td align="char" char=".">0.6483</td>
</tr>
<tr>
<td align="left">RNN</td>
<td align="char" char=".">
<bold>0.9994</bold>
</td>
<td align="char" char=".">0.9750</td>
<td align="char" char=".">0.6116</td>
<td align="char" char=".">0.7646</td>
<td align="char" char=".">0.8884</td>
</tr>
<tr>
<td align="left">MCMG</td>
<td align="char" char=".">0.8611</td>
<td align="char" char=".">0.9980</td>
<td align="char" char=".">
<bold>0.9952</bold>
</td>
<td align="char" char=".">
<bold>0.7894</bold>
</td>
<td align="char" char=".">0.6359</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values are specific values with best-performance in each column.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Performance of each model with transfer learning.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Model</th>
<th align="left">Validity</th>
<th align="left">Uniqueness</th>
<th align="left">Novelty</th>
<th align="left">IntDiv</th>
<th align="left">Scaff/Test</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">VAE_TL</td>
<td align="char" char=".">0.9653</td>
<td align="char" char=".">0.8989</td>
<td align="char" char=".">0.3692</td>
<td align="char" char=".">0.7461</td>
<td align="char" char=".">0.6638</td>
</tr>
<tr>
<td align="left">AAE_TL</td>
<td align="char" char=".">0.8753</td>
<td align="char" char=".">0.8823</td>
<td align="char" char=".">
<bold>0.4793</bold>
</td>
<td align="char" char=".">0.7397</td>
<td align="char" char=".">0.6177</td>
</tr>
<tr>
<td align="left">RNN_TL</td>
<td align="char" char=".">
<bold>0.9780</bold>
</td>
<td align="char" char=".">
<bold>0.9299</bold>
</td>
<td align="char" char=".">0.3880</td>
<td align="char" char=".">
<bold>0.7503</bold>
</td>
<td align="char" char=".">
<bold>0.6979</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values are specific values with best-performance in each column.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-1-2">
<title>3.1.2 Chemical space visualization</title>
<p>The visualization results obtained using UMAP dimensionality reduction are shown in <xref ref-type="fig" rid="F2">Figure 2</xref>. The molecules generated by each model closely resemble the chemical space distribution of the DEL dataset. This indicates that the models could learn the molecular distribution sufficiently from the source dataset.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Dimensional reduction visualization of the training and generated molecular datasets (from left to right: DEL, VAE, AAE, and RNN).</p>
</caption>
<graphic xlink:href="fphar-13-1085665-g002.tif"/>
</fig>
</sec>
<sec id="s3-1-3">
<title>3.1.3 Affinity model performance</title>
<p>
<xref ref-type="fig" rid="F3">Figure 3</xref> shows the distribution of the molecules from each model&#x2019;s affinity predictions. The distribution of molecules without transfer learning is mainly located in the area of 0.1&#x2013;0.2, while the corresponding values after using transfer learning are mostly above 0.2. Such improved affinity indicates the beneficial effect of transfer learning, which is expected to improve the success rate and efficiency of further structural optimization. <xref ref-type="scheme" rid="sch2">Scheme 2</xref> shows the representative molecules with high affinity scores, which were expected to be potentially bio-active.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>The numerical affinity distribution of molecules generated by each model with or without transfer learning [<bold>(A)</bold> VAE, <bold>(B)</bold> AAE, <bold>(C)</bold> RNN].</p>
</caption>
<graphic xlink:href="fphar-13-1085665-g003.tif"/>
</fig>
<fig id="sch2" position="float">
<label>SCHEME 2</label>
<caption>
<p>The representative molecules (<bold>A1&#x2013;A8</bold>) with high affinity scores according to affinity model.</p>
</caption>
<graphic xlink:href="FPHAR_fphar-2022-1085665_wc_sch2.tif"/>
</fig>
</sec>
<sec id="s3-1-4">
<title>3.1.4 Molecular docking</title>
<p>According to the calculated SC scores of all molecules in the dataset and native ligand, 5,000 ligands with the highest SC score were selected for the follow-up study.</p>
<p>Molecular docking was utilized to analyze the 3D conformational and pharmacophore-based screens and to study the structural basis of the interaction between 3CLPro and ligands. First, the reliability of the Glide docking algorithm (standard precision mode, enhanced conformational sampling by four times) was confirmed by re-docking the native ligand to the receptor.</p>
<p>The re-docked conformation is presented in the supporting information. Subsequently, the selected 5,000 molecules were preliminarily docked to the revealed binding site using standard precision mode. According to the docking evaluation score and molecular conformation, 500 different conformations were selected, and four times enhanced conformational sampling was used to generate the ligand-binding pose more accurately.</p>
<p>In addition, the docking scores with and without transfer learning for pharmacophoric models were analyzed (<xref ref-type="table" rid="T3">Table 3</xref>). Applying transfer learning, 4.9% of ligands possess a score greater than 8, while 0.3% have a score higher than 9. In contrast, the corresponding values without transfer learning are 3.3% and 0.1%. This result is consistent with the affinity model, indicating that transfer learning can effectively increase the percentage of positive samples.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Differences in docking scores for molecules generated with and without transfer learning (values are the percentage of molecules in each docking score range).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Docking scores</th>
<th align="left">Model with TL (pharmacophore)</th>
<th align="left">Model without TL (pharmacophore)</th>
<th align="left">Model with TL (DEL-built affinity)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">1-5</td>
<td align="left">8.26</td>
<td align="left">9.08</td>
<td align="left">8.97</td>
</tr>
<tr>
<td align="left">5-7</td>
<td align="left">61.61</td>
<td align="left">66.32</td>
<td align="left">64.37</td>
</tr>
<tr>
<td align="left">7-8</td>
<td align="left">25.22</td>
<td align="left">21.30</td>
<td align="left">21.40</td>
</tr>
<tr>
<td align="left">8-9</td>
<td align="left">4.64</td>
<td align="left">3.18</td>
<td align="left">4.57</td>
</tr>
<tr>
<td align="left">9-11</td>
<td align="left">0.27</td>
<td align="left">0.12</td>
<td align="left">0.70</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Moreover, the molecules filtered by the affinity model were also subjected to molecular docking, in the same way as those filtered by the pharmacophoric model. This aims to explore the possibility of replacing an external pharmacophore model with a self-built affinity model. The docking scores of the filtered molecules are included in <xref ref-type="table" rid="T3">Table 3</xref> for comparison. Notably, the proportion of molecules filtered by the affinity model with docking scores of 9&#x2013;11 (0.70%) was more than twice that obtained with the pharmacophore model (0.27%). Therefore, replacing pharmacophore filtering with affinity models alone may be a promising option to utilize DEL datasets more fully.</p>
</sec>
<sec id="s3-1-5">
<title>3.1.5 Binding free energy calculation</title>
<p>The binding free energy can be used as a reference standard for assessing the activity of molecules. Generally, the lower the binding free energy, the more stable the complex formed. The ligands&#x2019; binding free energies were calculated by psp-v6.7 MMGBSA. The self-established ADMET prediction model and Maestro&#x2019;s QikProp module were used to evaluate the corresponding properties of molecules with better conformation. In fact, our ADMET prediction model is similar to ADMETlab (<xref ref-type="bibr" rid="B15">Dong et al., 2018</xref>). Molecules <bold>N1&#x2013;N8</bold> were finally selected for follow-up research considering the above ADMET properties together with the results of the binding energy calculation (<xref ref-type="scheme" rid="sch3">Scheme 3</xref>).</p>
<fig id="sch3" position="float">
<label>SCHEME 3</label>
<caption>
<p>The representative molecules <bold>N1&#x2013;N8</bold>.</p>
</caption>
<graphic xlink:href="FPHAR_fphar-2022-1085665_wc_sch3.tif"/>
</fig>
<p>From <xref ref-type="table" rid="T4">Table 4</xref>, the binding free energies (dG_Bind) of <bold>N1</bold> to <bold>N8</bold> indicate their potential biological activities. Van der Waals energy (dG_Bind_vdW) shows that hydrophobic interaction is the main contributor to the ligand binding process. According to the molecular docking conformation, we selected <bold>N1</bold> and <bold>N2</bold> for subsequent research, in which the position and interaction of <bold>N1</bold> and <bold>N2</bold> on receptors are consistent with the previous report (<xref ref-type="fig" rid="F4">Figure 4</xref>) (<xref ref-type="bibr" rid="B52">Zhang et al., 2021</xref>; <xref ref-type="bibr" rid="B42">Stille et al., 2022</xref>).</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>The calculated binding energy of <bold>N1&#x2013;N8</bold> binding to 3CLPro.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Name</th>
<th align="left">MMGBSA_dG_Bind</th>
<th align="left">MMGBSA_dG_Bind_vdW</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<bold>N1</bold>
</td>
<td align="left">&#x2212;40.17&#xa0;kcal/mol</td>
<td align="left">&#x2212;38.68&#xa0;kcal/mol</td>
</tr>
<tr>
<td align="left">
<bold>N2</bold>
</td>
<td align="left">&#x2212;42.12&#xa0;kcal/mol</td>
<td align="left">&#x2212;41.44&#xa0;kcal/mol</td>
</tr>
<tr>
<td align="left">
<bold>N3</bold>
</td>
<td align="left">&#x2212;49.24&#xa0;kcal/mol</td>
<td align="left">&#x2212;57.68&#xa0;kcal/mol</td>
</tr>
<tr>
<td align="left">
<bold>N4</bold>
</td>
<td align="left">&#x2212;50.44&#xa0;kcal/mol</td>
<td align="left">&#x2212;60.25&#xa0;kcal/mol</td>
</tr>
<tr>
<td align="left">
<bold>N5</bold>
</td>
<td align="left">&#x2212;42.63&#xa0;kcal/mol</td>
<td align="left">&#x2212;48.83&#xa0;kcal/mol</td>
</tr>
<tr>
<td align="left">
<bold>N6</bold>
</td>
<td align="left">&#x2212;41.25&#xa0;kcal/mol</td>
<td align="left">&#x2212;49.25&#xa0;kcal/mol</td>
</tr>
<tr>
<td align="left">
<bold>N7</bold>
</td>
<td align="left">&#x2212;45.96&#xa0;kcal/mol</td>
<td align="left">&#x2212;58.05&#xa0;kcal/mol</td>
</tr>
<tr>
<td align="left">
<bold>N8</bold>
</td>
<td align="left">&#x2212;34.71&#xa0;kcal/mol</td>
<td align="left">&#x2212;51.34&#xa0;kcal/mol</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Conformation and interaction of <bold>N1</bold> (left) and <bold>N2</bold> (right) binding to 3CLPro.</p>
</caption>
<graphic xlink:href="fphar-13-1085665-g004.tif"/>
</fig>
</sec>
<sec id="s3-1-6">
<title>3.1.6 Molecular dynamics simulation</title>
<p>To further analyze the interaction-related atomic details between molecules and 3CLPro, GROMACS was applied for molecular dynamics simulations using the above docking results. 100&#xa0;ns run time for MD simulation is considered sufficient for side chain rearrangement. The result will confirm whether or not the complex remains in the most stable association. Since <bold>N2</bold> is believed to be more compatible with the receptor pocket (ligand conformation), and the calculation of MMGBSA binding free energy indicates that <bold>N2</bold> has a stronger interaction with the receptor, molecular dynamics simulations of the complex formed by <bold>N2</bold> were conducted.</p>
<p>RMSD values plotted over the simulation time revealed a stable kinetic equilibrium of the complex. In detail, the 3CLPro protein with <bold>N2</bold> and the ligand <bold>N2</bold> showed steady kinetics after 30 and 50&#xa0;ns, respectively (<xref ref-type="fig" rid="F5">Figure 5</xref>). By monitoring the fluctuation of RMSD, each system is in the range of 2&#xa0;&#xc5; after 50&#xa0;ns. This suggests that the complex undergoes a conformational change during the simulation that promotes tight binding between <bold>N2</bold> and the receptor, and finally, the system reaches a steady state. The RMSF value showed minimal fluctuation, and it remained in the range of 0.05&#x2013;0.2&#xa0;nm throughout the simulation period for most residues, except that a peak in RMSF value was observed only at residue 1. The less fluctuating performance confirms the strong attachment of the ligand to the protein (<xref ref-type="fig" rid="F6">Figure 6</xref>).</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Fluctuation of RMSD values for complexes during 100&#xa0;ns MD simulation.</p>
</caption>
<graphic xlink:href="fphar-13-1085665-g005.tif"/>
</fig>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Residue-based fluctuations of protease backbone of complexes over 100&#xa0;ns simulation.</p>
</caption>
<graphic xlink:href="fphar-13-1085665-g006.tif"/>
</fig>
</sec>
</sec>
</sec>
<sec sec-type="conclusion" id="s4">
<title>4 Conclusion</title>
<p>As far as we know, this is the first study in which a DEL dataset has been used for molecular generation, which will promote the development of the application field of DEL combined with AI. This study preliminarily found that it may be feasible to use DEL data instead of public databases for molecular generation. In particular, for a new target pipeline, molecular generation and affinity model establishment based on DEL data are expected to become a tool with the dual functions of drug discovery and further structural optimization. This advantage would be difficult to achieve with public databases due to the scarcity of datasets.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s10">Supplementary Material</xref>; further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>All authors listed have made a substantial, direct, and intellectual contribution to the work and approved it for publication.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>This work was supported by the Industry and Information Technology Bureau of Shenzhen Municipality (No. 20200627103531228, for Shenzhen Innovation Center for Small Molecule Drug Discovery Co., Ltd.).</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>All authors were employed by either Shenzhen Innovation Center for Small Molecule Drug Discovery Co., Ltd. or Shenzhen NewDEL Biotech Co., Ltd.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fphar.2022.1085665/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fphar.2022.1085665/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Table1.DOCX" id="SM1" mimetype="application/DOCX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Amabilino</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pogany</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Pickett</surname>
<given-names>S. D.</given-names>
</name>
<name>
<surname>Green</surname>
<given-names>D. V. S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Guidelines for recurrent neural network transfer learning-based molecular generation of focused libraries</article-title>. <source>J. Chem. Inf. Model.</source> <volume>60</volume> (<issue>12</issue>), <fpage>5699</fpage>&#x2013;<lpage>5713</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jcim.0c00343</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ar&#xfa;s-Pous</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Patronov</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bjerrum</surname>
<given-names>E. J.</given-names>
</name>
<name>
<surname>Tyrchan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Reymond</surname>
<given-names>J.-L.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>SMILES-based deep generative scaffold decorator for de-novo drug design</article-title>. <source>J. Cheminform.</source> <volume>12</volume> (<issue>1</issue>), <fpage>38</fpage>&#x2013;<lpage>18</lpage>. <pub-id pub-id-type="doi">10.1186/s13321-020-00441-8</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baell</surname>
<given-names>J. B.</given-names>
</name>
<name>
<surname>Holloway</surname>
<given-names>G. A.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>New substructure filters for removal of pan assay interference compounds (PAINS) from screening libraries and for their exclusion in bioassays</article-title>. <source>J. Med. Chem.</source> <volume>53</volume> (<issue>7</issue>), <fpage>2719</fpage>&#x2013;<lpage>2740</lpage>. <pub-id pub-id-type="doi">10.1021/jm901137j</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Batool</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ahmad</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A structure-based drug discovery paradigm</article-title>. <source>Int. J. Mol. Sci.</source> <volume>20</volume> (<issue>11</issue>), <fpage>2783</fpage>. <pub-id pub-id-type="doi">10.3390/ijms20112783</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Benhenda</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2018</year>). <source>Can AI reproduce observed chemical diversity?</source> <comment>bioRxiv [Preprint]</comment> <comment>(Accessed October 10, 2022)</comment>. <pub-id pub-id-type="doi">10.1101/292177</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bjerrum</surname>
<given-names>E. J.</given-names>
</name>
<name>
<surname>Threlfall</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Molecular generation with Recurrent Neural Networks (RNNs)</article-title> <comment>arXiv [preprint]</comment> <comment>(Accessed October 10, 2022)</comment>. <pub-id pub-id-type="doi">10.48550/arXiv.1705.04612</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Born</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Manica</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Cadow</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Markert</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Mill</surname>
<given-names>N. A.</given-names>
</name>
<name>
<surname>Filipavicius</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Data-driven molecular design for discovery and synthesis of novel ligands: A case study on SARS-CoV-2</article-title>. <source>Mach. Learn. Sci. Technol.</source> <volume>2</volume> (<issue>2</issue>), <fpage>025024</fpage>. <pub-id pub-id-type="doi">10.1088/2632-2153/abe808</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bowman</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Vilnis</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Vinyals</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Dai</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Jozefowicz</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Bengio</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Generating sentences from a continuous space</article-title>. <comment>arXiv [Preprint]</comment> (<comment>Accessed October 10, 2022</comment>). <pub-id pub-id-type="doi">10.48550/arXiv.1511.06349</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Buller</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Mannocci</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Scheuermann</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Neri</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Drug discovery with DNA-encoded chemical libraries</article-title>. <source>Bioconjug. Chem.</source> <volume>21</volume> (<issue>9</issue>), <fpage>1571</fpage>&#x2013;<lpage>1580</lpage>. <pub-id pub-id-type="doi">10.1021/bc1001483</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cherkasov</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Muratov</surname>
<given-names>E. N.</given-names>
</name>
<name>
<surname>Fourches</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Varnek</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Baskin</surname>
<given-names>I. I.</given-names>
</name>
<name>
<surname>Cronin</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>QSAR modeling: Where have you been? Where are you going to?</article-title> <source>J. Med. Chem.</source> <volume>57</volume> (<issue>12</issue>), <fpage>4977</fpage>&#x2013;<lpage>5010</lpage>. <pub-id pub-id-type="doi">10.1021/jm4004285</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Clevert</surname>
<given-names>D.-A.</given-names>
</name>
<name>
<surname>Unterthiner</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Hochreiter</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2015</year>). <source>Fast and accurate deep network learning by exponential linear units (elus)</source>. <comment>arXiv [Preprint]</comment> <comment>(Accessed October 10, 2022)</comment>. <pub-id pub-id-type="doi">10.48550/arXiv.1511.07289</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Collier</surname>
<given-names>T. A.</given-names>
</name>
<name>
<surname>Piggot</surname>
<given-names>T. J.</given-names>
</name>
<name>
<surname>Allison</surname>
<given-names>J. R.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Molecular dynamics simulation of proteins</article-title>,&#x201d; in <source>Protein nanotechnology</source> (<publisher-loc>Berlin, Germany</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>311</fpage>&#x2013;<lpage>327</lpage>.</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dickson</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Kodadek</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Chemical composition of DNA-encoded libraries, past present and future</article-title>. <source>Org. Biomol. Chem.</source> <volume>17</volume> (<issue>19</issue>), <fpage>4676</fpage>&#x2013;<lpage>4688</lpage>. <pub-id pub-id-type="doi">10.1039/c9ob00581a</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dong</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>N. N.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>Z. J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ouyang</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>ADMETlab: A platform for systematic ADMET evaluation based on a comprehensively collected ADMET database</article-title>. <source>J. Cheminform.</source> <volume>10</volume> (<issue>1</issue>), <fpage>29</fpage>&#x2013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.1186/s13321-018-0283-x</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>G&#xf3;mez-Bombarelli</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>J. N.</given-names>
</name>
<name>
<surname>Duvenaud</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Hern&#xe1;ndez-Lobato</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>S&#xe1;nchez-Lengeling</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Sheberla</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Automatic chemical design using a data-driven continuous representation of molecules</article-title>. <source>ACS Cent. Sci.</source> <volume>4</volume> (<issue>2</issue>), <fpage>268</fpage>&#x2013;<lpage>276</lpage>. <pub-id pub-id-type="doi">10.1021/acscentsci.7b00572</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Goodfellow</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Pouget-Abadie</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Mirza</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Warde-Farley</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Ozair</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Generative adversarial networks</article-title>. <source>Commun. ACM</source> <volume>63</volume> (<issue>11</issue>), <fpage>139</fpage>&#x2013;<lpage>144</lpage>. <pub-id pub-id-type="doi">10.1145/3422622</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Grechishnikova</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Transformer neural network for protein-specific de novo drug generation as a machine translation problem</article-title>. <source>Sci. Rep.</source> <volume>11</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1038/s41598-020-79682-4</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kalgutkar</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Gardner</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Obach</surname>
<given-names>R. S.</given-names>
</name>
<name>
<surname>Shaffer</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Callegari</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Henne</surname>
<given-names>K. R.</given-names>
</name>
<etal/>
</person-group> (<year>2005</year>). <article-title>A comprehensive listing of bioactivation pathways of organic functional groups</article-title>. <source>Curr. Drug Metab.</source> <volume>6</volume> (<issue>3</issue>), <fpage>161</fpage>&#x2013;<lpage>225</lpage>. <pub-id pub-id-type="doi">10.2174/1389200054021799</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kalliokoski</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Price-focused analysis of commercially available building blocks for combinatorial library synthesis</article-title>. <source>ACS Comb. Sci.</source> <volume>17</volume> (<issue>10</issue>), <fpage>600</fpage>&#x2013;<lpage>607</lpage>. <pub-id pub-id-type="doi">10.1021/acscombsci.5b00063</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Kingma</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Welling</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Auto-encoding variational bayes</article-title>. <comment>arXiv [Preprint]</comment> <comment>(Accessed October 10, 2022)</comment>. <pub-id pub-id-type="doi">10.48550/arXiv.1312.6114</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kotsias</surname>
<given-names>P.-C.</given-names>
</name>
<name>
<surname>Ar&#xfa;s-Pous</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Engkvist</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Tyrchan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Bjerrum</surname>
<given-names>E. J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Direct steering of de novo molecular generation with descriptor conditional recurrent neural networks</article-title>. <source>Nat. Mach. Intell.</source> <volume>2</volume> (<issue>5</issue>), <fpage>254</fpage>&#x2013;<lpage>265</lpage>. <pub-id pub-id-type="doi">10.1038/s42256-020-0174-5</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krishnan</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Bung</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Vangala</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Srinivasan</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Bulusu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Roy</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>De novo structure-based drug design using deep learning</article-title>. <source>J. Chem. Inf. Model.</source> <volume>62</volume> (<issue>21</issue>), <fpage>5100</fpage>&#x2013;<lpage>5109</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jcim.1c01319</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Landrum</surname>
<given-names>G. A.</given-names>
</name>
<name>
<surname>Penzotti</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Putta</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Feature-map vectors: A new class of informative descriptors for computational drug discovery</article-title>. <source>J. Comput. Aided. Mol. Des.</source> <volume>20</volume> (<issue>12</issue>), <fpage>751</fpage>&#x2013;<lpage>762</lpage>. <pub-id pub-id-type="doi">10.1007/s10822-006-9085-8</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Aryl diazonium intermediates enable mild DNA-compatible C&#x2013;C bond formation for medicinally relevant combinatorial library synthesis</article-title>. <source>Chem. Sci.</source> <volume>13</volume>, <fpage>13100</fpage>&#x2013;<lpage>13109</lpage>. <pub-id pub-id-type="doi">10.1039/d2sc04482j</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Y.-J.</given-names>
</name>
<name>
<surname>Vinyals</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Dyer</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Pascanu</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Battaglia</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Learning deep generative models of graphs</article-title>. <comment>arXiv [Preprint]</comment> <comment>(Accessed October 10, 2022)</comment>. <pub-id pub-id-type="doi">10.48550/arXiv.1803.03324</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>R.-Z.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>J.-K.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>A.-L.</given-names>
</name>
<name>
<surname>Du</surname>
<given-names>G.-H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Progress on the application of artificial intelligence technology in ligand-based and receptor structure-based drug screening</article-title>. <source>Acta Pharm. Sin.</source> <volume>12</volume>, <fpage>2136</fpage>&#x2013;<lpage>2145</lpage>.</citation>
</ref>
<ref id="B28">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Long</surname>
<given-names>S.-Y.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Dai</surname>
<given-names>X.-Y.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Zero-shot 3D drug design by sketching and generating</article-title>. <comment>arXiv [Preprint]</comment> <comment>(Accessed October 10, 2022)</comment>. <pub-id pub-id-type="doi">10.48550/arXiv.2209.13865</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Makhzani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Shlens</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jaitly</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Goodfellow</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Frey</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Adversarial autoencoders</article-title>. <comment>arXiv [Preprint]</comment> <comment>(Accessed October 10, 2022)</comment>. <pub-id pub-id-type="doi">10.48550/arXiv.1511.05644</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>McInnes</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Healy</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Melville</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>UMAP: Uniform manifold approximation and projection for dimension reduction</article-title>. <comment>arXiv [Preprint]</comment> <comment>(Accessed October 10, 2022)</comment>. <pub-id pub-id-type="doi">10.48550/arXiv.1802.03426</pub-id>
</citation>
</ref>
<ref id="B1">
<citation citation-type="book">
<collab>MIT Technology Review</collab> (<year>2020</year>). <source>10 Breakthrough technologies 2020</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.technologyreview.com/10-breakthrough-technologies/2020/#ai-discovered-molecules">https://www.technologyreview.com/10-breakthrough-technologies/2020/&#x0023;ai-discovered-molecules</ext-link></comment> <comment>(Accessed October 10, 2022)</comment>.</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nie</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Second-generation DNA-encoded multiple display on a constant macrocyclic scaffold enabled by an orthogonal protecting group strategy</article-title>. <source>Chin. Chem. Lett.</source> <volume>33</volume> (<issue>5</issue>), <fpage>2559</fpage>&#x2013;<lpage>2563</lpage>. <pub-id pub-id-type="doi">10.1016/j.cclet.2021.09.041</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Poltev</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Malenkov</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Gonzalez</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Teplukhin</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rein</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Shibata</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>1996</year>). <article-title>Modeling DNA hydration: Comparison of calculated and experimental hydration properties of nucleic acid bases</article-title>. <source>J. Biomol. Struct. Dyn.</source> <volume>13</volume> (<issue>4</issue>), <fpage>717</fpage>&#x2013;<lpage>726</lpage>. <pub-id pub-id-type="doi">10.1080/07391102.1996.10508884</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Polykovskiy</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zhebrak</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sanchez-Lengeling</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Golovanov</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Tatanov</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Belyaev</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Molecular sets (MOSES): A benchmarking platform for molecular generation models</article-title>. <source>Front. Pharmacol.</source> <volume>11</volume>, <fpage>565644</fpage>. <pub-id pub-id-type="doi">10.3389/fphar.2020.565644</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Putta</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Landrum</surname>
<given-names>G. A.</given-names>
</name>
<name>
<surname>Penzotti</surname>
<given-names>J. E.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Conformation mining: An algorithm for finding biologically relevant conformations</article-title>. <source>J. Med. Chem.</source> <volume>48</volume> (<issue>9</issue>), <fpage>3313</fpage>&#x2013;<lpage>3318</lpage>. <pub-id pub-id-type="doi">10.1021/jm049066l</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rakhshani</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Dehghanian</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Rahati</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Enhanced GROMACS: Toward a better numerical simulation framework</article-title>. <source>J. Mol. Model.</source> <volume>25</volume> (<issue>12</issue>), <fpage>355</fpage>&#x2013;<lpage>358</lpage>. <pub-id pub-id-type="doi">10.1007/s00894-019-4232-z</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Rezende</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Mohamed</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wierstra</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2014</year>). &#x201c;<article-title>Stochastic backpropagation and approximate inference in deep generative models</article-title>,&#x201d; in <conf-name>Proceedings of the International conference on machine learning (PMLR)</conf-name>, <conf-loc>Beijing, China</conf-loc>, <conf-date>June 21&#x2013;26, 2014</conf-date>, <fpage>1278</fpage>&#x2013;<lpage>1286</lpage>.</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rogers</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Hahn</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Extended-connectivity fingerprints</article-title>. <source>J. Chem. Inf. Model.</source> <volume>50</volume> (<issue>5</issue>), <fpage>742</fpage>&#x2013;<lpage>754</lpage>. <pub-id pub-id-type="doi">10.1021/ci100050t</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Segler</surname>
<given-names>M. H.</given-names>
</name>
<name>
<surname>Kogej</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Tyrchan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Waller</surname>
<given-names>M. P.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Generating focused molecule libraries for drug discovery with recurrent neural networks</article-title>. <source>ACS Cent. Sci.</source> <volume>4</volume> (<issue>1</issue>), <fpage>120</fpage>&#x2013;<lpage>131</lpage>. <pub-id pub-id-type="doi">10.1021/acscentsci.7b00512</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shelley</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>Cholleti</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Frye</surname>
<given-names>L. L.</given-names>
</name>
<name>
<surname>Greenwood</surname>
<given-names>J. R.</given-names>
</name>
<name>
<surname>Timlin</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Uchimaya</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Epik: A software program for pK<sub>a</sub> prediction and protonation state generation for drug-like molecules</article-title>. <source>J. Comput. Aided. Mol. Des.</source> <volume>21</volume> (<issue>12</issue>), <fpage>681</fpage>&#x2013;<lpage>691</lpage>. <pub-id pub-id-type="doi">10.1007/s10822-007-9133-z</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Skalic</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sabbadin</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Sattarov</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Sciabola</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>De Fabritiis</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>From target to drug: Generative modeling for the multimodal structure-based ligand design</article-title>. <source>Mol. Pharm.</source> <volume>16</volume> (<issue>10</issue>), <fpage>4282</fpage>&#x2013;<lpage>4291</lpage>. <pub-id pub-id-type="doi">10.1021/acs.molpharmaceut.9b00634</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hwang</surname>
<given-names>G. T.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>DNA-encoded library screening as core platform technology in drug discovery: Its synthetic method development and applications in DEL synthesis</article-title>. <source>J. Med. Chem.</source> <volume>63</volume> (<issue>13</issue>), <fpage>6578</fpage>&#x2013;<lpage>6599</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jmedchem.9b01782</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stille</surname>
<given-names>J. K.</given-names>
</name>
<name>
<surname>Tjutrins</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Venegas</surname>
<given-names>F. A.</given-names>
</name>
<name>
<surname>Hennecker</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Rueda</surname>
<given-names>A. M.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Design, synthesis and <italic>in vitro</italic> evaluation of novel SARS-CoV-2 3CLpro covalent inhibitors</article-title>. <source>Eur. J. Med. Chem.</source> <volume>229</volume>, <fpage>114046</fpage>. <pub-id pub-id-type="doi">10.1016/j.ejmech.2021.114046</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stokes</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Swanson</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Cubillos-Ruiz</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Donghia</surname>
<given-names>N. M.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>A deep learning approach to antibiotic discovery</article-title>. <source>Cell</source> <volume>180</volume> (<issue>4</issue>), <fpage>688</fpage>&#x2013;<lpage>702</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2020.01.021</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Gilvary</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Elemento</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Graph convolutional networks for computational drug development and discovery</article-title>. <source>Brief. Bioinform.</source> <volume>21</volume> (<issue>3</issue>), <fpage>919</fpage>&#x2013;<lpage>935</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbz042</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>J.-K.</given-names>
</name>
<name>
<surname>Hsieh</surname>
<given-names>C.-Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>M.-Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.-R.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Z.-X.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>D.-J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Multi-constraint molecular generation based on conditional transformer, knowledge distillation and reinforcement learning</article-title>. <source>Nat. Mach. Intell.</source> <volume>3</volume> (<issue>10</issue>), <fpage>914</fpage>&#x2013;<lpage>922</lpage>. <pub-id pub-id-type="doi">10.1038/s42256-021-00403-1</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>M.-Y.</given-names>
</name>
<name>
<surname>Hsieh</surname>
<given-names>C.-Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.-K.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Weng</surname>
<given-names>G.-Q.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2022a</year>). <article-title>RELATION: A deep generative model for structure-based de novo drug design</article-title>. <source>J. Med. Chem.</source> <volume>65</volume> (<issue>13</issue>), <fpage>9478</fpage>&#x2013;<lpage>9492</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jmedchem.2c00732</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Z.-Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>Y.-Z.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>Y.-C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.-M.</given-names>
</name>
<etal/>
</person-group> (<year>2022b</year>). <article-title>Advanced graph and sequence neural networks for molecular property prediction and drug discovery</article-title>. <source>Bioinformatics</source> <volume>38</volume> (<issue>9</issue>), <fpage>2579</fpage>&#x2013;<lpage>2586</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btac112</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weininger</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>1988</year>). <article-title>SMILES, a chemical language and information system. 1. Introduction to methodology and encoding rules</article-title>. <source>J. Chem. Inf. Model.</source> <volume>28</volume> (<issue>1</issue>), <fpage>31</fpage>&#x2013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.1021/ci00057a005</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>M.-G.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>H.-G.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>Z.-M.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.-W.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Y.-H.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Discovery of TIGIT inhibitors based on DEL and machine learning</article-title>. <source>Front. Chem.</source> <volume>10</volume>, <fpage>982539</fpage>. <pub-id pub-id-type="doi">10.3389/fchem.2022.982539</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>Y.-J.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>K.-J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.-W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>C.-J.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Deep learning for molecular generation</article-title>. <source>Future Med. Chem.</source> <volume>11</volume> (<issue>6</issue>), <fpage>567</fpage>&#x2013;<lpage>597</lpage>. <pub-id pub-id-type="doi">10.4155/fmc-2018-0358</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>In-solution direct oxidative coupling for the integration of sulfur/selenium into DNA-encoded chemical libraries</article-title>. <source>Chem. Sci.</source> <volume>13</volume> (<issue>9</issue>), <fpage>2604</fpage>&#x2013;<lpage>2613</lpage>. <pub-id pub-id-type="doi">10.1039/d1sc06268a</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>C.-H.</given-names>
</name>
<name>
<surname>Stone</surname>
<given-names>E. A.</given-names>
</name>
<name>
<surname>Deshmukh</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ippolito</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Ghahremanpour</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Tirado-Rives</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Potent noncovalent inhibitors of the main protease of SARS-CoV-2 from molecular sculpting of the drug perampanel guided by free energy perturbation calculations</article-title>. <source>ACS Cent. Sci.</source> <volume>7</volume> (<issue>3</issue>), <fpage>467</fpage>&#x2013;<lpage>475</lpage>. <pub-id pub-id-type="doi">10.1021/acscentsci.1c00039</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Future challenges with DNA-encoded chemical libraries in the drug discovery domain</article-title>. <source>Expert Opin. Drug Discov.</source> <volume>14</volume> (<issue>8</issue>), <fpage>735</fpage>&#x2013;<lpage>753</lpage>. <pub-id pub-id-type="doi">10.1080/17460441.2019.1614559</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Reversible covalent headpiece enables interconversion between double&#x2010; and single&#x2010;stranded DNA&#x2010;encoded chemical libraries</article-title>. <source>Angew. Chem. Int. Ed. Engl.</source> <volume>134</volume> (<issue>7</issue>), <fpage>e202115157</fpage>. <pub-id pub-id-type="doi">10.1002/anie.202115157</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>P.-L.</given-names>
</name>
<name>
<surname>Hoi</surname>
<given-names>S. C. H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.-L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Online transfer learning</article-title>. <source>Artif. Intell.</source> <volume>216</volume>, <fpage>76</fpage>&#x2013;<lpage>102</lpage>. <pub-id pub-id-type="doi">10.1016/j.artint.2014.06.003</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhavoronkov</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ivanenkov</surname>
<given-names>Y. A.</given-names>
</name>
<name>
<surname>Aliper</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Veselov</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Aladinskiy</surname>
<given-names>V. A.</given-names>
</name>
<name>
<surname>Aladinskaya</surname>
<given-names>A. V.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Deep learning enables rapid identification of potent DDR1 kinase inhibitors</article-title>. <source>Nat. Biotechnol.</source> <volume>37</volume> (<issue>9</issue>), <fpage>1038</fpage>&#x2013;<lpage>1040</lpage>. <pub-id pub-id-type="doi">10.1038/s41587-019-0224-x</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>