<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Bioinform.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Bioinformatics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Bioinform.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2673-7647</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1628800</article-id>
<article-id pub-id-type="doi">10.3389/fbinf.2025.1628800</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Technology and Code</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>PharmacoForge: pharmacophore generation with diffusion models</article-title>
<alt-title alt-title-type="left-running-head">Flynn et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fbinf.2025.1628800">10.3389/fbinf.2025.1628800</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Flynn</surname>
<given-names>Emma L.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3054397"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Shah</surname>
<given-names>Riya</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3075338"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Dunn</surname>
<given-names>Ian</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3092470"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Aggarwal</surname>
<given-names>Rishal</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3178124"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Koes</surname>
<given-names>David Ryan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1070468"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
</contrib>
</contrib-group>
<aff id="aff1">
<label>1</label>
<institution>Department of Computational and Systems Biology, University of Pittsburgh</institution>, <city>Pittsburgh</city>, <addr-line>PA</addr-line>, <country country="US">United States</country>
</aff>
<aff id="aff2">
<label>2</label>
<institution>School of Computer Science, Carnegie Mellon University</institution>, <city>Pittsburgh</city>, <addr-line>PA</addr-line>, <country country="US">United States</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: David Ryan Koes, <email xlink:href="dkoes@pitt.edu">dkoes@pitt.edu</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2025-09-08">
<day>08</day>
<month>09</month>
<year>2025</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>5</volume>
<elocation-id>1628800</elocation-id>
<history>
<date date-type="received">
<day>14</day>
<month>05</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>08</day>
<month>08</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Flynn, Shah, Dunn, Aggarwal and Koes.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Flynn, Shah, Dunn, Aggarwal and Koes</copyright-holder>
<license>
<ali:license_ref start_date="2025-09-08">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Structure-based drug design (SBDD) is enhanced by machine learning (ML) to improve both virtual screening and <italic>de novo</italic> design. Despite advances in ML tools for both strategies, screening remains bounded by time and computational cost, while generative models frequently produce invalid and synthetically inaccessible molecules. Screening time can be improved with pharmacophore search, which quickly identifies ligands in a database that match a pharmacophore query. In this work, we introduce PharmacoForge, a diffusion model for generating 3D pharmacophores conditioned on a protein pocket. Generated pharmacophore queries identify ligands that are guaranteed to be valid, commercially available molecules. We evaluate PharmacoForge against automated pharmacophore generation methods using the LIT-PCBA benchmark and ligand generative models through a docking-based evaluation framework. We further assess pharmacophore quality through a retrospective screening of the DUD-E dataset. PharmacoForge surpasses other pharmacophore generation methods in the LIT-PCBA benchmark, and resulting ligands from pharmacophore queries performed similarly to <italic>de novo</italic> generated ligands when docking to DUD-E targets and had lower strain energies compared to <italic>de novo</italic> generated ligands.</p>
</abstract>
<kwd-group>
<kwd>structure-based drug discovery</kwd>
<kwd>pharmacophore</kwd>
<kwd>diffusion models</kwd>
<kwd>virtual screening</kwd>
<kwd>generative models</kwd>
<kwd>molecule generation</kwd>
<kwd>computational drug discovery</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declare that financial support was received for the research and/or publication of this article. This work is funded through R35GM140753 from the National Institute of General Medical Sciences. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institute of General Medical Sciences or the National Institutes of Health.</funding-statement>
</funding-group>
<counts>
<fig-count count="10"/>
<table-count count="3"/>
<equation-count count="7"/>
<ref-count count="37"/>
<page-count count="13"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Drug Discovery in Bioinformatics</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Following identification of a disease-causing protein, rational drug discovery aims to design a ligand that binds to the protein target with high specificity and affinity to mitigate disease effects. Structure-based drug design (SBDD) seeks to identify or create a ligand using the molecular structure of a target protein pocket (<xref ref-type="bibr" rid="B2">Anderson, 2003</xref>). Computational methods are critical tools in modern SBDD campaigns.</p>
<p>SBDD campaigns are primarily composed of screening-based strategies. Screening-based strategies often involve testing numerous compounds to evaluate their binding to a target protein (<xref ref-type="bibr" rid="B14">Hughes et al., 2011</xref>; <xref ref-type="bibr" rid="B4">Blay et al., 2020</xref>). Screening is an inherently expensive process regardless of the particular screening method. Direct experimental measurement is exceptionally costly and therefore limited in the size of the chemical space that can be screened. As a result, the use of computational methods to estimate affinity, generally called virtual screening, has become a routine method in drug discovery campaigns over the last several decades (<xref ref-type="bibr" rid="B26">Sadybekov and Katritch, 2023</xref>). Virtual screening methods can evaluate significantly larger chemical spaces than methods based on physical experimentation. Molecular docking, one of the most broadly used virtual screening methods, enables screening of millions or billions of compounds given substantial computing resources, but the screening process remains expensive and time-consuming (<xref ref-type="bibr" rid="B8">Gentile et al., 2020</xref>; <xref ref-type="bibr" rid="B9">2022</xref>).</p>
<p>Pharmacophore-based virtual screening is a resource-efficient alternative to molecular docking. Pharmacophore search can be done in sub-linear time, allowing the search of millions of compounds at speeds orders of magnitude faster than traditional virtual screening (<xref ref-type="bibr" rid="B19">Koes and Camacho, 2011</xref>; <xref ref-type="bibr" rid="B30">Sunseri and Koes, 2016</xref>). A pharmacophore query defines the essential interactions between the ligand and protein where they occur in the binding pocket (<xref ref-type="bibr" rid="B17">Kaserer et al., 2015</xref>; <xref ref-type="bibr" rid="B18">Koes, 2015</xref>). A molecule matches a pharmacophore query if a valid conformation of the molecule can be positioned such that the essential interactions occur in the correct position. Pharmacophores filter out molecules that do not match the pharmacophore query, which significantly decreases the number of molecules that need to be scored and ranked (<xref ref-type="bibr" rid="B10">Giordano et al., 2022</xref>).</p>
<p>The utility of pharmacophore screening results is heavily dependent on the quality of the pharmacophore. Manual pharmacophore design requires identification of potential interaction points in the binding pocket of the receptor either based on the receptor structure or a known reference ligand binding pose; software and automated frameworks have reduced the time and domain-knowledge barriers to improve pharmacophore elucidation processes (<xref ref-type="bibr" rid="B11">Heider et al., 2022</xref>). Current pharmacophore design techniques include software implementations, such as Pharmit and Pharmer, that identify interaction points between the protein pocket and a reference ligand and allow user customization of identified centers (<xref ref-type="bibr" rid="B30">Sunseri and Koes, 2016</xref>; <xref ref-type="bibr" rid="B19">Koes and Camacho, 2011</xref>). Apo2ph4, a framework for pharmacophore elucidation from receptor structure, has been shown to perform well in retrospective virtual screening but requires intensive manual checks from a domain expert at each step (<xref ref-type="bibr" rid="B11">Heider et al., 2022</xref>). PharmRL, a reinforcement learning method for automated pharmacophore generation, speeds up pharmacophore generation relative to non-automated methods but struggles with generalization and requires training with positive and negative training examples for each protein system (<xref ref-type="bibr" rid="B1">Aggarwal and Koes, 2024</xref>). For drug discovery pipelines to fully realize the advantages of pharmacophore screening, user-friendly, automated, and generalizable methods for pharmacophore elucidation are needed.</p>
<p>
<italic>De novo</italic> molecule design creates new potential ligands from scratch; <italic>de novo</italic> methods often design ligands based on key structural features of the binding pocket. Techniques include fragment-based drug discovery, which docks smaller building blocks such as a ring structure or amine group into the protein binding pocket then connects them to form a reasonable ligand structure, and traditional methods, which typically involve a combinatorial search (<xref ref-type="bibr" rid="B3">Batool et al., 2019</xref>; <xref ref-type="bibr" rid="B6">Durrant et al., 2009</xref>).</p>
<p>Applications of generative models to molecule generation have given rise to models capable of predicting <italic>de novo</italic> ligand structures based on the protein pocket (<xref ref-type="bibr" rid="B13">Hoogeboom et al., 2022</xref>; <xref ref-type="bibr" rid="B27">Schneuing et al., 2022</xref>; <xref ref-type="bibr" rid="B23">Peng et al., 2022</xref>; <xref ref-type="bibr" rid="B24">Pinheiro et al., 2024</xref>; <xref ref-type="bibr" rid="B25">Ragoza et al., 2022</xref>; <xref ref-type="bibr" rid="B5">Dunn and Koes, 2024</xref>). <xref ref-type="bibr" rid="B13">Hoogeboom et al. (2022)</xref> initially proposed applying equivariant diffusion models to small organic molecules, and various other models, namely, auto-regression-based Pocket2Mol and diffusion-based DiffSBDD, followed to enable conditional generation for a given receptor pocket (<xref ref-type="bibr" rid="B27">Schneuing et al., 2022</xref>; <xref ref-type="bibr" rid="B23">Peng et al., 2022</xref>). However, several limitations preclude the practical use of these models. <italic>De novo</italic> models that directly condition on 3D structure and assemble individual atoms in a pocket often produce unrealistic or synthetically inaccessible molecules.</p>
<p>We propose circumventing the shortcomings of both virtual screening and <italic>de novo</italic> design methods by leveraging generative modeling to design pharmacophores for a given protein pocket. We introduce PharmacoForge, a diffusion model capable of rapidly generating pharmacophore candidates of any desired size conditioned on a protein pocket of interest. Screening with generated pharmacophores results in matching ligands that are guaranteed to be valid and commercially available. We evaluate generated pharmacophores by both enrichment factor, measuring the ability to identify an enriched subset of active compounds in a database, and docking score of top hits following virtual screening.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Background</title>
<sec id="s2-1">
<label>2.1</label>
<title>Pharmacophores</title>
<p>A pharmacophore is a set of points <inline-formula id="inf46">
<mml:math id="m46">
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> that represents areas of interactions between a protein and a ligand. <xref ref-type="fig" rid="F1">Figure 1</xref> shows a reference ligand and its pharmacophore. The areas of interaction are commonly referred to as pharmacophore centers; a pharmacophore&#x2019;s size is its number of centers. Each pharmacophore center has an associated position <inline-formula id="inf47">
<mml:math id="m47">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and feature type <inline-formula id="inf48">
<mml:math id="m48">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>Z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> {Hydrogen Acceptor, Hydrogen Donor, Hydrophobic, Aromatic, Negative Ion, Positive Ion}. Collectively, the centers define the spatial and feature constraints that a molecule needs in order to interact with its protein target.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Reference pharmacophore as identified by Pharmit for a ligand binding to AmpC-<inline-formula id="inf49">
<mml:math id="m49">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-lactamase (PDB 1L2S). Sphere colors correspond to feature type; Blue: Positive Ion, Green: Hydrophobic, Orange: Hydrogen Acceptor, Red: Negative Ion, Purple: Aromatic, White: Hydrogen Donor.</p>
</caption>
<graphic xlink:href="fbinf-05-1628800-g001.tif">
<alt-text content-type="machine-generated">Chemical structure visualization with color-coded features. Orange spheres indicate hydrogen acceptors, blue spheres represent positive ions, and red spheres depict negative ions. Green areas highlight hydrophobic regions, while purple signifies aromatic regions. The feature key assists in interpretation.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>Denoising diffusion probabilistic models</title>
<p>Denoising diffusion probabilistic models (DDPMs) use a Markov process to apply Gaussian random noise to a sample in a noising process and then train a neural network to iteratively denoise the sample (<xref ref-type="bibr" rid="B28">Sohl-Dickstein et al., 2015</xref>; <xref ref-type="bibr" rid="B12">Ho et al., 2020</xref>). New samples may be drawn from the target distribution by initializing a process from random noise and then iteratively denoising back to a clean sample with the trained model. The noising process can be described by the equation:<disp-formula id="equ7">
<mml:math id="m50">
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mi mathvariant="bold">I</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>where <inline-formula id="inf50">
<mml:math id="m51">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the data sample, <inline-formula id="inf51">
<mml:math id="m52">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the fully noised sample, <inline-formula id="inf52">
<mml:math id="m53">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> controls the fraction of original signal maintained, and <inline-formula id="inf53">
<mml:math id="m54">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> defines the amount of noise added to the data sample at each time step.</p>
</sec>
<sec id="s2-3">
<label>2.3</label>
<title>Equivariant diffusion models for molecules</title>
<p>
<xref ref-type="bibr" rid="B13">Hoogeboom et al. (2022)</xref> adapted the <xref ref-type="bibr" rid="B28">Sohl-Dickstein et al. (2015)</xref> framework for generative tasks with molecules. Unlike previous applications of diffusion models for image generation, generating molecules introduces the added requirement of E(3)-equivariance. A function is considered equivariant for a group if, when the function is applied to both the group, <inline-formula id="inf54">
<mml:math id="m55">
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and a transformed group, <inline-formula id="inf55">
<mml:math id="m56">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>G</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf56">
<mml:math id="m57">
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents a transformation, <inline-formula id="inf57">
<mml:math id="m58">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is equal to <inline-formula id="inf58">
<mml:math id="m59">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. For molecules, Euclidean group E(3) transformations (reflection, rotation, and translation) are relevant as molecules retain their identity regardless of any of these transformations; thus models generating molecules must be E(3)-equivariant. <xref ref-type="bibr" rid="B16">Jing et al. (2020)</xref> introduced the geometric vector perceptron (GVP) architecture, another E(3)-equivariant neural network, as an alternative to standard graph neural networks (GNNs). The standard multi-layer perceptron feed-forward layer in GNNs is replaced by a GVP layer in GVP-GNNs. Unlike GNNs, GVP-GNNs split nodes into scalar and vector channels, adding a directional component which allows more expressive modeling of molecular geometries.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Related work</title>
<sec id="s3-1">
<label>3.1</label>
<title>Automated pharmacophore generation</title>
<p>Previously proposed automated workflows to generate pharmacophores for a protein pocket, namely Apo2ph4 and PharmRL, apply different computational techniques to create new pharmacophores for virtual screening (<xref ref-type="bibr" rid="B11">Heider et al., 2022</xref>; <xref ref-type="bibr" rid="B1">Aggarwal and Koes, 2024</xref>). Apo2ph4 primarily relies on fragment docking. The Apo2ph4 workflow identifies a protein pocket based on a provided ligand center of mass or user-specified coordinates then docks 1456 lead-like molecular fragments into the pocket. The docked fragments are filtered to include only those with a docking energy below 2 kcal/mol; a maximum of two poses is kept per successfully docked fragment. Following fragment docking, each selected fragment pose is converted into a pharmacophore; a single pharmacophore is created from the fragment pharmacophores by scoring each center based on proximity to other pharmacophore centers of the same type. Clustering and filtering of proximal centers results in the final pharmacophore for the pocket (<xref ref-type="bibr" rid="B11">Heider et al., 2022</xref>).</p>
<p>PharmRL is a reinforcement learning-based pharmacophore generation method which first identifies potential pharmacophore features in a protein pocket by passing a voxelized pocket representation through a CNN. The CNN outputs all possible pharmacophore feature types that an area of the binding pocket may support. The CNN-identified pharmacophore features form the starting point for a deep-Q learning algorithm which iteratively optimizes the features to maximize a reward function and generate a single pharmacophore for the pocket (<xref ref-type="bibr" rid="B1">Aggarwal and Koes, 2024</xref>).</p>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Conditional molecular generation</title>
<p>Equivariant diffusion models are capable of generating full ligands unconditionally with no specified properties or conditionally to fit a specific protein pocket. <xref ref-type="bibr" rid="B27">Schneuing et al. (2022)</xref> extended the framework of <xref ref-type="bibr" rid="B13">Hoogeboom et al. (2022)</xref> to conditional molecule generation with DiffSBDD, an EGNN-based model for molecule generation conditioned on a given protein pocket. Adding protein pocket atoms to the sample graph allows for ligand generation in the context of the protein pocket. Prior to diffusion models&#x2019; use in molecular generation, <xref ref-type="bibr" rid="B23">Peng et al. (2022)</xref> applied auto-regressive models to the molecule generation task with Pocket2Mol. Pocket2Mol uses an encoder and predictors for molecule coordinates and features.</p>
</sec>
</sec>
<sec sec-type="methods" id="s4">
<label>4</label>
<title>Methods</title>
<sec id="s4-1">
<label>4.1</label>
<title>Model implementation details</title>
<p>We trained PharmacoForge using the CrossDocked2020 dataset, which consists of over 18,000 complexes with 22.5 million docked ligand poses (<xref ref-type="bibr" rid="B7">Francoeur et al., 2020</xref>). Pharmit is used to identify the interaction pharmacophore centers of a reference protein and ligand complex and build the training dataset of reference pharmacophores and proteins (<xref ref-type="bibr" rid="B30">Sunseri and Koes, 2016</xref>). We augment the training dataset by randomly subsampling the ground truth pharmacophore centers provided by Pharmit; a minimum of three and a maximum of eight centers are selected from the pharmacophore. The model is trained with the Adam optimizer at a learning rate of <inline-formula id="inf59">
<mml:math id="m60">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mi>e</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> for a total of 80 epochs with a batch size of 24.</p>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Building the pocket-pharmacophore graph</title>
<p>We represent the protein pocket and pharmacophore as a heterogeneous graph consisting of protein and pharmacophore nodes. A subsampling of the reference pharmacophore centers is added to the graph. We identify the k-nearest neighboring protein atoms to the pharmacophore centers and add those to the final pharmacophore-protein graph. The pharmacophore nodes are fully connected while the protein nodes are connected only to neighboring pharmacophore or protein nodes. <xref ref-type="fig" rid="F2">Figure 2</xref> illustrates the receptor pocket-pharmacophore graph construction.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>An outline of the protein-pharmacophore graph construction and training process used in PharmacoForge. Proteins and ligands from the CrossDocked dataset are passed through Pharmit to identify the interaction pharmacophore (1). The pharmacophore centers (2) and nearest protein atoms form the protein-pharmacophore graph (3), which then is iteratively noised to train the learned denoising process (4). Created with BioRender.</p>
</caption>
<graphic xlink:href="fbinf-05-1628800-g002.tif">
<alt-text content-type="machine-generated">A diagram illustrates a multi-step molecular process involving four stages. 1: Molecules enter a system labeled &#x22;Pharmit.&#x22; 2: Molecules are altered with different colored nodes. 3: A structure with intermixed colored nodes forms. 4: A connected network with pink and gray nodes undergoes diffusion and learned denoising processes, indicated by bidirectional arrows and labeled with &#x22;t&#x3d;0...T.&#x22;</alt-text>
</graphic>
</fig>
<p>Each protein or pharmacophore node has a 3D position represented as <inline-formula id="inf61">
<mml:math id="m62">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>; pharmacophore nodes have a feature type <inline-formula id="inf62">
<mml:math id="m63">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>6</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> while protein nodes have an atom type <inline-formula id="inf63">
<mml:math id="m64">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf64">
<mml:math id="m65">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the number of atom types. Feature types and atom types are both encoded as one-hot vectors.</p>
</sec>
<sec id="s4-3">
<label>4.3</label>
<title>Diffusion and denoising</title>
<p>We perform an equivariant diffusion process over the heterogeneous graph, noising only the pharmacophore nodes. The protein-pharmacophore graph is embedded into continuous space and then passed through multiple GVP-GNN convolution layers which are used to parameterize the noising process. More details on the GVP-GNN convolutions can be found in the Model Architecture section of <xref ref-type="sec" rid="s13">Supplementary Material</xref>. The variance-preserving noising process follows that of <xref ref-type="bibr" rid="B13">Hoogeboom et al. (2022)</xref> (<xref ref-type="bibr" rid="B29">Song et al., 2020</xref>; <xref ref-type="bibr" rid="B27">Schneuing et al., 2022</xref>):<disp-formula id="equ1">
<mml:math id="m66">
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">data</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">data</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mi mathvariant="bold">I</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>Where <inline-formula id="inf65">
<mml:math id="m67">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is set by a predefined polynomial noise schedule. Both the feature and coordinate vector for each pharmacophore are noised by the same process.</p>
<p>The result of the GVP-GNN convolution layers is passed through a GVP network which predicts the noise added to the sample and obtains the clean coordinates and feature type prediction for each pharmacophore node (<xref ref-type="bibr" rid="B16">Jing et al., 2020</xref>). The denoising process also follows that of <xref ref-type="bibr" rid="B13">Hoogeboom et al. (2022)</xref> and is learned by optimizing a mean squared error (MSE) loss, where the added noise is predicted and a clean sample is generated by removing the predicted noise.<disp-formula id="equ2">
<mml:math id="m68">
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:mrow>
<mml:mover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mover>
</mml:mrow>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf66">
<mml:math id="m69">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
<mml:mo>&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> represents the predicted noise and <inline-formula id="inf67">
<mml:math id="m70">
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the true noise. The trained GVP network learns the denoising process and becomes capable of generating clean pharmacophore samples from noise.</p>
</sec>
<sec id="s4-4">
<label>4.4</label>
<title>Generating pharmacophores</title>
<p>To generate a new pharmacophore conditioned on a protein pocket, we construct the protein portion of the graph from the pocket atoms; the pocket is identified either through a reference ligand or a list of residues that make up the pocket. We initialize a user-specified number of pharmacophore nodes with random feature vectors and random coordinates near the center of the binding pocket. Using the trained GVP model, we then denoise to get predicted pharmacophore center coordinates and feature types. <xref ref-type="fig" rid="F3">Figure 3</xref> depicts the pharmacophore generation process from randomly initialized centers to a pharmacophore. The clean pharmacophore is provided to the user as feature types with associated 3D coordinates that can be converted into a pharmacophore query for the protein pocket.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Generating a pharmacophore with four centers for a binding pocket of AmpC-<inline-formula id="inf60">
<mml:math id="m61">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-lactamase (PDB 1L2S); the pharmacophore is generated over 1000 diffusion time steps (t). Sphere colors correspond to feature type; Blue: Positive Ion, Orange: Hydrogen Acceptor, Red: Negative Ion, Purple: Aromatic, White: Hydrogen Donor.</p>
</caption>
<graphic xlink:href="fbinf-05-1628800-g003.tif">
<alt-text content-type="machine-generated">Illustration of pharmacophore generation showing the denoising of centers in five stages, labeled t&#x3d;0, t&#x3d;250, t&#x3d;500, t&#x3d;750, and t&#x3d;1000. Molecules are depicted as colored spheres in a complex, multicolored environment of reds, blues, and whites, indicating changes over time.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4-5">
<label>4.5</label>
<title>Evaluation metrics</title>
<p>Pharmacophores are difficult to evaluate on inherent value as their utility comes from how accurately and effectively they filter active compounds from a large database. A pharmacophore may correctly identify an area or areas for potential interaction in a protein pocket while still failing to filter enough non-binding ligands to be faster than traditional virtual screening. To assess the accuracy of interactions identified in PharmacoForge during training, we compute pharmacophore validity, which we define as the fraction of centers within a threshold distance of a complementary feature in the binding pocket. The thresholds are based on the interaction type and can be found in the Pharmacophore Validity section of <xref ref-type="sec" rid="s13">Supplementary Material</xref>.</p>
<p>To determine pharmacophore efficacy, we evaluate PharmacoForge-generated pharmacophores based on their ability to find active binders of target proteins and to identify compounds in a pharmacophore search with docking scores competitive with state-of-the-art <italic>de novo</italic> generative models. We assess the generated pharmacophores with both the LIT-PCBA and the Directory of Useful Decoys, Enhanced (DUD-E) benchmarks (<xref ref-type="bibr" rid="B22">Mysinger et al., 2012</xref>; <xref ref-type="bibr" rid="B32">Tran-Nguyen et al., 2020</xref>). LIT-PCBA contains 15 protein targets with active and decoy compounds for each target. An active compound is a known binder to the target protein; in the LIT-PCBA dataset, all decoys are confirmed inactive compounds for the target (<xref ref-type="bibr" rid="B32">Tran-Nguyen et al., 2020</xref>). The DUD-E dataset consists of 102 protein targets and corresponding active and decoy compounds for each target. Decoys included in the DUD-E databases are presumed non-binding compounds (<xref ref-type="bibr" rid="B22">Mysinger et al., 2012</xref>).</p>
<p>We query each protein target database with pharmacophores generated for the target and evaluate the results on enrichment factor (EF) and F1 score. EF measures the fraction of active compounds in the total pool of ligands identified relative to the fraction of active compounds present in the queried database.</p>
<p>
<disp-formula id="equ3">
<mml:math id="m71">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>F</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>Fraction&#x2009;of&#x2009;actives&#x2009;in&#x2009;query&#x2009;results</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>Fraction&#x2009;of&#x2009;actives&#x2009;in&#x2009;database</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>An EF score of one indicates the pharmacophore query result is equivalent to selecting ligands from the database at random; an EF above one indicates the query result contains an enriched subset of actives.</p>
<p>An F1 score is the harmonic mean of precision and recall and considers true positives (actives identified by the query), false positives (decoys in the query result), and false negatives (actives present in the database not included in the query result) (<xref ref-type="bibr" rid="B1">Aggarwal and Koes, 2024</xref>). Precision and recall are used to calculate the F1 score as shown below:<disp-formula id="equ4">
<mml:math id="m72">
<mml:mrow>
<mml:mtext>Precision</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
<mml:mo>&#x2b;</mml:mo>
<mml:mtext>FP</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ5">
<mml:math id="m73">
<mml:mrow>
<mml:mtext>Recall</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>TP</mml:mtext>
<mml:mo>&#x2b;</mml:mo>
<mml:mtext>FN</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ6">
<mml:math id="m74">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>&#x2217;</mml:mo>
<mml:mtext>Precision</mml:mtext>
<mml:mo>&#x2217;</mml:mo>
<mml:mtext>Recall</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>Precision</mml:mtext>
<mml:mo>&#x2b;</mml:mo>
<mml:mtext>Recall</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>where TP (true positive) is an active in the database that appears in the query result, FP (false positive) is a decoy that appears in the query result, and FN (false negative) is an active in the database that does not appear in the query result.</p>
<p>We also assess pharmacophores by their performance in a pharmacophore search. We seek to identify ligands with a high binding affinity for the DUD-E target proteins by querying a large chemical database of potential ligands. To evaluate the ligands, we perform minimization and docking using GNINA (<xref ref-type="bibr" rid="B21">McNutt et al., 2021</xref>), a fork of the AutoDock Vina (<xref ref-type="bibr" rid="B33">Trott and Olson, 2010</xref>) docking software that uses a convolutional neural network to score protein-ligand interactions. Minimization finds the local optimal pose for a ligand bound to a protein, while docking searches for the global optimal bound pose.</p>
<p>A ligand&#x2019;s binding affinity for the protein is gauged by multiple scores provided by GNINA: Vina score, CNN affinity, and CNN VS score. The Vina score is the predicted affinity of the ligand for the protein in kcal/mol. CNN affinity is the affinity score predicted by GNINA using a CNN. The CNN VS score is the CNN affinity multiplied by the CNN score, which predicts the pose probability, and represents the affinity of the ligand in the pose as well as the likelihood that pose would occur (<xref ref-type="bibr" rid="B31">Sunseri and Koes, 2021</xref>). We compare our pharmacophore query results to <italic>de novo</italic> generated ligands across all GNINA scores after selecting the top ligands based on Vina affinity score.</p>
</sec>
</sec>
<sec sec-type="results" id="s5">
<label>5</label>
<title>Results</title>
<sec id="s5-1">
<label>5.1</label>
<title>Comparison to other pharmacophore generation methods</title>
<p>We compare PharmacoForge to two automated pharmacophore generation methods, PharmRL and Apo2ph4, using the LIT-PCBA benchmark. Apo2ph4 reported screening results for pharmacophores generated for the 15 LIT-PCBA targets, selecting 18 PDBs of the full dataset. PharmRL created pharmacophores and performed screening for the same target PDBs. Because screening with Apo2ph4 pharmacophores was originally conducted with proprietary software, <xref ref-type="bibr" rid="B1">Aggarwal and Koes (2024)</xref> used the pharmacophores provided by Apo2ph4 and screened instead with Pharmit.</p>
<p>To compare with Apo2ph4 and PharmRL, we generated five pharmacophores of each size 3&#x2013;8 centers for a total of 30 pharmacophores. We then screened with Pharmit using receptor exclusion to be consistent with the other two methods; receptor exclusion ensures that identified ligands in the pharmacophore search do not overlap with the protein receptor. We compared the results for each method as reported in PharmRL, selecting the best performing pharmacophore based on highest F1 score and breaking any ties with the greatest EF score; similarly for PharmacoForge, the pharmacophore for comparison was selected based on top F1 score with EF score deciding any ties. We report the EF and F1 scores of all methods in <xref ref-type="fig" rid="F4">Figures 4</xref>, <xref ref-type="fig" rid="F5">5</xref>. PharmacoForge-generated pharmacophores perform best in screening based on F1 scores for 12 out of 18 targets while PharmRL has the top score for five targets and Apo2ph4 for one; when comparing by EF, PharmacoForge achieves the best result for 13 out of 18, PharmRL for four, and Apo2ph4 for one.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Log EF results for the LIT-PCBA benchmark for PharmacoForge, PharmRL, and Apo2ph4. Each bar represents the highest Log EF achieved by a query based on each method for each target in the benchmark.</p>
</caption>
<graphic xlink:href="fbinf-05-1628800-g004.tif">
<alt-text content-type="machine-generated">Bar chart titled &#x22;Best Log EF by Method Across Targets&#x22; showing Log EF values for various targets using three methods: PharmacoForge (purple), PharmRL (orange), and Apo2ph4 (green). Targets are displayed on the x-axis, while Log EF is on the y-axis. PharmacoForge shows higher values for several targets, particularly GBA:3rik. PharmRL and Apo2ph4 vary across targets. Data is categorized under &#x22;Target: PDB System.&#x22;</alt-text>
</graphic>
</fig>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>F1 results for the LIT-PCBA benchmark for PharmacoForge, PharmRL, and Apo2ph4. Each bar represents the highest F1 achieved by a query based on each method for each target in the benchmark.</p>
</caption>
<graphic xlink:href="fbinf-05-1628800-g005.tif">
<alt-text content-type="machine-generated">Bar chart showing F1 scores by method across various PDB system targets. Methods include PharmacoForge (purple), PharmRL (orange), and Apo2ph4 (green). PharmRL has the highest score in ESR1_ago:2q9, while PharmacoForge shows strong performance across most targets. Targets include GBA:3rik, ADRB2:4lde, and ALDH1:5ac2, among others.</alt-text>
</graphic>
</fig>
<p>PharmacoForge creates at least one pharmacophore with an EF above one for all targets in LIT-PCBA, which is not true of either PharmRL or Apo2ph4; furthermore, PharmacoForge queries achieve an F1 score above zero at least once for all targets. The performance on both metrics demonstrates that PharmacoForge generates informative pharmacophores that perform comparatively well in a pharmacophore search.</p>
</sec>
<sec id="s5-2">
<label>5.2</label>
<title>Identifying active compounds with generated pharmacophores</title>
<p>For this benchmark we sought to identify an enriched subset of active compounds from the DUD-E target databases using a generated pharmacophore query. We first generate pharmacophores with 3&#x2013;8 centers for each target conditioned on the reference receptor PDB provided by DUD-E; we sample five pharmacophores of each size for a total of 30 pharmacophores per target. We then query a database of DUD-E ligands for each target containing both actives and decoys and calculate the EF and F1 score of each query result. We perform the database queries using Pharmit, which identifies ligands in the database that match the pharmacophore centers. Pharmit generates 25 conformers per ligand in the database to compare the pharmacophore against, but we limit the number of conformers returned in the query results to one per molecule. For a molecule to match a pharmacophore query, the conformer pose must contain interaction features that align to within <inline-formula id="inf68">
<mml:math id="m75">
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>&#xc5; RMSD of all pharmacophore centers. We compare the generated pharmacophores&#x2019; performance to a reference set of pharmacophores constructed by randomly subsampling the reference pharmacophore for each target. The reference pharmacophore centers capture ground truth interaction features between the ligand and protein and offer an approximation of how well a pharmacophore may perform as a query; however, not all reference ligands provided by DUD-E are included as actives in the target database. The reference ligand is only included as an active for 19 out of 102 targets, so a reference pharmacophore may not necessarily match an active compound despite containing ground truth interactions that would allow a ligand to bind. The reference pharmacophore set represents an informative basis for comparison as it contains accurate interaction points for the protein that have the potential to match multiple active scaffolds but is still limited in the diversity of ligand scaffolds that match. A reference pharmacophore is found by Pharmit based on the reference protein and ligand PDB and SDF files provided by DUD-E. To obtain a reference set of 30 pharmacophores, we created five pharmacophores of each size 3&#x2013;8 centers by randomly selecting the desired number of centers. We again queried the DUD-E target databases with the reference pharmacophores using Pharmit and computed the EF and F1 scores of each query result.</p>
<p>The full resulting EF and F1 scores of both generated and reference pharmacophores are shown in the Additional DUD-E Screening Benchmark Results section of <xref ref-type="sec" rid="s13">Supplementary Material</xref>. Pharmacophore queries that returned no results (undefined EF) are excluded from the EF analysis. 1584 reference and 1229 generated pharmacophore queries returned no results, representing 52% and 40% of total queries, respectively. Reference pharmacophores failed to find actives in 63% of queries; generated pharmacophore queries returned no actives in 58% of queries. We compare the average and maximum EF and F1 scores of reference and generated pharmacophores across all pharmacophore queries that returned results; the maximum scores for each target are shown in <xref ref-type="fig" rid="F6">Figures 6, 7</xref>.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Comparison of the maximum EF scores for each target of 30 pharmacophores generated by PharmacoForge and 30 pharmacophores subsampled from the reference ligand provided by DUD-E.</p>
</caption>
<graphic xlink:href="fbinf-05-1628800-g006.tif">
<alt-text content-type="machine-generated">Bar chart titled &#x22;Max Enrichment Factors (EF) of Screen Results by Target&#x22; with purple bars representing PharmacoForge and diamond markers for Reference. The x-axis shows various targets, and the y-axis shows Log EF ranging from 0.00 to 1.75.</alt-text>
</graphic>
</fig>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Comparison of the maximum F1 scores for each target of 30 pharmacophores generated by PharmacoForge and 30 pharmacophores subsampled from the reference ligand provided by DUD-E.</p>
</caption>
<graphic xlink:href="fbinf-05-1628800-g007.tif">
<alt-text content-type="machine-generated">Bar chart titled &#x22;Max F1 Score of Screen Results by Target&#x22; compares F1 Scores for different targets. Purple bars represent PharmacoForge scores, while black diamonds denote reference scores. Targets are labeled on the x-axis, and F1 Scores range from zero to point eight on the y-axis.</alt-text>
</graphic>
</fig>
<p>
<xref ref-type="table" rid="T1">Table 1</xref> reports the number of targets for which the generated or reference pharmacophores achieved a higher mean or maximum EF or F1 score. The generated pharmacophores achieved an average EF greater than that of the reference pharmacophores on 18 out of 102 targets and had a maximum EF greater than or equal to the reference maximum EF on 37 out of 102 targets. Outperforming the reference pharmacophores for some targets demonstrates that the generated pharmacophores are informative and capable of finding enriched subsets of active compounds for a target. The generated pharmacophores often outperform the reference pharmacophores on average F1 score and perform similarly on maximum F1 score. The improvement in F1 scores for generated pharmacophores over reference results from a higher average recall on 95 out of 102 targets.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Number of targets for which Generated or Reference pharmacophores have the best result for each metric; Equal indicates when the Generated and Reference pharmacophores had the same value for the metric on a target.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="center">Avg EF</th>
<th align="center">Max EF</th>
<th align="center">Avg F1</th>
<th align="center">Max F1</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Generated</td>
<td align="center">18</td>
<td align="center">20</td>
<td align="center">71</td>
<td align="center">46</td>
</tr>
<tr>
<td align="left">Reference</td>
<td align="center">84</td>
<td align="center">65</td>
<td align="center">31</td>
<td align="center">56</td>
</tr>
<tr>
<td align="left">Equal</td>
<td align="center">0</td>
<td align="center">17</td>
<td align="center">0</td>
<td align="center">0</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s5-3">
<label>5.3</label>
<title>Pharmacophore-matching ligand comparison with <italic>de novo</italic> ligand methods</title>
<p>Calculating the EF and F1 scores of pharmacophore query results provides useful metrics of how well the pharmacophore matches known active ligands, but a pharmacophore may still identify useful interactions while not matching previously identified congeneric series of actives. To further evaluate the quality of the compounds selected through our generated pharmacophores, we use docking scores as a proxy for binding affinity. We evaluated the results of a pharmacophore query by minimizing and docking the result ligands to the target protein and comparing the predicted affinity scores. A more negative affinity value suggests that the ligand is an active binder of the target. We minimized and docked the filtered query results to their respective target proteins using GNINA (<xref ref-type="bibr" rid="B21">McNutt et al., 2021</xref>).</p>
<p>For this analysis, we sought to identify hit molecules for the DUD-E targets and screened the CHEMBL database, which we downsampled from two million compounds to 200,000 compounds. Using our previously generated 30 pharmacophores for each target, we queried CHEMBL and selected pharmacophores with query results of 2000 or fewer molecules; this cutoff represents 1% of the queried database size and was used to eliminate pharmacophores lacking specificity of results. This amounted to 1175 pharmacophore queries remaining. Each returned hit is minimized with GNINA to find the local optimal solution most aligned with the pharmacophore. We also docked the query results with GNINA for an approximation of global optimal pose of the identified compound and direct comparison with our random baseline. We then identified the top 100 ligands based on Vina affinity score for each target to compare against other methods.</p>
<sec id="s5-3-1">
<label>5.3.1</label>
<title>Baselines</title>
<p>We compared PharmacoForge-identified CHEMBL compounds with ligands generated by two <italic>de novo</italic> ligand generative models, Pocket2Mol and DiffSBDD. These models were chosen based on their high performance relative to other <italic>de novo</italic> ligand generative models and availability of a trained model (<xref ref-type="bibr" rid="B27">Schneuing et al., 2022</xref>; <xref ref-type="bibr" rid="B23">Peng et al., 2022</xref>). As a further baseline, we also randomly selected 10,000 molecules from CHEMBL and docked those to each DUD-E target for comparison.</p>
<p>For each generative model, we used the default settings of each model to sample 1000 ligands for each target; for some targets, DiffSBDD and Pocket2Mol were unable to generate 1000 unique ligands, but at least 850 ligands were generated for all targets. We then minimized and docked each ligand to its corresponding receptor. We again selected the best 100 ligands based on Vina affinity score for each target and compared to the affinity scores for our pharmacophore query result ligands.</p>
</sec>
<sec id="s5-3-2">
<label>5.3.2</label>
<title>Ligand strain energies</title>
<p>Further analysis and visualization of all ligand results revealed that <italic>de novo</italic> generated ligands were sometimes wedged in the target protein pocket in highly strained poses. To evaluate the strain of both pharmacophore queried ligands and <italic>de novo</italic> generated ligands, we calculated the total energy of each molecule before and after geometry optimization using the Universal Force Field (UFF) as implemented in RDKit (<xref ref-type="bibr" rid="B20">Landrum et al., 2024</xref>). The strain is quantified as the energy difference between the unoptimized and optimized structures. Pharmacophore queried ligands exhibited a median strain value of 0.05 kcal/mol, with only small energy reductions observed upon optimization. In comparison, DiffSBDD and Pocket2Mol had median strain energies of 295.7 kcal/mol and 351.5 kcal/mol, respectively. The difference in average strain energies of each set of ligands is visualized in <xref ref-type="fig" rid="F8">Figure 8</xref>. The orders of magnitude difference in strain energies between methods indicates that PharmacoForge retrieves commercially available molecules in realistic conformations, improving on an existing problem with <italic>de novo</italic> 3D molecule generative methods.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Comparison of average strain energies for ligands in minimized poses. Average strain energy shown on log scale.</p>
</caption>
<graphic xlink:href="fbinf-05-1628800-g008.tif">
<alt-text content-type="machine-generated">Bar chart titled &#x22;Average Ligand Strain Energy by Model&#x22; showing three models: PharmacoForge with a value slightly above 5, DiffSBDD with the highest value over 20, and Pocket2Mol around 7. Horizontal axis represents the logarithm of average strain energy.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s5-3-3">
<label>5.3.3</label>
<title>Ligand minimization with GNINA</title>
<p>The results of minimizing the de-strained molecules are shown in <xref ref-type="fig" rid="F9">Figure 9</xref> and reported in <xref ref-type="table" rid="T2">Table 2</xref>. Ligands identified with pharmacophore search from generated pharmacophores have similar predicted affinity for the target proteins as generated ligands. The strained predicted affinities for the best 100 ligands are similar between PharmacoForge and DiffSBDD results; Pocket2Mol compounds have the best average affinity for the targets. After de-straining, compounds from both DiffSBDD and Pocket2Mol see decreased affinity for the targets as indicated by a larger Vina score, with Vina scores of DiffSBDD ligands increasing by 5.1 kcal/mol and Pocket2Mol by 3.2 kcal/mol. The Vina score for PharmacoForge increases by just 0.37 kcal/mol by comparison, and the de-strained results have the highest affinity for the targets. PharmacoForge is less impacted by the issue of highly strained ligands seen with purely generative models. PharmacoForge also has a narrower distribution compared to the predicted affinity range of both DiffSBDD and Pocket2Mol, with a standard deviation for de-strained predicted affinity of 2.13 compared to 5.15 and 2.56, respectively; PharmacoForge achieves more consistent results across targets.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>Distribution of Vina docking scores from minimization, which evaluates generated poses at a locally optimal configuration close to the generated pose. Results shown for the top 100 ligands for each DUD-E target. Original pose Vina scores on top half of each violin plot with de-strained Vina scores on the bottom half.</p>
</caption>
<graphic xlink:href="fbinf-05-1628800-g009.tif">
<alt-text content-type="machine-generated">Violin plot comparing minimization affinity scores for PharmacoForge, DiffSBDD, and Pocket2Mol. Vina affinity scores on the x-axis range from negative twenty to zero kcal/mol, with PharmacoForge, DiffSBDD, and Pocket2Mol represented in purple, teal, and yellow, respectively.</alt-text>
</graphic>
</fig>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Minimization mean scores comparison between original and de-strained poses with standard error.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="left">Model</th>
<th align="left">Affinity (Vina) <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="left">CNN affinity <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mi>&#x2191;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="left">CNN VS score <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:mi>&#x2191;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="left">Original Poses</td>
<td align="left">Pocket2Mol</td>
<td align="left">
<inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">11.69</mml:mn>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.03</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:mn>7.12</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.01</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:mn>3.92</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.03</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">DiffSBDD</td>
<td align="left">
<inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>10.06</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.02</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:mn mathvariant="bold">7.27</mml:mn>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.01</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:mn mathvariant="bold">4.14</mml:mn>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.02</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">
<bold>PharmacoForge</bold>
</td>
<td align="left">
<inline-formula id="inf10">
<mml:math id="m10">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>9.38</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.01</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf11">
<mml:math id="m11">
<mml:mrow>
<mml:mn>7.05</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.01</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf12">
<mml:math id="m12">
<mml:mrow>
<mml:mn>2.95</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.02</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td rowspan="3" align="left">De-strained Poses</td>
<td align="left">Pocket2Mol</td>
<td align="left">
<inline-formula id="inf13">
<mml:math id="m13">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>8.45</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf14">
<mml:math id="m14">
<mml:mrow>
<mml:mn mathvariant="bold">7.35</mml:mn>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.01</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf15">
<mml:math id="m15">
<mml:mrow>
<mml:mn>2.06</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.02</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">DiffSBDD</td>
<td align="left">
<inline-formula id="inf16">
<mml:math id="m16">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>4.94</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf17">
<mml:math id="m17">
<mml:mrow>
<mml:mn>6.34</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.01</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf18">
<mml:math id="m18">
<mml:mrow>
<mml:mn>2.19</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.02</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">
<bold>PharmacoForge</bold>
</td>
<td align="left">
<inline-formula id="inf19">
<mml:math id="m19">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">9.01</mml:mn>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.02</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf20">
<mml:math id="m20">
<mml:mrow>
<mml:mn>6.99</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.01</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf21">
<mml:math id="m21">
<mml:mrow>
<mml:mn mathvariant="bold">2.82</mml:mn>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.02</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Best score for each column is listed in bold.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s5-3-4">
<label>5.3.4</label>
<title>Ligand docking with GNINA</title>
<p>The docking results shown in <xref ref-type="fig" rid="F10">Figure 10</xref> and <xref ref-type="table" rid="T3">Table 3</xref> include a random baseline of CHEMBL compounds for comparison; the randomly selected compounds were kept in their original pose and not de-strained. All methods surpass the random baseline for both original and de-strained poses by at least &#x2212;5.5 kcal/mol. As also seen in minimization results, PharmacoForge and DiffSBDD perform comparably while Pocket2Mol ligands have the greatest affinity for the target proteins. For DiffSBDD and Pocket2Mol, de-straining of the ligands results in an increased Vina score, indicating a decrease in predicted affinity, while de-straining PharmacoForge-identified ligands leads to a slight improvement in predicted affinity. The Vina score increases for DiffSBDD and Pocket2Mol by 3.2 kcal/mol and 1.1 kcal/mol, respectively. The ligands identified through pharmacophore search have high predicted affinity for their targets while maintaining a natural pose, and de-straining of the pose does not lead to a loss of affinity for the target as seen in generated ligands.</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Distribution of predicted binding affinity from docking, which does not use the initial generated pose and so results can be meaningfully compared to a random sample of compounds. Results shown for the top 100 ligands for each target. Original pose Vina scores on top half of each violin plot with de-strained Vina scores on the bottom half. Random baseline includes only original scores.</p>
</caption>
<graphic xlink:href="fbinf-05-1628800-g010.tif">
<alt-text content-type="machine-generated">Violin plot comparing docking affinity scores of PharmacoForge, DiffSBDD, Pocket2Mol, and Random methods. The horizontal axis shows Vina affinity scores in kilocalories per mole ranging from negative twenty to zero. Each method's distribution is represented with a different color: purple, green, yellow, and blue, respectively.</alt-text>
</graphic>
</fig>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Docking mean scores comparison between original and de-strained poses with standard error.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="left">Model</th>
<th align="left">Affinity (Vina) <inline-formula id="inf22">
<mml:math id="m22">
<mml:mrow>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="left">CNN affinity <inline-formula id="inf23">
<mml:math id="m23">
<mml:mrow>
<mml:mi>&#x2191;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="left">CNN VS score <inline-formula id="inf24">
<mml:math id="m24">
<mml:mrow>
<mml:mi>&#x2191;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="4" align="left">Original Poses</td>
<td align="left">Random</td>
<td align="left">
<inline-formula id="inf25">
<mml:math id="m25">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>5.08</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.01</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf26">
<mml:math id="m26">
<mml:mrow>
<mml:mn>6.70</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.00</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf27">
<mml:math id="m27">
<mml:mrow>
<mml:mn>3.63</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.00</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">Pocket2Mol</td>
<td align="left">
<inline-formula id="inf28">
<mml:math id="m28">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">11.73</mml:mn>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.03</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf29">
<mml:math id="m29">
<mml:mrow>
<mml:mn>8.01</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.01</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf30">
<mml:math id="m30">
<mml:mrow>
<mml:mn>6.66</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.02</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">DiffSBDD</td>
<td align="left">
<inline-formula id="inf31">
<mml:math id="m31">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>10.83</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.02</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf32">
<mml:math id="m32">
<mml:mrow>
<mml:mn mathvariant="bold">8.55</mml:mn>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.02</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf33">
<mml:math id="m33">
<mml:mrow>
<mml:mn>6.66</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.01</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">
<bold>PharmacoForge</bold>
</td>
<td align="left">
<inline-formula id="inf34">
<mml:math id="m34">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>10.39</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.01</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf35">
<mml:math id="m35">
<mml:mrow>
<mml:mn>8.40</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.12</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf36">
<mml:math id="m36">
<mml:mrow>
<mml:mn mathvariant="bold">8.30</mml:mn>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.09</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td rowspan="3" align="left">De-strained Poses</td>
<td align="left">Pocket2Mol</td>
<td align="left">
<inline-formula id="inf37">
<mml:math id="m37">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>10.59</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.03</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf38">
<mml:math id="m38">
<mml:mrow>
<mml:mn>7.71</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.02</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf39">
<mml:math id="m39">
<mml:mrow>
<mml:mn>3.68</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.04</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">DiffSBDD</td>
<td align="left">
<inline-formula id="inf40">
<mml:math id="m40">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>7.65</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.03</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf41">
<mml:math id="m41">
<mml:mrow>
<mml:mn>6.80</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.03</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf42">
<mml:math id="m42">
<mml:mrow>
<mml:mn>4.06</mml:mn>
<mml:mo>&#xb1;</mml:mo>
<mml:mn>0.03</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">
<bold>PharmacoForge</bold>
</td>
<td align="left">
<inline-formula id="inf43">
<mml:math id="m43">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">10.49</mml:mn>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.02</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf44">
<mml:math id="m44">
<mml:mrow>
<mml:mn mathvariant="bold">7.73</mml:mn>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.02</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">
<inline-formula id="inf45">
<mml:math id="m45">
<mml:mrow>
<mml:mn mathvariant="bold">4.98</mml:mn>
<mml:mo mathvariant="bold">&#xb1;</mml:mo>
<mml:mn mathvariant="bold">0.04</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Best score for each column is listed in bold.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
</sec>
<sec sec-type="conclusion" id="s6">
<label>6</label>
<title>Conclusion</title>
<p>In this work, we presented PharmacoForge, which generates novel pharmacophores conditioned on a protein pocket. Generating pharmacophores leverages the power of generative modeling to create a structural description of the desired molecules that can be used to rapidly screen libraries of valid, commercially available, synthetically accessible molecules.</p>
<p>Our pharmacophore screening results surpass existing automated pharmacophore generation methods and are comparable with existing methods for <italic>de novo</italic> ligand generation without suffering from high strain. Further additions to predict directionality for relevant pharmacophore features as well as learned model-determined pharmacophore size may improve the screening performance.</p>
<p>Automated pharmacophore elucidation eliminates barriers to further adoption of pharmacophore screening in drug discovery campaigns to allow for accelerated screening of large chemical databases. The interpretability of pharmacophores enables human-in-the-loop discovery where experts work with generative models to ultimately uncover commercially available leads for drug discovery. Recent work in generative models for <italic>de novo</italic> ligand design creates new ligands based on a pharmacophore, which has led to improvements in validity and target affinity for generated ligands (<xref ref-type="bibr" rid="B37">Ziv et al., 2025</xref>; <xref ref-type="bibr" rid="B36">Zhu et al., 2023</xref>; <xref ref-type="bibr" rid="B34">Wang and Rajapakse, 2024</xref>; <xref ref-type="bibr" rid="B35">Wang et al., 2022</xref>; <xref ref-type="bibr" rid="B15">Imrie et al., 2021</xref>). Accurately identifying key ligand-protein interactions in the binding pocket allows for better informed ligand generation, but pharmacophore-based ligand generative models require a high-quality pharmacophore to be effective. Automating pharmacophore generation can directly complement these efforts by enabling a fully-automated ligand generation pipeline that produces higher quality ligands than current ligand generative models. Automated pharmacophore generation has immediate value as an aid to existing virtual screening pipelines and holds promise as an important step in future ligand generative models.</p>
<p>PharmacoForge is available for use as a Google Colaboratory notebook here, and the full model implementation and open source training code are available at <ext-link ext-link-type="uri" xlink:href="https://github.com/eflynn8/pharmacophore-diffusion">https://github.com/eflynn8/pharmacophore-diffusion</ext-link>.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s13">Supplementary Material</xref>; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s8">
<title>Author contributions</title>
<p>EF: Methodology, Project administration, Investigation, Writing &#x2013; review and editing, Validation, Visualization, Formal Analysis, Writing &#x2013; original draft, Software, Data curation. RS: Visualization, Formal Analysis, Software, Writing &#x2013; original draft, Writing &#x2013; review and editing. ID: Data curation, Project administration, Writing &#x2013; original draft, Conceptualization, Investigation, Writing &#x2013; review and editing, Methodology, Software. RA: Writing &#x2013; review and editing, Methodology, Software, Writing &#x2013; original draft, Investigation. DK: Methodology, Writing &#x2013; review and editing, Supervision, Resources, Funding acquisition, Writing &#x2013; original draft, Project administration.</p>
</sec>
<ack>
<title>Acknowledgements</title>
<p>The authors thank the full Koes lab for valuable discussions throughout method development.</p>
</ack>
<sec sec-type="COI-statement" id="s10">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest. All authors read and approved the final manuscript.</p>
</sec>
<sec sec-type="ai-statement" id="s11">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s12">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="s13">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fbinf.2025.1628800/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fbinf.2025.1628800/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet1.pdf" id="SM1" mimetype="application/pdf"/>
</sec>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1041416/overview">Garrett M. Morris</ext-link>, University of Oxford, United Kingdom</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/467807/overview">Anchala Kumari</ext-link>, Jawaharlal Nehru University, India</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3056583/overview">Fergus Imrie</ext-link>, University of Oxford, United Kingdom</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aggarwal</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Koes</surname>
<given-names>D. R.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>PharmRL: pharmacophore elucidation with deep geometric reinforcement learning</article-title>. <source>BMC Biol.</source> <volume>22</volume>, <fpage>301</fpage>. <pub-id pub-id-type="doi">10.1186/s12915-024-02096-5</pub-id>
<pub-id pub-id-type="pmid">39736736</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Anderson</surname>
<given-names>A. C.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>The process of structure-based drug design</article-title>. <source>Chem. Biol.</source> <volume>10</volume>, <fpage>787</fpage>&#x2013;<lpage>797</lpage>. <pub-id pub-id-type="doi">10.1016/j.chembiol.2003.09.002</pub-id>
<pub-id pub-id-type="pmid">14522049</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Batool</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ahmad</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A structure-based drug discovery paradigm</article-title>. <source>Int. J. Mol. Sci.</source> <volume>20</volume>, <fpage>2783</fpage>. <pub-id pub-id-type="doi">10.3390/ijms20112783</pub-id>
<pub-id pub-id-type="pmid">31174387</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Blay</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Tolani</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Ho</surname>
<given-names>S. P.</given-names>
</name>
<name>
<surname>Arkin</surname>
<given-names>M. R.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>High-throughput screening: today&#x2019;s biochemical and cell-based approaches</article-title>. <source>Drug Discov. Today</source> <volume>25</volume>, <fpage>1807</fpage>&#x2013;<lpage>1821</lpage>. <pub-id pub-id-type="doi">10.1016/j.drudis.2020.07.024</pub-id>
<pub-id pub-id-type="pmid">32801051</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dunn</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Koes</surname>
<given-names>D. R.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Accelerating inference in molecular diffusion models with latent representations of protein structure</article-title>. <source>arXiv Prepr. arXiv:2311.13466</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2311.13466</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Durrant</surname>
<given-names>J. D.</given-names>
</name>
<name>
<surname>Amaro</surname>
<given-names>R. E.</given-names>
</name>
<name>
<surname>McCammon</surname>
<given-names>J. A.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>AutoGrow: a novel algorithm for protein inhibitor design</article-title>. <source>Chem. Biol. Drug Des.</source> <volume>73</volume>, <fpage>168</fpage>&#x2013;<lpage>178</lpage>. <pub-id pub-id-type="doi">10.1111/j.1747-0285.2008.00761.x</pub-id>
<pub-id pub-id-type="pmid">19207419</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Francoeur</surname>
<given-names>P. G.</given-names>
</name>
<name>
<surname>Masuda</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Sunseri</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jia</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Iovanisci</surname>
<given-names>R. B.</given-names>
</name>
<name>
<surname>Snyder</surname>
<given-names>I.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Three-dimensional convolutional neural networks and a cross-docked data set for structure-based drug design</article-title>. <source>J. Chem. Inf. Model.</source> <volume>60</volume>, <fpage>4200</fpage>&#x2013;<lpage>4215</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jcim.0c00411</pub-id>
<pub-id pub-id-type="pmid">32865404</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gentile</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Agrawal</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Hsing</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ton</surname>
<given-names>A.-T.</given-names>
</name>
<name>
<surname>Ban</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Norinder</surname>
<given-names>U.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Deep docking: a deep learning platform for augmentation of structure based drug discovery</article-title>. <source>ACS Central Sci.</source> <volume>6</volume>, <fpage>939</fpage>&#x2013;<lpage>949</lpage>. <pub-id pub-id-type="doi">10.1021/acscentsci.0c00229</pub-id>
<pub-id pub-id-type="pmid">32607441</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gentile</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Yaacoub</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>Gleave</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Fernandez</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ton</surname>
<given-names>A.-T.</given-names>
</name>
<name>
<surname>Ban</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Artificial intelligence&#x2013;enabled virtual screening of ultra-large chemical libraries with deep docking</article-title>. <source>Nat. Protoc.</source> <volume>17</volume>, <fpage>672</fpage>&#x2013;<lpage>697</lpage>. <pub-id pub-id-type="doi">10.1038/s41596-021-00659-2</pub-id>
<pub-id pub-id-type="pmid">35121854</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Giordano</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Biancaniello</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Argenio</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Facchiano</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Drug design by pharmacophore and virtual screening approach</article-title>. <source>Pharmaceuticals</source> <volume>15</volume>, <fpage>646</fpage>. <pub-id pub-id-type="doi">10.3390/ph15050646</pub-id>
<pub-id pub-id-type="pmid">35631472</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Heider</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kilian</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Garifulina</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hering</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Langer</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Seidel</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Apo2ph4: a versatile workflow for the generation of receptor-based pharmacophore models for virtual screening</article-title>. <source>J. Chem. Inf. Model.</source> <volume>63</volume>, <fpage>101</fpage>&#x2013;<lpage>110</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jcim.2c00814</pub-id>
<pub-id pub-id-type="pmid">36526584</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ho</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jain</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Abbeel</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Denoising diffusion probabilistic models</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>33</volume>, <fpage>6840</fpage>&#x2013;<lpage>6851</lpage>. <pub-id pub-id-type="doi">10.48550/arXiv.2006.11239</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Hoogeboom</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Satorras</surname>
<given-names>V. G.</given-names>
</name>
<name>
<surname>Vignac</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Welling</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Equivariant diffusion for molecule generation in 3D</article-title>,&#x201d; in <source>International conference on machine learning</source> (<publisher-loc>Baltimore, MD</publisher-loc>: <publisher-name>PMLR</publisher-name>), <fpage>8867</fpage>&#x2013;<lpage>8887</lpage>. <pub-id pub-id-type="doi">10.48550/arXiv.2203.17003</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hughes</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Rees</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kalindjian</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Philpott</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Principles of early drug discovery</article-title>. <source>Br. J. Pharmacol.</source> <volume>162</volume>, <fpage>1239</fpage>&#x2013;<lpage>1249</lpage>. <pub-id pub-id-type="doi">10.1111/j.1476-5381.2010.01127.x</pub-id>
<pub-id pub-id-type="pmid">21091654</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Imrie</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Hadfield</surname>
<given-names>T. E.</given-names>
</name>
<name>
<surname>Bradley</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>Deane</surname>
<given-names>C. M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Deep generative design with 3D pharmacophoric constraints</article-title>. <source>Chem. Sci.</source> <volume>12</volume>, <fpage>14577</fpage>&#x2013;<lpage>14589</lpage>. <pub-id pub-id-type="doi">10.1039/D1SC02436A</pub-id>
<pub-id pub-id-type="pmid">34881010</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Jing</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Eismann</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Suriana</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Townshend</surname>
<given-names>R. J. L.</given-names>
</name>
<name>
<surname>Dror</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Learning from protein structure with geometric vector perceptrons</article-title>,&#x201d; in <source>International conference on learning representations</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2009.01411</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kaserer</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Beck</surname>
<given-names>K. R.</given-names>
</name>
<name>
<surname>Akram</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Odermatt</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Schuster</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Pharmacophore models and pharmacophore-based virtual screening: concepts and applications exemplified on hydroxysteroid dehydrogenases</article-title>. <source>Molecules</source> <volume>20</volume>, <fpage>22799</fpage>&#x2013;<lpage>22832</lpage>. <pub-id pub-id-type="doi">10.3390/molecules201219880</pub-id>
<pub-id pub-id-type="pmid">26703541</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Koes</surname>
<given-names>D. R.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Pharmacophore modeling: methods and applications</article-title>,&#x201d; in <source>Computer-aided drug discovery</source>. Editor <person-group person-group-type="editor">
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
</person-group> (<publisher-name>Springer</publisher-name>), <fpage>167</fpage>&#x2013;<lpage>188</lpage>. <pub-id pub-id-type="doi">10.1007/7653_2015_46</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Koes</surname>
<given-names>D. R.</given-names>
</name>
<name>
<surname>Camacho</surname>
<given-names>C. J.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Pharmer: efficient and exact pharmacophore search</article-title>. <source>J. Chem. Inf. Model.</source> <volume>51</volume>, <fpage>1307</fpage>&#x2013;<lpage>1314</lpage>. <pub-id pub-id-type="doi">10.1021/ci200097m</pub-id>
<pub-id pub-id-type="pmid">21604800</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="web">
<person-group person-group-type="author">
<name>
<surname>Landrum</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>RDKit: open-source cheminformatics</article-title>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.rdkit.org">https://www.rdkit.org</ext-link>.</comment>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>McNutt</surname>
<given-names>A. T.</given-names>
</name>
<name>
<surname>Francoeur</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Aggarwal</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Masuda</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Meli</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Ragoza</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>GNINA 1.0: molecular docking with deep learning</article-title>. <source>J. Cheminformatics</source> <volume>13</volume>, <fpage>43</fpage>. <pub-id pub-id-type="doi">10.1186/s13321-021-00522-2</pub-id>
<pub-id pub-id-type="pmid">34108002</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mysinger</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Carchia</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Irwin</surname>
<given-names>J. J.</given-names>
</name>
<name>
<surname>Shoichet</surname>
<given-names>B. K.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Directory of useful decoys, enhanced (DUD-E): better ligands and decoys for better benchmarking</article-title>. <source>J. Med. Chem.</source> <volume>55</volume>, <fpage>6582</fpage>&#x2013;<lpage>6594</lpage>. <pub-id pub-id-type="doi">10.1021/jm300687e</pub-id>
<pub-id pub-id-type="pmid">22716043</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Peng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Pocket2Mol: efficient molecular sampling based on 3D protein pockets</article-title>,&#x201d; in <source>International conference on machine learning</source> (<publisher-loc>Baltimore, MD</publisher-loc>: <publisher-name>PMLR</publisher-name>), <fpage>17644</fpage>&#x2013;<lpage>17655</lpage>. <pub-id pub-id-type="doi">10.48550/arXiv.2205.07249</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pinheiro</surname>
<given-names>P. O.</given-names>
</name>
<name>
<surname>Jamasb</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mahmood</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Sresht</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Saremi</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Structure-based drug design by denoising voxel grids</article-title>. <source>arXiv Prepr. arXiv:2405.03961</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2405.03961</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ragoza</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Masuda</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Koes</surname>
<given-names>D. R.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Generating 3D molecules conditional on receptor binding sites with deep generative models</article-title>. <source>Chem. Sci.</source> <volume>13</volume>, <fpage>2701</fpage>&#x2013;<lpage>2713</lpage>. <pub-id pub-id-type="doi">10.1039/D1SC05976A</pub-id>
<pub-id pub-id-type="pmid">35356675</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sadybekov</surname>
<given-names>A. V.</given-names>
</name>
<name>
<surname>Katritch</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Computational approaches streamlining drug discovery</article-title>. <source>Nature</source> <volume>616</volume>, <fpage>673</fpage>&#x2013;<lpage>685</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-023-05905-z</pub-id>
<pub-id pub-id-type="pmid">37100941</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schneuing</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Du</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Harris</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Jamasb</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Igashov</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Du</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Structure-based drug design with equivariant diffusion models</article-title>. <source>arXiv Prepr. arXiv:2210.13695</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2210.13695</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Sohl-Dickstein</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Weiss</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Maheswaranathan</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Ganguli</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Deep unsupervised learning using nonequilibrium thermodynamics</article-title>,&#x201d; in <source>International conference on machine learning</source> (<publisher-loc>Lille, France</publisher-loc>: <publisher-name>PMLR</publisher-name>), <fpage>2256</fpage>&#x2013;<lpage>2265</lpage>. <pub-id pub-id-type="doi">10.48550/arXiv.1503.03585</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sohl-Dickstein</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kingma</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Kumar</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ermon</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Poole</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Score-based generative modeling through stochastic differential equations</article-title>,&#x201d; in <source>International conference on learning representations</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2011.13456</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sunseri</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Koes</surname>
<given-names>D. R.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Pharmit: interactive exploration of chemical space</article-title>. <source>Nucleic Acids Res.</source> <volume>44</volume>, <fpage>W442</fpage>&#x2013;<lpage>W448</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkw287</pub-id>
<pub-id pub-id-type="pmid">27095195</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sunseri</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Koes</surname>
<given-names>D. R.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Virtual screening with GNINA 1.0</article-title>. <source>Molecules</source> <volume>26</volume>, <fpage>7369</fpage>. <pub-id pub-id-type="doi">10.3390/molecules26237369</pub-id>
<pub-id pub-id-type="pmid">34885952</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tran-Nguyen</surname>
<given-names>V.-K.</given-names>
</name>
<name>
<surname>Jacquenard</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Rognan</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>LIT-PCBA: an unbiased data set for machine learning and virtual screening</article-title>. <source>J. Chem. Inf. Model.</source> <volume>60</volume>, <fpage>4263</fpage>&#x2013;<lpage>4273</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jcim.0c00155</pub-id>
<pub-id pub-id-type="pmid">32282202</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Trott</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Olson</surname>
<given-names>A. J.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>AutoDock Vina: improving the speed and accuracy of docking with a new scoring function, efficient optimization, and multithreading</article-title>. <source>J. Comput. Chem.</source> <volume>31</volume>, <fpage>455</fpage>&#x2013;<lpage>461</lpage>. <pub-id pub-id-type="doi">10.1002/jcc.21334</pub-id>
<pub-id pub-id-type="pmid">19499576</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Rajapakse</surname>
<given-names>J. C.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Pharmacophore-guided <italic>de novo</italic> drug design with diffusion bridge</article-title>. <source>arXiv Prepr. arXiv:2412.19812</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2412.19812</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hsieh</surname>
<given-names>C.-Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Weng</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>RELATION: a deep generative model for structure-based <italic>de novo</italic> drug design</article-title>. <source>J. Med. Chem.</source> <volume>65</volume>, <fpage>9478</fpage>&#x2013;<lpage>9492</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jmedchem.2c00732</pub-id>
<pub-id pub-id-type="pmid">35713420</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A pharmacophore-guided deep learning approach for bioactive molecular generation</article-title>. <source>Nat. Commun.</source> <volume>14</volume>, <fpage>6234</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-023-41454-9</pub-id>
<pub-id pub-id-type="pmid">37803000</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ziv</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Imrie</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Marsden</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Deane</surname>
<given-names>C. M.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>MolSnapper: conditioning diffusion for structure-based drug design</article-title>. <source>J. Chem. Inf. Model.</source> <volume>65</volume>, <fpage>4263</fpage>&#x2013;<lpage>4273</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jcim.4c02008</pub-id>
<pub-id pub-id-type="pmid">40248896</pub-id>
</mixed-citation>
</ref>
</ref-list>
</back>
</article>