<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" article-type="brief-report">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Microbiol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Microbiology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Microbiol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-302X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmicb.2025.1734561</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Perspective</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Without safeguards, AI-Biology integration risks accelerating future pandemics</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes" equal-contrib="yes">
<name><surname>Wang</surname> <given-names>Dianzhuo</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x02020;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/2931253"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Huot</surname> <given-names>Marian</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x02020;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/3260322"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname> <given-names>Zechen</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Jiang</surname> <given-names>Kaiyi</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Shakhnovich</surname> <given-names>Eugene I.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Esvelt</surname> <given-names>Kevin M.</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Chemistry and Chemical Biology, Harvard University</institution>, <city>Cambridge, MA</city>, <country country="US">United States</country></aff>
<aff id="aff2"><label>2</label><institution>Laboratory of Physics, Ecole Normale Sup&#x000E9;rieure and PSL Research</institution>, <city>Paris</city>, <country country="FR">France</country></aff>
<aff id="aff3"><label>3</label><institution>Department of Physics and Center for Brain Science, Harvard University</institution>, <city>Cambridge, MA</city>, <country country="US">United States</country></aff>
<aff id="aff4"><label>4</label><institution>Omenn-Darling Bioengineering Institute, Princeton University</institution>, <city>Princeton, NJ</city>, <country country="US">United States</country></aff>
<aff id="aff5"><label>5</label><institution>Media Lab, Massachusetts Institute of Technology</institution>, <city>Cambridge, MA</city>, <country country="US">United States</country></aff>
<author-notes>
<corresp id="c001"><label>&#x0002A;</label>Correspondence: Dianzhuo Wang, <email xlink:href="mailto:johnwang@g.harvard.edu">johnwang@g.harvard.edu</email>; Eugene I. Shakhnovich, <email xlink:href="mailto:shakhnovich@chemistry.harvard.edu">shakhnovich@chemistry.harvard.edu</email>; Kevin M. Esvelt, <email xlink:href="mailto:esvelt@mit.edu">esvelt@mit.edu</email></corresp>
<fn fn-type="equal" id="fn001"><label>&#x02020;</label><p>These authors have contributed equally to this work</p></fn></author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-01-22">
<day>22</day>
<month>01</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1734561</elocation-id>
<history>
<date date-type="received">
<day>28</day>
<month>10</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>20</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>26</day>
<month>11</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2026 Wang, Huot, Zhang, Jiang, Shakhnovich and Esvelt.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Wang, Huot, Zhang, Jiang, Shakhnovich and Esvelt</copyright-holder>
<license>
<ali:license_ref start_date="2026-01-22">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Artificial intelligence now shapes the design of biological matter. Protein language models (pLMs), trained on millions of natural sequences, can predict, generate, and optimize functional proteins with minimal human input. When embedded in experimental pipelines, these systems enable closed-loop biological design at unprecedented speed. The same convergence that accelerates vaccine and therapeutic discovery, however, also creates new dual-use risks. We first map recent progress in using pLMs for fitness optimization across proteins, then critically assess how these approaches have been applied to viral evolution and how they intersect with laboratory workflows, including active learning and automation. Building on this analysis, we outline a capability-oriented framework for integrated AI&#x02013;biology systems, identify evaluation challenges specific to biological outputs, and propose research directions for training- and inference-time safeguards.</p></abstract>
<kwd-group>
<kwd>protein language models</kwd>
<kwd>intelligent automated biology</kwd>
<kwd>dual use research of concern (DURC)</kwd>
<kwd>biosecurity</kwd>
<kwd>protein design</kwd>
</kwd-group>
<funding-group>
 <funding-statement>The author(s) declared that financial support was received for this work and/or its publication. ES was sponsored by National Institutes of Health (R35GM139571).</funding-statement>
</funding-group>
<counts>
<fig-count count="4"/>
<table-count count="2"/>
<equation-count count="1"/>
<ref-count count="77"/>
<page-count count="9"/>
<word-count count="7251"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Systems Microbiology</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1">
<label>1</label>
<title>Introduction: the new biosecurity frontier in AI</title>
<p>Artificial intelligence (AI) is transforming the practice of biological discovery. Among the most powerful tools driving this change are protein language models (pLMs)&#x02014;large models trained on vast collections of natural protein sequences (<xref ref-type="bibr" rid="B71">Xiao et al., 2025</xref>). These tools offer unprecedented speed and scope for understanding biological systems, predicting properties like protein fitness (<xref ref-type="bibr" rid="B8">Chen et al., 2023</xref>; <xref ref-type="bibr" rid="B41">Meier et al., 2021</xref>; <xref ref-type="bibr" rid="B65">Vieira et al., 2024</xref>; <xref ref-type="bibr" rid="B77">Zhang et al., 2024</xref>), and even generating novel proteins entirely (<xref ref-type="bibr" rid="B51">Ruffolo and Madani, 2024</xref>; <xref ref-type="bibr" rid="B24">Hsu et al., 2022</xref>). In the recent fight against the SARS-CoV-2 pandemic, pLMs help predict viral fitness (<xref ref-type="bibr" rid="B66">Wang et al., 2024</xref>; <xref ref-type="bibr" rid="B75">Yu et al., 2025</xref>; Ito et al., <xref ref-type="bibr" rid="B28">2024</xref>) and immune escape (<xref ref-type="bibr" rid="B67">Wang et al., 2023</xref>), accelerate the development of vaccines and therapeutics (<xref ref-type="bibr" rid="B22">Hie et al., 2024</xref>; <xref ref-type="bibr" rid="B30">Jiang et al., 2024</xref>; <xref ref-type="bibr" rid="B56">Shanker et al., 2024</xref>) and anticipate viral evolution (<xref ref-type="bibr" rid="B26">Huot et al., 2025b</xref>,<xref ref-type="bibr" rid="B27">c</xref>).</p>
<p>However, the true transformative power, and potential risks, emerge not from pLMs in isolation, but from their integration with wet lab platforms and closed active-learning feedback loops. This convergence, which we term <italic>Intelligent Automated Biology</italic> (IAB), couples model-guided sequence design with robotic synthesis and experimental feedback, creating a high-throughput loop that iteratively refines biological function. Such systems promise major advances in therapeutic discovery, enzyme design, and pandemic preparedness. Yet they also reshape the landscape of biosecurity by enabling optimization of viral traits or other high-risk functions with diminishing human oversight.</p>
<p>Rather than portraying IAB as a singular threat, our goal is to examine how its technical trajectory alters the biosecurity framework. The integration of predictive modeling, active learning, and automated experimentation yields three interlocking effects. First, the exploration of protein fitness landscapes is dramatically accelerated: active learning allows the efficient identification of functional mutations from minimal data (<xref ref-type="bibr" rid="B30">Jiang et al., 2024</xref>; <xref ref-type="bibr" rid="B26">Huot et al., 2025b</xref>; <xref ref-type="bibr" rid="B72">Yang et al., 2025</xref>). Second, laboratory throughput expands by orders of magnitude, with automated platforms capable of testing thousands of variants per day (<xref ref-type="bibr" rid="B76">Zhang et al., 2025</xref>; <xref ref-type="bibr" rid="B74">Yu et al., 2023</xref>). Finally, these tools collectively lower the expertise required to perform sophisticated protein engineering, widening access to capabilities once restricted to specialized laboratories.</p>
<p>In this Perspective, we map the current progress in fitness optimization with pLMs and critically assess how these approaches are being applied to viral evolution and integrated with automated laboratory workflows. We then propose a capability-oriented framework for evaluating integrated AI&#x02013;biology systems and outline emerging directions for pLM-specific safeguards.</p></sec>
<sec id="s2">
<label>2</label>
<title>Protein language models: a leap in prediction capability</title>
<sec>
<label>2.1</label>
<title>Background</title>
<p>Inspired by advances in natural language processing, pLMs are trained on large databases of unaligned natural protein sequences using self-supervised objectives. In the <italic>autoregressive</italic> setting, a pLM is trained to predict the next amino acid in a sequence, modeling the joint probability of a protein sequence <italic>x</italic> &#x0003D; (<italic>x</italic><sub>1</sub>, <italic>x</italic><sub>2</sub>, &#x02026;, <italic>x</italic><sub><italic>L</italic></sub>) as:</p>
<disp-formula id="EQ1"><mml:math id="M1"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x0220F;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02223;</mml:mo><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x02026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(1)</label></disp-formula>
<p>capturing sequential and context-dependent dependencies across residues. This setup is particularly suited for sequence generation and allows scoring of full sequences or specific mutations via log-likelihood comparisons.</p>
<p>A key advantage of pLMs is that they operate directly on raw sequence data, eliminating the need for the time-consuming and often difficult step of creating multiple sequence alignments required by earlier methods. This makes pLMs more flexible and substantially faster to deploy, especially for novel proteins or viruses where alignment data is limited.</p>
<p>Furthermore, because pLMs are trained on such vast and diverse datasets, they learn highly general principles of protein biology. This allows a single, large pre-trained pLM&#x02014;such as those in the widely used ESM family (<xref ref-type="bibr" rid="B41">Meier et al., 2021</xref>; <xref ref-type="bibr" rid="B34">Lin et al., 2022</xref>)&#x02014;to make meaningful predictions about virtually any protein sequence, even those belonging to protein families not seen during training. This capability is known as &#x0201C;zero-shot&#x0201D; prediction. Recent advances have further improved the performance of pLMs by explicitly incorporating structural information into the model (<xref ref-type="fig" rid="F1">Figure 1</xref>). For example, ESM-3 (<xref ref-type="bibr" rid="B20">Hayes et al., 2024</xref>) unifies sequence and structure modeling by co-training across multiple modalities, including 3D coordinates, sequence likelihood, and masked token recovery. This joint training enables improved accuracy in predicting mutational effects and sequence plausibility within structural constraints. Additionally, some inverse folding models, like ESM-IF (<xref ref-type="bibr" rid="B24">Hsu et al., 2022</xref>), and ProteinMPNN (<xref ref-type="bibr" rid="B10">Dauparas et al., 2022</xref>) are structure-conditioned; they can predict sequences likely to fold into a specific 3D shape, or assess how well a mutation fits within a known structure. Beyond pLMs, other architectures offer similar capabilities. FAMPNN (<xref ref-type="bibr" rid="B57">Shuai et al., 2025</xref>) extends ProteinMPNN (<xref ref-type="bibr" rid="B10">Dauparas et al., 2022</xref>) by jointly modeling sequence identity and sidechain structure through combined masked language modeling and coordinate denoising. 
A distinct class of models focuses on <italic>de novo</italic> backbone generation: RFdiffusion (<xref ref-type="bibr" rid="B69">Watson et al., 2023</xref>) employs diffusion processes to construct novel protein structures from noise, enabling both unconditional topology design and conditional generation with explicit constraints (e.g., scaffolding functional motifs or designing target binders). While these structure-generation models differ architecturally from pLMs, they demonstrate comparable capabilities in rational protein design.</p>
<fig position="float" id="F1">
<label>Figure 1</label>
<caption><p>Model performance improves over time. Spearman correlation coefficients between predicted mutational effects and experimental ground truth on ProteinGym, colored by input type: MSA-based (green) (<xref ref-type="bibr" rid="B50">Riesselman et al., 2018</xref>; <xref ref-type="bibr" rid="B16">Frazer et al., 2021</xref>), sequence-based (blue) (<xref ref-type="bibr" rid="B41">Meier et al., 2021</xref>; <xref ref-type="bibr" rid="B34">Lin et al., 2022</xref>), or structure &#x0002B; sequence based (red) (<xref ref-type="bibr" rid="B33">Li et al., 2024</xref>; <xref ref-type="bibr" rid="B24">Hsu et al., 2022</xref>; <xref ref-type="bibr" rid="B20">Hayes et al., 2024</xref>).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-16-1734561-g0001.tif">
<alt-text content-type="machine-generated">Graph showing model performance versus year of release for different model types. MSA models in green, Sequence-only models in blue, Sequence + Structure models in red. Data points from 2018 to 2025 with an upward arrow indicating increasing performance over time. Models include DeepSequence, EVE, ESM variants, and others.</alt-text>
</graphic>
</fig></sec>
<sec>
<label>2.2</label>
<title>Models for viral protein properties prediction</title>
<p><xref ref-type="bibr" rid="B21">Hie et al. (2021)</xref> demonstrated that pLMs, when trained solely on viral sequence data without fine-tuning or structural supervision, can capture both the functional and antigenic consequences of mutations. They trained separate BiLSTM language models on corpora of aligned sequences for influenza HA, HIV Env, and SARS-CoV-2 Spike, and introduced the Constrained Semantic Change Search (CSCS) framework. In this framework, grammaticality (i.e., the model-assigned likelihood of a sequence) was hypothesized to reflect viral fitness, while semantic change (i.e., the shift in embedding space) served as a proxy for immune escape. Despite being trained only on viral sequences and without escape labels, the models successfully predicted known escape mutations in a zero-shot setting, highlighting the capacity of language models to learn biologically meaningful patterns directly from sequence data.</p>
<p>Building on this, <xref ref-type="bibr" rid="B1">Allman et al. (2024)</xref> systematically benchmarked grammaticality and semantic change across multiple viral proteins using both the original LSTM-based model and newer pretrained pLMs like ESM-2. Their analysis confirmed that grammaticality scores are consistently higher for viable mutations and can serve as a practical proxy for fitness. This finding held across viral systems, including HIV and influenza. In parallel, <xref ref-type="bibr" rid="B66">Wang et al. (2024)</xref> used ESM embeddings to predict the fitness of SARS-CoV-2 RBD variants by integrating them into a biophysical model. More broadly, other pLMs and AI models have been developed to predict key viral properties such as binding affinity (<xref ref-type="bibr" rid="B67">Wang et al., 2023</xref>; <xref ref-type="bibr" rid="B37">Loux et al., 2024</xref>; <xref ref-type="bibr" rid="B60">Taft et al., 2022</xref>; <xref ref-type="bibr" rid="B5">Basse et al., 2025</xref>; <xref ref-type="bibr" rid="B35">Liu et al., 2023</xref>), mutation spread (<xref ref-type="bibr" rid="B39">Maher et al., 2022</xref>), and fitness (Ito et al., <xref ref-type="bibr" rid="B28">2024</xref>; <xref ref-type="bibr" rid="B77">Zhang et al., 2024</xref>).</p>
<p>Collectively, these results underscore a crucial point: powerful pLMs, including those trained broadly rather than exclusively on viral data, encode meaningful information about viral protein function and evolution. This enables them to anticipate evolutionary trajectories and assess mutational effects in emerging pathogens, often with remarkable accuracy directly from sequence data.</p>
<p>Importantly, while these models were developed to support beneficial applications like vaccine design or pandemic forecasting, their predictive capabilities could also be misused. Moreover, because many pLMs are open-weight and require minimal fine-tuning, such capabilities may be accessible even without deep virological expertise. <bold>Notably, these tools have been used to design novel SARS-CoV-2 proteins that were experimentally shown to be infectious and capable of evading neutralization</bold> (<xref ref-type="bibr" rid="B73">Youssef et al., 2025</xref>; <xref ref-type="bibr" rid="B25">Huot et al., 2025a</xref>).</p></sec></sec>
<sec id="s3">
<label>3</label>
<title>The accelerator effect: integrating AI with lab experiments</title>
<p>pLMs are not just predictive tools; they are increasingly integrated into active protein engineering workflows, dramatically accelerating the pace and changing the nature of biological design. This integration manifests in several key applications.</p>
<sec>
<label>3.1</label>
<title>Smarter directed evolution</title>
<p>Directed evolution is a laboratory technique that mimics natural selection to improve proteins for specific purposes, such as improving the efficiency of enzymes or increasing the binding affinity of therapeutic antibodies (<xref ref-type="bibr" rid="B30">Jiang et al., 2024</xref>). Traditionally, this involves creating large libraries of protein variants and screening them for desired properties, often a laborious, inefficient, and expensive process. <bold>pLMs enable the directed evolution of novel proteins with substantially improved functional properties</bold>. By predicting the likely effects of mutations in either a zero-shot or few-shot manner, pLMs can guide researchers to focus on variants with a higher probability of success, effectively narrowing down the search space and reducing the experimental burden. Recent studies have demonstrated that general and structure-informed pLMs can substantially improve the binding affinity and neutralization breadth of human antibodies against diverse viral targets, including SARS-CoV-2, Ebola, and influenza, while requiring only minimal rounds of experimental screening (<xref ref-type="bibr" rid="B22">Hie et al., 2024</xref>; <xref ref-type="bibr" rid="B56">Shanker et al., 2024</xref>; <xref ref-type="bibr" rid="B55">Shan et al., 2022</xref>).</p>
<sec>
<label>3.2</label>
<title>Laboratory automation and closed-loop experimentation</title>
<p>The impact of pLMs is amplified when combined with laboratory automation, often referred to as &#x0201C;biofoundries&#x0201D; (<xref ref-type="bibr" rid="B23">Hillson et al., 2019</xref>; <xref ref-type="bibr" rid="B62">Torres-Acosta et al., 2022</xref>). This integration enables fully automated cycles of biological design, construction, testing, and learning (<xref ref-type="fig" rid="F2">Figure 2</xref>), commonly known as the Design&#x02013;Build&#x02013;Test&#x02013;Learn (DBTL) cycle. The DBTL cycle includes: (1) Design: AI/pLMs propose sequences with predicted properties; (2) Build: Robotic systems synthesize DNA and produce variants; (3) Test: Automated assays measure properties; (4) Learn: Results feed back to AI for improved designs in subsequent cycles.</p>
<fig position="float" id="F2">
<label>Figure 2</label>
<caption><p>Schematic of the DBTL cycle in AI-enabled bioengineering. pLMs propose novel sequences (Design), which are synthesized and expressed by robotic platforms (Build), evaluated through high-throughput assays (Test), and iteratively improved based on experimental feedback (Learn).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-16-1734561-g0002.tif">
<alt-text content-type="machine-generated">Flowchart illustrating the DBTL cycle: Design featuring DNA sequences and a protein structure; Build showing lab equipment ; Test showing a multi-well plate; Learn represented by a transformer icon. Arrows connect the stages in a cycle.</alt-text>
</graphic>
</fig>
<p>Platforms like PLMeAE (<xref ref-type="bibr" rid="B76">Zhang et al., 2025</xref>) demonstrate the power of this approach, achieving multiple rounds of enzyme optimization in just 10 days&#x02014;a task that could take many months using traditional methods (<xref ref-type="bibr" rid="B76">Zhang et al., 2025</xref>). This creates a powerful, high-speed, closed loop for biological engineering. While offering tremendous potential for accelerating therapeutic development, this automation also raises concerns. The speed and reduced human intervention inherent in these closed loops could potentially allow for the rapid optimization of harmful properties if misused, with fewer opportunities for oversight or ethical review during the process.</p></sec>
<sec>
<label>3.3</label>
<title>Efficient exploration with active learning</title>
<p>The sheer number of possible mutations, even within a single protein, makes exhaustive experimental or computational testing infeasible. Active learning offers a solution by integrating model predictions with experimental design (<xref ref-type="bibr" rid="B72">Yang et al., 2025</xref>; <xref ref-type="bibr" rid="B4">Balashova et al., 2025</xref>). Instead of testing randomly or relying solely on initial predictions, active learning uses the predictive models to select the most informative experiments to perform at each stage, based on a chosen acquisition function (<xref ref-type="bibr" rid="B40">Margatina et al., 2021</xref>).</p>
<p>The typical process starts with wet-lab testing a small, initial set of variants. The results are used to train or fine-tune a predictive model (like a pLM) (<xref ref-type="bibr" rid="B52">Schmirler et al., 2023</xref>). The model then evaluates the vast pool of untested variants and identifies those whose experimental evaluation would maximally improve the model&#x00027;s accuracy or are most likely to possess the desired properties (e.g., high fitness, activity, or strong binding). These selected variants are then synthesized and tested, and the new data is used to update the model, repeating the cycle. This iterative strategy dramatically reduces the number of experiments required to explore the mutational landscape and identify top-performing or high-risk variants. Active learning has already shown success in domains like drug discovery (<xref ref-type="bibr" rid="B15">Fralish and Reker, 2024</xref>; <xref ref-type="bibr" rid="B18">Graff et al., 2021</xref>; <xref ref-type="bibr" rid="B68">Warmuth et al., 2001</xref>; <xref ref-type="bibr" rid="B3">Bailey et al., 2023</xref>).</p>
<p>Recent studies have shown that active learning frameworks can optimize enzyme, antibody, or other protein variants substantially faster than random screening, using only a small fraction of the experiments that traditional methods require (<xref ref-type="bibr" rid="B30">Jiang et al., 2024</xref>; <xref ref-type="bibr" rid="B72">Yang et al., 2025</xref>). This efficiency can also enable researchers to proactively identify concerning viral mutations before they potentially emerge naturally (<xref ref-type="bibr" rid="B26">Huot et al., 2025b</xref>).</p>
<p><bold>The synergy between pLMs (for design and prediction), active learning (for efficient experimental guidance), and laboratory automation (for rapid build and test cycles) creates an engineering capability greater than the sum of its parts</bold>. This integrated approach enables systematic biological exploration and optimization at an unprecedented speed and scale. While this accelerates beneficial research, it simultaneously increases the risk of malicious biological engineering and potentially reduces human oversight within automated loops.</p></sec></sec>
<sec id="s4">
<label>4</label>
<title>The dual-use dilemma: assessing risks of IAB</title>
<p>The core challenge presented by the convergence of AI and biotechnology lies in its inherent dual-use nature: technologies developed with beneficial intent, such as improving human health or combating pandemics, can often be repurposed to cause harm. pLMs substantially amplify this dilemma by accelerating design cycles, lowering knowledge barriers, and enabling automation at unprecedented scales. To effectively discuss and manage these risks, it is helpful to categorize the capabilities enabled by IAB and assess their associated risk levels.</p>
<p>We propose a framework categorizing IAB capabilities into five levels, reflecting escalating potential for misuse as pLM integration deepens (<xref ref-type="table" rid="T1">Table 1</xref>). This framework builds upon initial concepts and incorporates insights from recent literature on AI capabilities and biosecurity risks.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>IAB capability levels and associated biosecurity risk.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left"><bold>Capability level</bold></th>
<th valign="top" align="left"><bold>Description</bold></th>
<th valign="top" align="left"><bold>Examples</bold></th>
<th valign="top" align="left"><bold>Base risk level</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Level 1: Zero-shot prediction</td>
<td valign="top" align="left">Basic pLM predictions (e.g., sequence likelihood as fitness proxy).</td>
<td valign="top" align="left">ESM-1v zero-shot prediction with grammaticality score (<xref ref-type="bibr" rid="B41">Meier et al., 2021</xref>; <xref ref-type="bibr" rid="B1">Allman et al., 2024</xref>).</td>
<td valign="top" align="left">Low&#x02013;Moderate</td>
</tr>
<tr>
<td valign="top" align="left">Level 2: Advanced prediction &#x00026; analysis</td>
<td valign="top" align="left">Accurate ML/pLM prediction of complex molecular properties (e.g., binding affinity changes (&#x00394;<italic>&#x00394;G</italic>), immune escape potential, stability).</td>
<td valign="top" align="left">Fine-tuned ESM3 to predict viral fitness; UniBind (<xref ref-type="bibr" rid="B67">Wang et al., 2023</xref>) predicting binding affinity; EVEscape (<xref ref-type="bibr" rid="B61">Thadani et al., 2023</xref>) and VIRAL (<xref ref-type="bibr" rid="B26">Huot et al., 2025b</xref>) predicting escape variants; MMSite for active site prediction (<xref ref-type="bibr" rid="B47">Ouyang et al., 2024</xref>)</td>
<td valign="top" align="left">Moderate</td>
</tr>
<tr>
<td valign="top" align="left">Level 3: Targeted sequence generation</td>
<td valign="top" align="left">Generative AI/pLMs designing novel sequences optimized for specific functional properties (e.g., enhanced binding, stability, potentially virulence factors or toxins).</td>
<td valign="top" align="left">ProteinMPNN (<xref ref-type="bibr" rid="B10">Dauparas et al., 2022</xref>) or ESM-IF1 (<xref ref-type="bibr" rid="B24">Hsu et al., 2022</xref>) for generative enzyme/antibody design; Potential toxin/pathogen design.</td>
<td valign="top" align="left">High</td>
</tr>
<tr>
<td valign="top" align="left">Level 4: Integrated design &#x00026; active learning</td>
<td valign="top" align="left">Combining generative models with active learning/Bayesian optimization for efficient discovery and optimization of desired (potentially harmful) biological functions.</td>
<td valign="top" align="left">ProteinNPT (<xref ref-type="bibr" rid="B45">Notin et al., 2023</xref>) for active learning frameworks; EVOLVEpro (<xref ref-type="bibr" rid="B30">Jiang et al., 2024</xref>) and ALDE (<xref ref-type="bibr" rid="B72">Yang et al., 2025</xref>) for directed evolution.</td>
<td valign="top" align="left">Very High</td>
</tr>
<tr>
<td valign="top" align="left">Level 5: Full AI-Bio automation integration</td>
<td valign="top" align="left">Closed-loop systems linking AI protein design, learning, synthesis, and testing (DBTL cycle) with minimal human oversight</td>
<td valign="top" align="left">PLMeAE (<xref ref-type="bibr" rid="B76">Zhang et al., 2025</xref>) or iBioFAB (<xref ref-type="bibr" rid="B74">Yu et al., 2023</xref>) where pLMs are embedded in automated biofoundries</td>
<td valign="top" align="left">Extremely High</td>
</tr></tbody>
</table>
</table-wrap>
<p>This table illustrates that while Level 2 already poses a threat by enabling the design of viral proteins that escape antibody binding, the most substantial risks emerge from the DBTL cycle coupled with physical automation. Level 5 enables rapid, automated, and potentially remote execution of complex bioengineering tasks,<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref> maximizing both the potential for benefit and the potential for misuse. For each level we classified, concrete examples are provided&#x02014;and concerningly, full AI-biology automation integration at Level 5 has already been observed in 2025. A key rationale for this tiered framework is that it enables the development of proportionate safeguards tailored to each IAB capability level. The specific design and implementation of such tiered safeguards, while outside the scope of this work, represents a critical direction for future policy development and technical research for IAB.</p>
<p>To better quantify the acceleration enabled by this integration, we estimated the speed to obtain a functional variant (&#x0201C;hit&#x0201D;) using wet-lab hit rates on an 85-amino-acid peptide (<xref ref-type="bibr" rid="B29">Jawaid et al., 2023</xref>). Hit rate is defined as the fraction of tested sequences that exhibit the desired function. Combining these hit rates with representative experimental throughput values, we find that AI-guided, automated pipelines (Level 5) can yield thousands of hits per day&#x02014;several orders of magnitude more than traditional, manual, non-AI-guided approaches (<xref ref-type="fig" rid="F3">Figure 3</xref>). This illustrates how full-stack automation not only increases capability but compresses timelines, potentially outpacing the safety checks traditionally used to govern wet-lab experimentation.</p>
<fig position="float" id="F3">
<label>Figure 3</label>
<caption><p>Functional protein &#x0201C;hits&#x0201D; per day from AI vs. non-AI methods under low- and high-throughput settings. Based on throughput &#x000D7; hit rate estimated from a study of an 85-amino-acid protein (<xref ref-type="bibr" rid="B29">Jawaid et al., 2023</xref>).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-16-1734561-g0003.tif">
<alt-text content-type="machine-generated">Bar chart comparing IAB capability levels in hits per day, with logarithmic scale. 0.01 for Non-AI, Low Throughput. 0.20 for AI-Enhanced, Low Throughput. 50 for Non-AI but with automation. Approximately 1,000 for AI-Enhanced with lab automation.</alt-text>
</graphic>
</fig>
<p>A critical factor contributing to this assessment difficulty is the &#x0201C;evaluation bottleneck&#x0201D; (<xref ref-type="bibr" rid="B48">Pannu et al., 2025</xref>). AI-Bio models at Capability Level 3 and above can generate novel protein sequences, but accurately predicting their real-world function&#x02014;especially their potential harmfulness&#x02014;remains an open challenge. Definitive functional validation often requires synthesizing the DNA and expressing the protein in a wet lab.</p>
<p>However, if the AI-designed entity possesses hazardous properties, this evaluation step becomes inherently dangerous. This stands in contrast to evaluating text generated by large language models (LLMs) in the medical or virology domain, where outputs remain directly interpretable by humans and standardized benchmarks exist to assess risks (<xref ref-type="bibr" rid="B9">Chen et al., 2025</xref>; Gotting et al., <xref ref-type="bibr" rid="B17">2025</xref>). The inability to safely and reliably assess the biological function of IAB outputs poses a fundamental obstacle to timely risk detection and mitigation. Without robust, trustworthy pLM risk evaluation tools and benchmarks, we risk not knowing the true danger posed by a new IAB or a specific protein design until it has been physically instantiated&#x02014;potentially too late to prevent harm.</p></sec>
<sec id="s5">
<label>5</label>
<title>Open challenges: safeguarding pLMs</title>
<p>On the biosecurity side, traditional regulatory measures are globally insufficient for addressing AI-specific risks. Established frameworks, such as the U.S. Policy on Dual Use Research of Concern (DURC) (U.S. Department of Health and Human Services, Administration for Strategic Preparedness and Response, ASPR), rely on static lists of specific agents and experimental manipulations that fail to capture the versatile nature of tools like pLMs. As such, it does not account for the dual-use potential of IAB (<xref ref-type="bibr" rid="B64">Undheim, 2024</xref>). This disconnect extends to other major players: while China&#x00027;s 2020 Biosecurity Law elevates the issue to a national security priority, it remains heavily focused on physical containment rather than algorithmic risks (<xref ref-type="bibr" rid="B42">Min et al., 2025</xref>). Meanwhile, in Latin America, governance is hindered by limited institutional awareness and a lack of policy harmonization (<xref ref-type="bibr" rid="B14">Flores-Coronado et al., 2025</xref>).</p>
<p>One of the most widely used approaches in biosecurity&#x02014;<bold>DNA synthesis screening</bold> (<xref ref-type="bibr" rid="B54">SecureDNA, 2025</xref>; <xref ref-type="bibr" rid="B6">Baum et al., 2024</xref>)&#x02014;aims to prevent the acquisition of matches to regulated pathogens or known hazardous sequences (<xref ref-type="bibr" rid="B11">DiEuliis et al., 2017</xref>). Yet red-teaming has exposed severe vulnerabilities: an MIT experiment demonstrated how order splitting and camouflaging allowed synthetic fragments capable of reconstructing the 1918 influenza virus to be purchased from many providers. In that test, 93% of U.S. vendors and 100% of international vendors delivered the disguised fragments. Moreover, a separate adversarial exercise by Microsoft scientists underscored the same risk of evasion (<xref ref-type="bibr" rid="B70">Wittmann et al., 2025</xref>). Also, generative models can design entirely novel protein sequences (<xref ref-type="bibr" rid="B10">Dauparas et al., 2022</xref>) or potentially generate sequences designed to evade detection (<xref ref-type="bibr" rid="B38">Lu et al., 2023</xref>).</p>
<p><bold>On the training methodology side, no established safeguard frameworks exist for pLMs</bold>. To address this gap, we explore early-stage technical approaches&#x02014;adapted from the LLM safety literature&#x02014;that may help reduce the risk of generating dangerous biological sequences. Broadly, these approaches can be categorized into <bold>training-time guardrails</bold>, which modify the model&#x00027;s learning process to discourage the generation of harmful content; and <bold>inference-time guardrails</bold>, which filter or steer model outputs at the point of generation. One fundamental training-time strategy is <italic>likelihood suppression</italic>, which aims to discourage the model from assigning high probability to harmful sequences (<xref ref-type="fig" rid="F4">Figure 4</xref>). This can be formalized by modifying the training objective to penalize the likelihood of pathogenic sequences:</p>
<disp-formula id="EQ2"><mml:math id="M2"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">original</mml:mtext></mml:mstyle></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:mtext>&#x003BB;</mml:mtext><mml:mo class="qopname">log</mml:mo><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">pathogenic</mml:mtext></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(2)</label></disp-formula>
<fig position="float" id="F4">
<label>Figure 4</label>
<caption><p>Illustration of examples of training-time and inference-time guardrails for pLMs. Likelihood suppression during training time (<xref ref-type="bibr" rid="B32">Ko et al., 2024</xref>) assigns low probability to pathogenic sequences, while an embedding-space rejection radius (<xref ref-type="bibr" rid="B49">Rebedea et al., 2023</xref>) blocks generation of sequences too close to known harmful proteins in inference time.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-16-1734561-g0004.tif">
<alt-text content-type="machine-generated">Graph on the left shows likelihood suppression with lower likelihood for pathogenic sequences. The right diagram depicts embedding rejection radius with a dotted circle enclosing points representing known viral embeddings.</alt-text>
</graphic>
</fig>
<p>where <inline-formula><mml:math id="M3"><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:math></inline-formula> represents the likelihood of any sequence and &#x003BB; controls the strength of the suppression (<xref ref-type="bibr" rid="B32">Ko et al., 2024</xref>). However, this approach is not without drawbacks, as safeguards that alter the loss function can be difficult to adopt due to their negative effects on beneficial model uses (<xref ref-type="bibr" rid="B12">Dong et al., 2025</xref>). A more adaptive approach to implementing such training-time penalization, or more broadly steering the model toward safer outputs during training, is Reinforcement Learning from Human Feedback (RLHF) (<xref ref-type="bibr" rid="B46">Ouyang et al., 2022</xref>; <xref ref-type="bibr" rid="B2">Bai et al., 2022</xref>). While no end-to-end implementation of RLHF for pLM safety has been empirically demonstrated, we sketch a conceptual mapping here as a foundation for crucial future research and development in this area. In this context, the pLM acts as a policy generating sequences, while a separate reward model (RM)&#x02014;potentially trained on datasets of viral protein sequences, structures, and functions&#x02014;evaluates their potential harmfulness. The pLM can then be fine-tuned using RL algorithms like Proximal Policy Optimization (PPO) (<xref ref-type="bibr" rid="B53">Schulman et al., 2017</xref>) to minimize the generation of dangerous sequences. This approach represents an advanced method for instilling safety considerations during the model training phase. 
Recent work has demonstrated the feasibility of using RL techniques on pLMs for preference optimization and fine-tuning (<xref ref-type="bibr" rid="B31">Karimi et al., 2024</xref>; <xref ref-type="bibr" rid="B59">Stocco et al., 2024</xref>; <xref ref-type="bibr" rid="B43">Mistani and Mysore, 2024</xref>; <xref ref-type="bibr" rid="B36">Liu et al., 2025</xref>; <xref ref-type="bibr" rid="B7">Blalock et al., 2025</xref>), suggesting these methods could be adapted for safety purposes. Developing an RM for pLM safety could face difficulties, including precisely defining the harmfulness score and obtaining sufficient labeled protein data to train it. RLHF for pLMs can also inherit known issues from LLMs, such as reward hacking.</p>
<p>Alternatively, safeguards can be implemented as inference-time guardrails, a strategy most effective for proprietary commercial models where providers embed protections at the API level. In contrast, these external guardrails are less robust for open-source models, as attackers can bypass them by modifying the code directly (<xref ref-type="bibr" rid="B12">Dong et al., 2025</xref>).</p>
<p>These methods typically do not alter the underlying model weights but instead apply checks, filters, or steering mechanisms during or after the generation process. This can involve pre-generation constraint conditioning, where generation is guided away from risky regions of the sequence space using techniques like control tokens or latent variable manipulation. A specific example of an inference-time filter is the embedding-space rejection radius (<xref ref-type="bibr" rid="B49">Rebedea et al., 2023</xref>) (<xref ref-type="fig" rid="F4">Figure 4</xref>). This method blocks the output of generated sequences whose embeddings are found to be too close to those of known harmful proteins. During inference, a generated sequence&#x00027;s embedding would be compared against a curated database of harmful protein embeddings (e.g., using cosine similarity or Euclidean distance). If a sequence falls within a predefined rejection radius of a known harmful protein, its output is blocked or flagged.</p>
<p>Developing robust and generalizable safeguards, however, will also require standardized benchmarks to evaluate model capabilities in high-risk domains such as viral fitness prediction. To support this, we propose a zero-shot benchmark example (<xref ref-type="table" rid="T2">Table 2</xref>) built from publicly available viral mutational scanning datasets, which quantify fitness across thousands of viral protein variants. These could enable assessments of whether a pLM can predict viral properties, offering an empirical basis to evaluate dual-use risk, particularly for open-weight models. We acknowledge that the development of such benchmarks might be prone to being misused for designing new viruses; therefore, efforts are needed to widen the evaluation-generation gap&#x02014;that is, making it harder to generate harmful viruses but easier to detect them. Furthermore, future work should expand on this foundation to develop a more comprehensive dataset.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Example of a viral fitness dataset for benchmarking pLM viral capabilities.</p></caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left"><bold>Virus</bold></th>
<th valign="top" align="left"><bold>Protein</bold></th>
<th valign="top" align="left"><bold>Fitness proxy</bold></th>
<th valign="top" align="left"><bold>&#x00023; Variants</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" rowspan="2">SARS-CoV-2</td>
<td valign="top" align="left" rowspan="2">Spike RBD</td>
<td valign="top" align="left">Expression score via yeast display (<xref ref-type="bibr" rid="B58">Starr et al., 2020</xref>)</td>
<td valign="top" align="left">&#x0007E;3,800</td>
</tr>
 <tr>
<td/>
<td/>
<td valign="top" align="left">Binding affinity to ACE2 (<xref ref-type="bibr" rid="B44">Moulana et al., 2022</xref>)</td>
<td valign="top" align="left">&#x0007E;33,000</td>
</tr>
<tr>
<td valign="top" align="left">Influenza A</td>
<td valign="top" align="left">Hemagglutinin (HA)</td>
<td valign="top" align="left">Replication efficiency (<xref ref-type="bibr" rid="B13">Doud and Bloom, 2016</xref>)</td>
<td valign="top" align="left">&#x0007E;10,000</td>
</tr>
<tr>
<td valign="top" align="left">HIV-1</td>
<td valign="top" align="left">Envelope glycoprotein (Env)</td>
<td valign="top" align="left">Replication efficiency (<xref ref-type="bibr" rid="B19">Haddox et al., 2018</xref>)</td>
<td valign="top" align="left">&#x0007E;13,000</td>
</tr></tbody>
</table>
</table-wrap>
<p>While our discussion centers on built-in pLM safeguards, we recognize that comprehensive IAB risk mitigation requires a multi-layered defense strategy. We focus primarily on pLM-specific safeguards for several key reasons. First, pLMs represent a critical and currently under-defended chokepoint in the IAB pipeline&#x02014;while other layers like DNA synthesis screening and laboratory oversight have established (though imperfect) safeguards. Second, pLM safeguards offer broad coverage across multiple downstream applications, potentially preventing harmful sequences from being generated regardless of the specific experimental platform used. However, effective IAB governance requires safeguards across all system components from laboratory-level to model-level.</p></sec>
<sec id="s6">
<label>6</label>
<title>Conclusion: from capability to responsibility</title>
<p>Integrating AI, particularly pLMs, with automated experimental biology platforms marks a significant technological leap. The specific biosecurity implications depend critically on the application of the IAB system. For instance, capabilities advanced for exploring small biomolecules or designing novel therapeutic antibodies pose a vastly different and lower relative risk than systems applied to exploring the immune escape of pandemic pathogen variants. The most acute risks emerge when IAB systems are used to rapidly explore complex biological landscapes to optimize high-risk functions, such as viral fitness or immune evasion, in pathogens of concern. Specifically, malicious actors could generate and release multiple variants that escape antibody-based population immunity. Even with intact T-cell immunity, the serial release of such functional, immune-evasive variants could drive repeated global waves of infection and substantial mortality.</p>
<p>Existing AI and biosecurity frameworks fall short of managing these IAB-specific risks. The path forward lies in developing pLM safeguards that can differentiate between these applications&#x02014;enabling continued innovation in low-risk domains while implementing stringent controls for high-risk uses. Training-time safeguards like likelihood suppression can be calibrated to specifically penalize pathogenic sequences while preserving performance on therapeutic targets. Similarly, inference-time guardrails can implement application-specific screening, blocking outputs in high-risk domains while permitting beneficial uses. Additionally, safety frameworks and safeguards for pLMs should be easy to update, so newly identified weapons-relevant capabilities can be quickly restricted, especially as AI-Bio tools become more powerful.</p>
<p>Rather than constraining all IAB development, the goal should be advancing capability selectively while reducing risks differentially. Safeguards could advance on two fronts: (i) the safety of the model itself, by integrating technical controls at the training-time and at inference-time guardrails discussed in Section 5, and (ii) the DNA-synthesis infrastructure where screening must move beyond today&#x00027;s homology-match filters, and be expanded to function-aware or structure-aware methods that use pLMs themselves as screening tools. Unlike LLMs, pLM outputs can be synthesized into real biological threats. Risk must therefore be assessed across the entire pipeline, from design to synthesis, with enforceable safeguards.</p></sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material, further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec sec-type="author-contributions" id="s8">
<title>Author contributions</title>
<p>DW: Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. MH: Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. ZZ: Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. KJ: Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. ES: Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. KE: Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declare that no Gen AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Allman</surname> <given-names>B.</given-names></name> <name><surname>Vieira</surname> <given-names>L.</given-names></name> <name><surname>Diaz</surname> <given-names>D. J.</given-names></name> <name><surname>Wilke</surname> <given-names>C. O.</given-names></name></person-group> (<year>2024</year>). <article-title>A systematic evaluation of the language-of-viral-escape model using multiple machine learning frameworks</article-title>. <source>bioRxiv</source>, 2024&#x02013;09. doi: <pub-id pub-id-type="doi">10.1101/2024.09.04.611278</pub-id><pub-id pub-id-type="pmid">40300635</pub-id></mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bai</surname> <given-names>Y.</given-names></name> <name><surname>Jones</surname> <given-names>A.</given-names></name> <name><surname>Ndousse</surname> <given-names>K.</given-names></name> <name><surname>Askell</surname> <given-names>A.</given-names></name> <name><surname>Chen</surname> <given-names>A.</given-names></name> <name><surname>DasSarma</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Training a helpful and harmless assistant with reinforcement learning from human feedback</article-title>. <source>arXiv preprint arXiv:2204.05862</source>.</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bailey</surname> <given-names>M.</given-names></name> <name><surname>Moayedpour</surname> <given-names>S.</given-names></name> <name><surname>Li</surname> <given-names>R.</given-names></name> <name><surname>Corrochano-Navarro</surname> <given-names>A.</given-names></name> <name><surname>K&#x000F6;tter</surname> <given-names>A.</given-names></name> <name><surname>Kogler-Anele</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Deep batch active learning for drug discovery</article-title>. <source>bioRxiv</source>, <fpage>2023</fpage>&#x02013;<lpage>07</lpage>. doi: <pub-id pub-id-type="doi">10.7554/eLife.89679.1</pub-id></mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Balashova</surname> <given-names>D.</given-names></name> <name><surname>Frank</surname> <given-names>R.</given-names></name> <name><surname>Kuzyakina</surname> <given-names>S.</given-names></name> <name><surname>Weltevreden</surname> <given-names>D.</given-names></name> <name><surname>Robert</surname> <given-names>P. A.</given-names></name> <name><surname>Sandve</surname> <given-names>G. K.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Active learning for improving out-of-distribution lab-in-the-loop experimental design</article-title>. <source>bioRxiv</source>, <fpage>2025</fpage>&#x02013;<lpage>02</lpage>. doi: <pub-id pub-id-type="doi">10.1101/2025.02.26.640110</pub-id></mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Basse</surname> <given-names>M.</given-names></name> <name><surname>Wang</surname> <given-names>D.</given-names></name> <name><surname>Shakhnovich</surname> <given-names>E. I.</given-names></name></person-group> (<year>2025</year>). <article-title>Spatial clustering of interface residues enhances few-shot prediction of viral protein binding</article-title>. <source>bioRxiv</source>, <fpage>2025</fpage>&#x02013;<lpage>04</lpage>. doi: <pub-id pub-id-type="doi">10.1101/2025.04.10.647895</pub-id></mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Baum</surname> <given-names>C.</given-names></name> <name><surname>Berlips</surname> <given-names>J.</given-names></name> <name><surname>Chen</surname> <given-names>W.</given-names></name> <name><surname>Cui</surname> <given-names>H.</given-names></name> <name><surname>Damgard</surname> <given-names>I.</given-names></name> <name><surname>Dong</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>A system capable of verifiably and privately screening global DNA synthesis</article-title>. <source>arXiv preprint arXiv:2403.14023</source>.</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Blalock</surname> <given-names>N.</given-names></name> <name><surname>Seshadri</surname> <given-names>S.</given-names></name> <name><surname>Babbar</surname> <given-names>A.</given-names></name> <name><surname>Fahlberg</surname> <given-names>S. A.</given-names></name> <name><surname>Kulkarni</surname> <given-names>A.</given-names></name> <name><surname>Romero</surname> <given-names>P. A.</given-names></name></person-group> (<year>2025</year>). <article-title>Functional alignment of protein language models via reinforcement learning</article-title>. <source>bioRxiv</source>, <fpage>2025</fpage>&#x02013;<lpage>05</lpage>. doi: <pub-id pub-id-type="doi">10.1101/2025.05.02.651993</pub-id></mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>L.</given-names></name> <name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Li</surname> <given-names>Z.</given-names></name> <name><surname>Li</surname> <given-names>R.</given-names></name> <name><surname>Huo</surname> <given-names>R.</given-names></name> <name><surname>Chen</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Learning protein fitness landscapes with deep mutational scanning data from multiple sources</article-title>. <source>Cell Syst</source>. <volume>14</volume>, <fpage>706</fpage>&#x02013;<lpage>721</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cels.2023.07.003</pub-id><pub-id pub-id-type="pmid">37591206</pub-id></mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>S.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>M.</given-names></name> <name><surname>Jiang</surname> <given-names>E. H.</given-names></name> <name><surname>Zeng</surname> <given-names>Q.</given-names></name> <name><surname>Yu</surname> <given-names>C.-H.</given-names></name></person-group> (<year>2025</year>). <article-title>Cares: Comprehensive evaluation of safety and adversarial robustness in medical LLMs</article-title>. <source>arXiv preprint arXiv:2505.11413</source>.</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dauparas</surname> <given-names>J.</given-names></name> <name><surname>Anishchenko</surname> <given-names>I.</given-names></name> <name><surname>Bennett</surname> <given-names>N.</given-names></name> <name><surname>Bai</surname> <given-names>H.</given-names></name> <name><surname>Ragotte</surname> <given-names>R. J.</given-names></name> <name><surname>Milles</surname> <given-names>L. F.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Robust deep learning-based protein sequence design using proteinmpnn</article-title>. <source>Science</source> <volume>378</volume>, <fpage>49</fpage>&#x02013;<lpage>56</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.add2187</pub-id><pub-id pub-id-type="pmid">36108050</pub-id></mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>DiEuliis</surname> <given-names>D.</given-names></name> <name><surname>Carter</surname> <given-names>S. R.</given-names></name> <name><surname>Gronvall</surname> <given-names>G. K.</given-names></name></person-group> (<year>2017</year>). <article-title>Options for synthetic DNA order screening, revisited</article-title>. <source>mSphere</source> <volume>2</volume>:<elocation-id>e00319-17</elocation-id>. doi: <pub-id pub-id-type="doi">10.1128/mSphere.00319-17</pub-id><pub-id pub-id-type="pmid">28861521</pub-id></mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dong</surname> <given-names>Y.</given-names></name> <name><surname>Mu</surname> <given-names>R.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Sun</surname> <given-names>S.</given-names></name> <name><surname>Zhang</surname> <given-names>T.</given-names></name> <name><surname>Wu</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Safeguarding large language models: a survey</article-title>. <source>Artif. Intell. Rev</source>. <volume>58</volume>:<fpage>382</fpage>. doi: <pub-id pub-id-type="doi">10.1007/s10462-025-11389-2</pub-id><pub-id pub-id-type="pmid">41114380</pub-id></mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Doud</surname> <given-names>M. B.</given-names></name> <name><surname>Bloom</surname> <given-names>J. D.</given-names></name></person-group> (<year>2016</year>). <article-title>Accurate measurement of the effects of all amino-acid mutations on influenza hemagglutinin</article-title>. <source>Viruses</source> <volume>8</volume>:<fpage>155</fpage>. doi: <pub-id pub-id-type="doi">10.3390/v8060155</pub-id><pub-id pub-id-type="pmid">27271655</pub-id></mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Flores-Coronado</surname> <given-names>J. A.</given-names></name> <name><surname>Alanis-Valdez</surname> <given-names>A. Y.</given-names></name> <name><surname>Herrera-Saldivar</surname> <given-names>M. F.</given-names></name> <name><surname>Flores-Flores</surname> <given-names>A. S.</given-names></name> <name><surname>Vazquez-Guillen</surname> <given-names>J. M.</given-names></name> <name><surname>Tamez-Guerra</surname> <given-names>R. S.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Awareness of the dual-use dilemma in scientific research: reflections and challenges to Latin America</article-title>. <source>Front. Bioeng. Biotechnol</source>. <volume>13</volume>:<fpage>1649781</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fbioe.2025.1649781</pub-id><pub-id pub-id-type="pmid">40791856</pub-id></mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fralish</surname> <given-names>Z.</given-names></name> <name><surname>Reker</surname> <given-names>D.</given-names></name></person-group> (<year>2024</year>). <article-title>Taking a deep dive with active learning for drug discovery</article-title>. <source>Nat. Comput. Sci</source>. <volume>4</volume>, <fpage>727</fpage>&#x02013;<lpage>728</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s43588-024-00704-6</pub-id><pub-id pub-id-type="pmid">39443714</pub-id></mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Frazer</surname> <given-names>J.</given-names></name> <name><surname>Notin</surname> <given-names>P.</given-names></name> <name><surname>Dias</surname> <given-names>M.</given-names></name> <name><surname>Gomez</surname> <given-names>A.</given-names></name> <name><surname>Min</surname> <given-names>J. K.</given-names></name> <name><surname>Brock</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Disease variant prediction with deep generative models of evolutionary data</article-title>. <source>Nature</source> <volume>599</volume>, <fpage>91</fpage>&#x02013;<lpage>95</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41586-021-04043-8</pub-id><pub-id pub-id-type="pmid">34707284</pub-id></mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>G&#x000F6;tting</surname> <given-names>J.</given-names></name> <name><surname>Medeiros</surname> <given-names>P.</given-names></name> <name><surname>Sanders</surname> <given-names>J. G.</given-names></name> <name><surname>Li</surname> <given-names>N.</given-names></name> <name><surname>Phan</surname> <given-names>L.</given-names></name> <name><surname>Elabd</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Virology capabilities test (VCT): a multimodal virology Q&#x00026;A benchmark</article-title>. <source>arXiv preprint arXiv:2504.16137</source>.</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Graff</surname> <given-names>D. E.</given-names></name> <name><surname>Shakhnovich</surname> <given-names>E. I.</given-names></name> <name><surname>Coley</surname> <given-names>C. W.</given-names></name></person-group> (<year>2021</year>). <article-title>Accelerating high-throughput virtual screening through molecular pool-based active learning</article-title>. <source>Chem. Sci</source>. <volume>12</volume>, <fpage>7866</fpage>&#x02013;<lpage>7881</lpage>. doi: <pub-id pub-id-type="doi">10.1039/D0SC06805E</pub-id><pub-id pub-id-type="pmid">34168840</pub-id></mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Haddox</surname> <given-names>H. K.</given-names></name> <name><surname>Dingens</surname> <given-names>A. S.</given-names></name> <name><surname>Hilton</surname> <given-names>S. K.</given-names></name> <name><surname>Overbaugh</surname> <given-names>J.</given-names></name> <name><surname>Bloom</surname> <given-names>J. D.</given-names></name></person-group> (<year>2018</year>). <article-title>Mapping mutational effects along the evolutionary landscape of HIV envelope</article-title>. <source>Elife</source> <volume>7</volume>:<fpage>e34420</fpage>. doi: <pub-id pub-id-type="doi">10.7554/eLife.34420</pub-id><pub-id pub-id-type="pmid">29590010</pub-id></mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hayes</surname> <given-names>T.</given-names></name> <name><surname>Rao</surname> <given-names>R.</given-names></name> <name><surname>Akin</surname> <given-names>H.</given-names></name> <name><surname>Sofroniew</surname> <given-names>N. J.</given-names></name> <name><surname>Oktay</surname> <given-names>D.</given-names></name> <name><surname>Lin</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Simulating 500 million years of evolution with a language model</article-title>. <source>bioRxiv</source>, 2024&#x02013;07. doi: <pub-id pub-id-type="doi">10.1101/2024.07.01.600583</pub-id><pub-id pub-id-type="pmid">39818825</pub-id></mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hie</surname> <given-names>B.</given-names></name> <name><surname>Zhong</surname> <given-names>E. D.</given-names></name> <name><surname>Berger</surname> <given-names>B.</given-names></name> <name><surname>Bryson</surname> <given-names>B.</given-names></name></person-group> (<year>2021</year>). <article-title>Learning the language of viral evolution and escape</article-title>. <source>Science</source> <volume>371</volume>, <fpage>284</fpage>&#x02013;<lpage>288</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.abd7331</pub-id><pub-id pub-id-type="pmid">33446556</pub-id></mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hie</surname> <given-names>B. L.</given-names></name> <name><surname>Shanker</surname> <given-names>V. R.</given-names></name> <name><surname>Xu</surname> <given-names>D.</given-names></name> <name><surname>Bruun</surname> <given-names>T. U. J.</given-names></name> <name><surname>Weidenbacher</surname> <given-names>P. A.</given-names></name> <name><surname>Tang</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Efficient evolution of human antibodies from general protein language models</article-title>. <source>Nat. Biotechnol</source>. <volume>42</volume>, <fpage>275</fpage>&#x02013;<lpage>283</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41587-023-01763-2</pub-id><pub-id pub-id-type="pmid">37095349</pub-id></mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hillson</surname> <given-names>N.</given-names></name> <name><surname>Caddick</surname> <given-names>M.</given-names></name> <name><surname>Cai</surname> <given-names>Y.</given-names></name> <name><surname>Carrasco</surname> <given-names>J. A.</given-names></name> <name><surname>Chang</surname> <given-names>M. W.</given-names></name> <name><surname>Curach</surname> <given-names>N. C.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Building a global alliance of biofoundries</article-title>. <source>Nat. Commun</source>. <volume>10</volume>:<fpage>2040</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-019-10079-2</pub-id><pub-id pub-id-type="pmid">31068573</pub-id></mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hsu</surname> <given-names>C.</given-names></name> <name><surname>Verkuil</surname> <given-names>R.</given-names></name> <name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>Lin</surname> <given-names>Z.</given-names></name> <name><surname>Hie</surname> <given-names>B.</given-names></name> <name><surname>Sercu</surname> <given-names>T.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>&#x0201C;Learning inverse folding from millions of predicted structures,&#x0201D;</article-title> in <source>Proceedings of the 39th International Conference on Machine Learning, volume 162 of Proceedings of Machine Learning Research</source>, eds. K. Chaudhuri, S. Jegelka, L. Song, C. Szepesvari, G. Niu, and S. Sabato (PMLR), <fpage>8946</fpage>&#x02013;<lpage>8970</lpage>. doi: <pub-id pub-id-type="doi">10.1101/2022.04.10.487779</pub-id></mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Huot</surname> <given-names>M.</given-names></name> <name><surname>Rosenbaum</surname> <given-names>P.</given-names></name> <name><surname>Planchais</surname> <given-names>C.</given-names></name> <name><surname>Mouquet</surname> <given-names>H.</given-names></name> <name><surname>Monasson</surname> <given-names>R.</given-names></name> <name><surname>Cocco</surname> <given-names>S.</given-names></name></person-group> (<year>2025a</year>). <article-title>Generative model of SARS-CoV-2 variants under functional and immune pressure unveils viral escape potential and antibody resilience</article-title>. <source>bioRxiv, 2025-05</source>. doi: <pub-id pub-id-type="doi">10.1101/2025.05.12.653592</pub-id></mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Huot</surname> <given-names>M.</given-names></name> <name><surname>Wang</surname> <given-names>D.</given-names></name> <name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>Shakhnovich</surname> <given-names>E. I.</given-names></name></person-group> (<year>2025b</year>). <article-title>Predicting high-fitness viral protein variants with Bayesian active learning and biophysics</article-title>. <source>Proc. Nat. Acad. Sci</source>. <volume>122</volume>:<fpage>e2503742122</fpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.2503742122</pub-id><pub-id pub-id-type="pmid">40489612</pub-id></mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Huot</surname> <given-names>M.</given-names></name> <name><surname>Wang</surname> <given-names>D.</given-names></name> <name><surname>Shakhnovich</surname> <given-names>E.</given-names></name> <name><surname>Monasson</surname> <given-names>R.</given-names></name> <name><surname>Cocco</surname> <given-names>S.</given-names></name></person-group> (<year>2025c</year>). <article-title>Constrained evolutionary funnels shape viral immune escape</article-title>. <source>bioRxiv, 2025-10</source>. doi: <pub-id pub-id-type="doi">10.1101/2025.10.26.684604</pub-id><pub-id pub-id-type="pmid">41278698</pub-id></mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ito</surname> <given-names>J.</given-names></name> <name><surname>Strange</surname> <given-names>A.</given-names></name> <name><surname>Liu</surname> <given-names>W.</given-names></name> <name><surname>Joas</surname> <given-names>G.</given-names></name> <name><surname>Lytras</surname> <given-names>S.</given-names></name> <collab>The Genotype to Phenotype Japan (G2P-Japan) Consortium</collab> <etal/></person-group>. (<year>2024</year>). <article-title>A protein language model for exploring viral fitness landscapes</article-title>. <source>Nat. Commun</source>. <volume>16</volume>:<fpage>4236</fpage>. doi: <pub-id pub-id-type="doi">10.1101/2024.03.15.584819</pub-id></mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jawaid</surname> <given-names>M. Z.</given-names></name> <name><surname>Yeo</surname> <given-names>R. W.</given-names></name> <name><surname>Gautam</surname> <given-names>A.</given-names></name> <name><surname>Gainous</surname> <given-names>T. B.</given-names></name> <name><surname>Hart</surname> <given-names>D. O.</given-names></name> <name><surname>Daley</surname> <given-names>T. P.</given-names></name></person-group> (<year>2023</year>). <article-title>Improving few-shot learning-based protein engineering with evolutionary sampling</article-title>. <source>arXiv preprint arXiv:2305.15441</source>. doi: <pub-id pub-id-type="doi">10.1101/2023.05.23.541997</pub-id></mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jiang</surname> <given-names>K.</given-names></name> <name><surname>Yan</surname> <given-names>Z.</given-names></name> <name><surname>Di Bernardo</surname> <given-names>M.</given-names></name> <name><surname>Sgrizzi</surname> <given-names>S. R.</given-names></name> <name><surname>Villiger</surname> <given-names>L.</given-names></name> <name><surname>Kayabolen</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Rapid in silico directed evolution by a protein language model with EvolvePro</article-title>. <source>Science</source> <volume>387</volume>:<fpage>eadr6006</fpage>. doi: <pub-id pub-id-type="doi">10.1126/science.adr6006</pub-id><pub-id pub-id-type="pmid">39571002</pub-id></mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Karimi</surname> <given-names>M.</given-names></name> <name><surname>Banerjee</surname> <given-names>S.</given-names></name> <name><surname>Jaakkola</surname> <given-names>T.</given-names></name> <name><surname>Dubrov</surname> <given-names>B.</given-names></name> <name><surname>Shang</surname> <given-names>S.</given-names></name> <name><surname>Benson</surname> <given-names>R.</given-names></name></person-group> (<year>2024</year>). <article-title>&#x0201C;Extrapolative protein design through triplet-based preference learning,&#x0201D;</article-title> in <source>ICML 2024 Workshop on Foundation Models in the Wild</source>.</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ko</surname> <given-names>C.-Y.</given-names></name> <name><surname>Chen</surname> <given-names>P.-Y.</given-names></name> <name><surname>Das</surname> <given-names>P.</given-names></name> <name><surname>Mroueh</surname> <given-names>Y.</given-names></name> <name><surname>Dan</surname> <given-names>S.</given-names></name> <name><surname>Kollias</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Large language models can be strong self-detoxifiers</article-title>. <source>arXiv preprint arXiv:2410.03818</source>.</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>M.</given-names></name> <name><surname>Tan</surname> <given-names>P.</given-names></name> <name><surname>Ma</surname> <given-names>X.</given-names></name> <name><surname>Zhong</surname> <given-names>B.</given-names></name> <name><surname>Yu</surname> <given-names>H.</given-names></name> <name><surname>Zhou</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>&#x0201C;ProSST: protein language modeling with quantized structure and disentangled attention,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source>, <fpage>35700</fpage>&#x02013;<lpage>35726</lpage>. doi: <pub-id pub-id-type="doi">10.52202/079017-1126</pub-id></mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lin</surname> <given-names>Z.</given-names></name> <name><surname>Akin</surname> <given-names>H.</given-names></name> <name><surname>Rao</surname> <given-names>R.</given-names></name> <name><surname>Hie</surname> <given-names>B.</given-names></name> <name><surname>Zhu</surname> <given-names>Z.</given-names></name> <name><surname>Lu</surname> <given-names>W.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Evolutionary-scale prediction of atomic level protein structure with a language model</article-title>. <source>Science</source> <volume>379</volume>, <fpage>1123</fpage>&#x02013;<lpage>1130</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.ade2574</pub-id><pub-id pub-id-type="pmid">36927031</pub-id></mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>S.</given-names></name> <name><surname>Zhu</surname> <given-names>T.</given-names></name> <name><surname>Ren</surname> <given-names>M.</given-names></name> <name><surname>Yu</surname> <given-names>C.</given-names></name> <name><surname>Bu</surname> <given-names>D.</given-names></name> <name><surname>Zhang</surname> <given-names>H.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Predicting mutational effects on protein-protein binding via a side-chain diffusion probabilistic model,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source>, <fpage>48994</fpage>&#x02013;<lpage>49005</lpage>.</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>X.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name> <name><surname>Chen</surname> <given-names>S.</given-names></name> <name><surname>Hu</surname> <given-names>W.</given-names></name></person-group> (<year>2025</year>). <article-title>Controllable protein sequence generation with llm preference optimization</article-title>. <source>arXiv preprint arXiv:2501.15007</source>.</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Loux</surname> <given-names>T.</given-names></name> <name><surname>Wang</surname> <given-names>D.</given-names></name> <name><surname>Shakhnovich</surname> <given-names>E. I.</given-names></name></person-group> (<year>2024</year>). <article-title>More structure, less accuracy: ESM3&#x00027;s binding prediction paradox</article-title>. <source>bioRxiv</source>, <fpage>2024</fpage>&#x02013;<lpage>12</lpage>. doi: <pub-id pub-id-type="doi">10.1101/2024.12.09.627585</pub-id></mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lu</surname> <given-names>N.</given-names></name> <name><surname>Liu</surname> <given-names>S.</given-names></name> <name><surname>He</surname> <given-names>R.</given-names></name> <name><surname>Wang</surname> <given-names>Q.</given-names></name> <name><surname>Ong</surname> <given-names>Y.-S.</given-names></name> <name><surname>Tang</surname> <given-names>K.</given-names></name></person-group> (<year>2023</year>). <article-title>Large language models can be guided to evade ai-generated text detection</article-title>. <source>arXiv preprint arXiv:2305.10847</source>.</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Maher</surname> <given-names>M. C.</given-names></name> <name><surname>Bartha</surname> <given-names>I.</given-names></name> <name><surname>Weaver</surname> <given-names>S.</given-names></name> <name><surname>Di Iulio</surname> <given-names>J.</given-names></name> <name><surname>Ferri</surname> <given-names>E.</given-names></name> <name><surname>Soriaga</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Predicting the mutational drivers of future SARS-CoV-2 variants of concern</article-title>. <source>Sci. Transl. Med</source>. <volume>14</volume>:<fpage>eabk3445</fpage>. doi: <pub-id pub-id-type="doi">10.1126/scitranslmed.abk3445</pub-id><pub-id pub-id-type="pmid">35014856</pub-id></mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Margatina</surname> <given-names>K.</given-names></name> <name><surname>Vernikos</surname> <given-names>G.</given-names></name> <name><surname>Barrault</surname> <given-names>L.</given-names></name> <name><surname>Aletras</surname> <given-names>N.</given-names></name></person-group> (<year>2021</year>). <article-title>Active learning by acquiring contrastive examples</article-title>. <source>arXiv preprint arXiv:2109.03764</source>.</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Meier</surname> <given-names>J.</given-names></name> <name><surname>Rao</surname> <given-names>R.</given-names></name> <name><surname>Verkuil</surname> <given-names>R.</given-names></name> <name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>Sercu</surname> <given-names>T.</given-names></name> <name><surname>Rives</surname> <given-names>A.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Language models enable zero-shot prediction of the effects of mutations on protein function,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source>, <fpage>29287</fpage>&#x02013;<lpage>29303</lpage>. doi: <pub-id pub-id-type="doi">10.1101/2021.07.09.450648</pub-id></mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Min</surname> <given-names>K.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Cao</surname> <given-names>F.</given-names></name> <name><surname>Peng</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>China&#x00027;s biosafety/biosecurity governance: evolution, challenges, and architecture design</article-title>. <source>Front. Med</source>. <volume>19</volume>, <fpage>871</fpage>&#x02013;<lpage>878</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11684-025-1158-y</pub-id><pub-id pub-id-type="pmid">41016962</pub-id></mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mistani</surname> <given-names>P.</given-names></name> <name><surname>Mysore</surname> <given-names>V.</given-names></name></person-group> (<year>2024</year>). <article-title>Preference optimization of protein language models as a multi-objective binder design paradigm</article-title>. <source>arXiv preprint arXiv:2403.04187</source>.</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Moulana</surname> <given-names>A.</given-names></name> <name><surname>Dupic</surname> <given-names>T.</given-names></name> <name><surname>Phillips</surname> <given-names>A. M.</given-names></name> <name><surname>Chang</surname> <given-names>J.</given-names></name> <name><surname>Nieves</surname> <given-names>S.</given-names></name> <name><surname>Roffler</surname> <given-names>A. A.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Compensatory epistasis maintains ACE2 affinity in SARS-CoV-2 omicron BA.1</article-title>. <source>Nat. Commun</source>. <volume>13</volume>:<fpage>7011</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-022-34506-z</pub-id><pub-id pub-id-type="pmid">36384919</pub-id></mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Notin</surname> <given-names>P.</given-names></name> <name><surname>Weitzman</surname> <given-names>R.</given-names></name> <name><surname>Marks</surname> <given-names>D.</given-names></name> <name><surname>Gal</surname> <given-names>Y.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Proteinnpt: improving protein property prediction and design with non-parametric transformers,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source> (<publisher-loc>Curran Associates, Inc.</publisher-loc>), <fpage>33529</fpage>&#x02013;<lpage>33563</lpage>. doi: <pub-id pub-id-type="doi">10.1101/2023.12.06.570473</pub-id><pub-id pub-id-type="pmid">38106034</pub-id></mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ouyang</surname> <given-names>L.</given-names></name> <name><surname>Wu</surname> <given-names>J.</given-names></name> <name><surname>Jiang</surname> <given-names>X.</given-names></name> <name><surname>Almeida</surname> <given-names>D.</given-names></name> <name><surname>Wainwright</surname> <given-names>C.</given-names></name> <name><surname>Mishkin</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>&#x0201C;Training language models to follow instructions with human feedback,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source>, <fpage>27730</fpage>&#x02013;<lpage>27744</lpage>.</mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ouyang</surname> <given-names>S.</given-names></name> <name><surname>Cai</surname> <given-names>H.</given-names></name> <name><surname>Luo</surname> <given-names>Y.</given-names></name> <name><surname>Su</surname> <given-names>K.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name> <name><surname>Du</surname> <given-names>B.</given-names></name></person-group> (<year>2024</year>). <article-title>&#x0201C;MMSite: a multi-modal framework for the identification of active sites in proteins,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source>, <fpage>45819</fpage>&#x02013;<lpage>45849</lpage>. doi: <pub-id pub-id-type="doi">10.52202/079017-1457</pub-id></mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pannu</surname> <given-names>J.</given-names></name> <name><surname>Bloomfield</surname> <given-names>D.</given-names></name> <name><surname>MacKnight</surname> <given-names>R.</given-names></name> <name><surname>Hanke</surname> <given-names>M. S.</given-names></name> <name><surname>Zhu</surname> <given-names>A.</given-names></name> <name><surname>Gomes</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Dual-use capabilities of concern of biological AI models</article-title>. <source>PLoS Comput. Biol</source>. <volume>21</volume>:<fpage>e1012975</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pcbi.1012975</pub-id><pub-id pub-id-type="pmid">40338934</pub-id></mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rebedea</surname> <given-names>T.</given-names></name> <name><surname>Dinu</surname> <given-names>R.</given-names></name> <name><surname>Sreedhar</surname> <given-names>M.</given-names></name> <name><surname>Parisien</surname> <given-names>C.</given-names></name> <name><surname>Cohen</surname> <given-names>J.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;NeMo Guardrails: a toolkit for controllable and safe LLM applications with programmable rails,&#x0201D;</article-title> in <source>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</source>, <fpage>431</fpage>&#x02013;<lpage>445</lpage>. doi: <pub-id pub-id-type="doi">10.18653/v1/2023.emnlp-demo.40</pub-id></mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Riesselman</surname> <given-names>A. J.</given-names></name> <name><surname>Ingraham</surname> <given-names>J. B.</given-names></name> <name><surname>Marks</surname> <given-names>D. S.</given-names></name></person-group> (<year>2018</year>). <article-title>Deep generative models of genetic variation capture the effects of mutations</article-title>. <source>Nat. Methods</source> <volume>15</volume>, <fpage>816</fpage>&#x02013;<lpage>822</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41592-018-0138-4</pub-id><pub-id pub-id-type="pmid">30250057</pub-id></mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ruffolo</surname> <given-names>J. A.</given-names></name> <name><surname>Madani</surname> <given-names>A.</given-names></name></person-group> (<year>2024</year>). <article-title>Designing proteins with language models</article-title>. <source>Nat. Biotechnol</source>. <volume>42</volume>, <fpage>200</fpage>&#x02013;<lpage>202</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41587-024-02123-4</pub-id><pub-id pub-id-type="pmid">38361067</pub-id></mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schmirler</surname> <given-names>R.</given-names></name> <name><surname>Heinzinger</surname> <given-names>M.</given-names></name> <name><surname>Rost</surname> <given-names>B.</given-names></name></person-group> (<year>2023</year>). <article-title>Fine-tuning protein language models boosts predictions across diverse tasks</article-title>. <source>Nat. Commun</source>. <volume>15</volume>:<fpage>7407</fpage>. doi: <pub-id pub-id-type="doi">10.1101/2023.12.13.571462</pub-id><pub-id pub-id-type="pmid">39198457</pub-id></mixed-citation>
</ref>
<ref id="B53">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schulman</surname> <given-names>J.</given-names></name> <name><surname>Wolski</surname> <given-names>F.</given-names></name> <name><surname>Dhariwal</surname> <given-names>P.</given-names></name> <name><surname>Radford</surname> <given-names>A.</given-names></name> <name><surname>Klimov</surname> <given-names>O.</given-names></name></person-group> (<year>2017</year>). <article-title>Proximal policy optimization algorithms</article-title>. <source>arXiv preprint arXiv:1707.06347</source>.</mixed-citation>
</ref>
<ref id="B54">
<mixed-citation publication-type="web"><collab>SecureDNA</collab> (<year>2025</year>). <source>SecureDNA: Free, secure DNA synthesis screening platform</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://secureDNA.org">https://secureDNA.org</ext-link> (Accessed April 28, 2025).</mixed-citation>
</ref>
<ref id="B55">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shan</surname> <given-names>S.</given-names></name> <name><surname>Luo</surname> <given-names>S.</given-names></name> <name><surname>Yang</surname> <given-names>Z.</given-names></name> <name><surname>Hong</surname> <given-names>J.</given-names></name> <name><surname>Su</surname> <given-names>Y.</given-names></name> <name><surname>Ding</surname> <given-names>F.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Deep learning guided optimization of human antibody against SARS-CoV-2 variants with broad neutralization</article-title>. <source>Proc. Nat. Acad. Sci</source>. <volume>119</volume>:<fpage>e2122954119</fpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.2122954119</pub-id><pub-id pub-id-type="pmid">35238654</pub-id></mixed-citation>
</ref>
<ref id="B56">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shanker</surname> <given-names>V. R.</given-names></name> <name><surname>Bruun</surname> <given-names>T. U. J.</given-names></name> <name><surname>Hie</surname> <given-names>B. L.</given-names></name> <name><surname>Kim</surname> <given-names>P. S.</given-names></name></person-group> (<year>2024</year>). <article-title>Unsupervised evolution of protein and antibody complexes with a structure-informed language model</article-title>. <source>Science</source> <volume>385</volume>, <fpage>46</fpage>&#x02013;<lpage>53</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.adk8946</pub-id><pub-id pub-id-type="pmid">38963838</pub-id></mixed-citation>
</ref>
<ref id="B57">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shuai</surname> <given-names>R. W.</given-names></name> <name><surname>Widatalla</surname> <given-names>T.</given-names></name> <name><surname>Huang</surname> <given-names>P.-S.</given-names></name> <name><surname>Hie</surname> <given-names>B. L.</given-names></name></person-group> (<year>2025</year>). <article-title>Sidechain conditioning and modeling for full-atom protein sequence design with FAMPNN</article-title>. <source>Proc. Mach. Learn. Res</source>. <volume>267</volume>:<fpage>66746</fpage>. doi: <pub-id pub-id-type="doi">10.1101/2025.02.13.637498</pub-id><pub-id pub-id-type="pmid">41307002</pub-id></mixed-citation>
</ref>
<ref id="B58">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Starr</surname> <given-names>T. N.</given-names></name> <name><surname>Greaney</surname> <given-names>A. J.</given-names></name> <name><surname>Hilton</surname> <given-names>S. K.</given-names></name> <name><surname>Ellis</surname> <given-names>D.</given-names></name> <name><surname>Crawford</surname> <given-names>K. H.</given-names></name> <name><surname>Dingens</surname> <given-names>A. S.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Deep mutational scanning of SARS-CoV-2 receptor binding domain reveals constraints on folding and ACE2 binding</article-title>. <source>Cell</source> <volume>182</volume>, <fpage>1295</fpage>&#x02013;<lpage>1310</lpage>.e20. doi: <pub-id pub-id-type="doi">10.1016/j.cell.2020.08.012</pub-id><pub-id pub-id-type="pmid">32841599</pub-id></mixed-citation>
</ref>
<ref id="B59">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Stocco</surname> <given-names>F.</given-names></name> <name><surname>Artigues-Lleixa</surname> <given-names>M.</given-names></name> <name><surname>Hunklinger</surname> <given-names>A.</given-names></name> <name><surname>Widatalla</surname> <given-names>T.</given-names></name> <name><surname>Guell</surname> <given-names>M.</given-names></name> <name><surname>Ferruz</surname> <given-names>N.</given-names></name></person-group> (<year>2024</year>). <article-title>Guiding generative protein language models with reinforcement learning</article-title>. <source>arXiv preprint arXiv:2412.12979</source>.</mixed-citation>
</ref>
<ref id="B60">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Taft</surname> <given-names>J. M.</given-names></name> <name><surname>Weber</surname> <given-names>C. R.</given-names></name> <name><surname>Gao</surname> <given-names>B.</given-names></name> <name><surname>Ehling</surname> <given-names>R. A.</given-names></name> <name><surname>Han</surname> <given-names>J.</given-names></name> <name><surname>Frei</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Deep mutational learning predicts ACE2 binding and antibody escape to combinatorial mutations in the SARS-CoV-2 receptor-binding domain</article-title>. <source>Cell</source> <volume>185</volume>, <fpage>4008</fpage>&#x02013;<lpage>4022</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cell.2022.08.024</pub-id><pub-id pub-id-type="pmid">36150393</pub-id></mixed-citation>
</ref>
<ref id="B61">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Thadani</surname> <given-names>N. N.</given-names></name> <name><surname>Gurev</surname> <given-names>S.</given-names></name> <name><surname>Notin</surname> <given-names>P.</given-names></name> <name><surname>Youssef</surname> <given-names>N.</given-names></name> <name><surname>Rollins</surname> <given-names>N. J.</given-names></name> <name><surname>Ritter</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Learning from prepandemic data to forecast viral escape</article-title>. <source>Nature</source> <volume>622</volume>, <fpage>818</fpage>&#x02013;<lpage>825</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41586-023-06617-0</pub-id><pub-id pub-id-type="pmid">37821700</pub-id></mixed-citation>
</ref>
<ref id="B62">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Torres-Acosta</surname> <given-names>M. A.</given-names></name> <name><surname>Lye</surname> <given-names>G. J.</given-names></name> <name><surname>Dikicioglu</surname> <given-names>D.</given-names></name></person-group> (<year>2022</year>). <article-title>Automated liquid-handling operations for robust, resilient, and efficient bio-based laboratory practices</article-title>. <source>Biochem. Eng. J</source>. <volume>188</volume>:<fpage>108713</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.bej.2022.108713</pub-id></mixed-citation>
</ref>
<ref id="B63">
<mixed-citation publication-type="web"><collab>U.S. Department of Health and Human Services Administration for Strategic Preparedness and Response (ASPR)</collab> (<year>2025</year>). <source>Biosecurity</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://aspr.hhs.gov/S3/Pages/Biosecurity.aspx">https://aspr.hhs.gov/S3/Pages/Biosecurity.aspx</ext-link> (Accessed April 28, 2025).</mixed-citation>
</ref>
<ref id="B64">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Undheim</surname> <given-names>T. A.</given-names></name></person-group> (<year>2024</year>). <article-title>The whack-a-mole governance challenge for AI-enabled synthetic biology: literature review and emerging frameworks</article-title>. <source>Front. Bioeng. Biotechnol</source>. <volume>12</volume>:<fpage>1359768</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fbioe.2024.1359768</pub-id><pub-id pub-id-type="pmid">38481570</pub-id></mixed-citation>
</ref>
<ref id="B65">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vieira</surname> <given-names>L. C.</given-names></name> <name><surname>Handojo</surname> <given-names>M. L.</given-names></name> <name><surname>Wilke</surname> <given-names>C. O.</given-names></name></person-group> (<year>2024</year>). <article-title>Scaling down for efficiency: Medium-sized protein language models perform well at transfer learning on realistic datasets</article-title>. <source>bioRxiv</source>, <elocation-id>2024.11.22.624936</elocation-id>. doi: <pub-id pub-id-type="doi">10.1101/2024.11.22.624936</pub-id></mixed-citation>
</ref>
<ref id="B66">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>D.</given-names></name> <name><surname>Huot</surname> <given-names>M.</given-names></name> <name><surname>Mohanty</surname> <given-names>V.</given-names></name> <name><surname>Shakhnovich</surname> <given-names>E. I.</given-names></name></person-group> (<year>2024</year>). <article-title>Biophysical principles predict fitness of SARS-CoV-2 variants</article-title>. <source>Proc. Nat. Acad. Sci</source>. <volume>121</volume>:<fpage>e2314518121</fpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.2314518121</pub-id><pub-id pub-id-type="pmid">38820002</pub-id></mixed-citation>
</ref>
<ref id="B67">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>G.</given-names></name> <name><surname>Liu</surname> <given-names>X.</given-names></name> <name><surname>Wang</surname> <given-names>K.</given-names></name> <name><surname>Gao</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>G.</given-names></name> <name><surname>Baptista-Hon</surname> <given-names>D. T.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Deep-learning-enabled protein-protein interaction analysis for prediction of SARS-CoV-2 infectivity and variant evolution</article-title>. <source>Nat. Med</source>. <volume>29</volume>, <fpage>2007</fpage>&#x02013;<lpage>2018</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41591-023-02483-5</pub-id><pub-id pub-id-type="pmid">37524952</pub-id></mixed-citation>
</ref>
<ref id="B68">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Warmuth</surname> <given-names>M. K.</given-names></name> <name><surname>R&#x000E4;tsch</surname> <given-names>G.</given-names></name> <name><surname>Mathieson</surname> <given-names>M.</given-names></name> <name><surname>Liao</surname> <given-names>J.</given-names></name> <name><surname>Lemmen</surname> <given-names>C.</given-names></name></person-group> (<year>2001</year>). <article-title>&#x0201C;Active learning in the drug discovery process,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source>, 14.</mixed-citation>
</ref>
<ref id="B69">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Watson</surname> <given-names>J. L.</given-names></name> <name><surname>Juergens</surname> <given-names>D.</given-names></name> <name><surname>Bennett</surname> <given-names>N. R.</given-names></name> <name><surname>Trippe</surname> <given-names>B. L.</given-names></name> <name><surname>Yim</surname> <given-names>J.</given-names></name> <name><surname>Eisenach</surname> <given-names>H. E.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>De novo design of protein structure and function with RFdiffusion</article-title>. <source>Nature</source> <volume>620</volume>, <fpage>1089</fpage>&#x02013;<lpage>1100</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41586-023-06415-8</pub-id><pub-id pub-id-type="pmid">37433327</pub-id></mixed-citation>
</ref>
<ref id="B70">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wittmann</surname> <given-names>B. J.</given-names></name> <name><surname>Alexanian</surname> <given-names>T.</given-names></name> <name><surname>Bartling</surname> <given-names>C.</given-names></name> <name><surname>Beal</surname> <given-names>J.</given-names></name> <name><surname>Clore</surname> <given-names>A.</given-names></name> <name><surname>Diggans</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Strengthening nucleic acid biosecurity screening against generative protein design tools</article-title>. <source>Science</source> <volume>390</volume>, <fpage>82</fpage>&#x02013;<lpage>87</lpage>. doi: <pub-id pub-id-type="doi">10.1126/science.adu8578</pub-id><pub-id pub-id-type="pmid">41037625</pub-id></mixed-citation>
</ref>
<ref id="B71">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xiao</surname> <given-names>Y.</given-names></name> <name><surname>Zhao</surname> <given-names>W.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Jin</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>H.</given-names></name> <name><surname>Ren</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Protein large language models: a comprehensive survey</article-title>. <source>arXiv preprint arXiv:2502.17504</source>.</mixed-citation>
</ref>
<ref id="B72">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>J.</given-names></name> <name><surname>Lal</surname> <given-names>R. G.</given-names></name> <name><surname>Bowden</surname> <given-names>J. C.</given-names></name> <name><surname>Astudillo</surname> <given-names>R.</given-names></name> <name><surname>Hameedi</surname> <given-names>M. A.</given-names></name> <name><surname>Kaur</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Active learning-assisted directed evolution</article-title>. <source>Nat. Commun</source>. <volume>16</volume>:<fpage>714</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-025-55987-8</pub-id><pub-id pub-id-type="pmid">39821082</pub-id></mixed-citation>
</ref>
<ref id="B73">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Youssef</surname> <given-names>N.</given-names></name> <name><surname>Gurev</surname> <given-names>S.</given-names></name> <name><surname>Ghantous</surname> <given-names>F.</given-names></name> <name><surname>Brock</surname> <given-names>K. P.</given-names></name> <name><surname>Jaimes</surname> <given-names>J. A.</given-names></name> <name><surname>Thadani</surname> <given-names>N. N.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Computationally designed proteins mimic antibody immune evasion in viral evolution</article-title>. <source>Immunity</source> <volume>58</volume>, <fpage>1411</fpage>&#x02013;<lpage>1421</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.immuni.2025.04.015</pub-id><pub-id pub-id-type="pmid">40345199</pub-id></mixed-citation>
</ref>
<ref id="B74">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>T.</given-names></name> <name><surname>Boob</surname> <given-names>A. G.</given-names></name> <name><surname>Singh</surname> <given-names>N.</given-names></name> <name><surname>Su</surname> <given-names>Y.</given-names></name> <name><surname>Zhao</surname> <given-names>H.</given-names></name></person-group> (<year>2023</year>). <article-title>In vitro continuous protein evolution empowered by machine learning and automation</article-title>. <source>Cell Syst</source>. <volume>14</volume>, <fpage>633</fpage>&#x02013;<lpage>644</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cels.2023.04.006</pub-id></mixed-citation>
</ref>
<ref id="B75">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>Y.</given-names></name> <name><surname>Jiang</surname> <given-names>F.</given-names></name> <name><surname>Zhong</surname> <given-names>B.</given-names></name> <name><surname>Hong</surname> <given-names>L.</given-names></name> <name><surname>Li</surname> <given-names>M.</given-names></name></person-group> (<year>2025</year>). <article-title>Entropy-driven zero-shot deep learning model selection for viral proteins</article-title>. <source>Phys. Rev. Res</source>. <volume>7</volume>:<fpage>013229</fpage>. doi: <pub-id pub-id-type="doi">10.1103/PhysRevResearch.7.013229</pub-id></mixed-citation>
</ref>
<ref id="B76">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Q.</given-names></name> <name><surname>Chen</surname> <given-names>W.</given-names></name> <name><surname>Qin</surname> <given-names>M.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Pu</surname> <given-names>Z.</given-names></name> <name><surname>Ding</surname> <given-names>K.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Integrating protein language models and automatic biofoundry for enhanced protein evolution</article-title>. <source>Nat. Commun</source>. <volume>16</volume>:<fpage>1553</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-025-56751-8</pub-id><pub-id pub-id-type="pmid">39934638</pub-id></mixed-citation>
</ref>
<ref id="B77">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Notin</surname> <given-names>P.</given-names></name> <name><surname>Huang</surname> <given-names>Y.</given-names></name> <name><surname>Lozano</surname> <given-names>A. C.</given-names></name> <name><surname>Chenthamarakshan</surname> <given-names>V.</given-names></name> <name><surname>Marks</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>&#x0201C;Multi-scale representation learning for protein fitness prediction,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source>, 101456&#x02013;101473. doi: <pub-id pub-id-type="doi">10.52202/079017-3217</pub-id><pub-id pub-id-type="pmid">39679271</pub-id></mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0002">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1150544/overview">Bin Hu</ext-link>, Los Alamos National Laboratory (DOE), United States</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0003">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1543817/overview">Matthew Bashton</ext-link>, Northumbria University, United Kingdom</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3263758/overview">Thomas Inglesby</ext-link>, Johns Hopkins University, United States</p>
</fn>
</fn-group>
<fn-group>
<fn id="fn0001"><label>1</label><p><italic>Emerald Cloud Lab</italic>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.emeraldcloudlab.com/">https://www.emeraldcloudlab.com/</ext-link> (Accessed April 30, 2025).</p></fn>
</fn-group>
</back>
</article>