<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Digit. Health</journal-id><journal-title-group>
<journal-title>Frontiers in Digital Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Digit. Health</abbrev-journal-title></journal-title-group>
<issn pub-type="epub">2673-253X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fdgth.2026.1733630</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Identification and validation of respiratory virus immunization using natural language processing</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes"><name><surname>Wilson</surname><given-names>Kevin A.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref><uri xlink:href="https://loop.frontiersin.org/people/3300908/overview"/><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role></contrib>
<contrib contrib-type="author"><name><surname>Riddles</surname><given-names>John J.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role></contrib>
<contrib contrib-type="author"><name><surname>Hill</surname><given-names>Andrew C.</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3320935/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" 
vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role></contrib>
<contrib contrib-type="author"><name><surname>Bassett</surname><given-names>Elizabeth A.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3240660/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role></contrib>
<contrib contrib-type="author"><name><surname>Zhou</surname><given-names>Mengshi</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/2562446/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role></contrib>
<contrib contrib-type="author"><name><surname>Barron</surname><given-names>Michelle</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Chavez</surname><given-names>Catia</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role></contrib>
<contrib contrib-type="author"><name><surname>Shrivastava</surname><given-names>Rahul</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role></contrib>
<contrib contrib-type="author"><name><surname>Battalahalli</surname><given-names>Anil</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role></contrib>
<contrib contrib-type="author"><name><surname>Chacreton</surname><given-names>Daniel</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Moran</surname><given-names>Ethan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3281924/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role></contrib>
<contrib contrib-type="author"><name><surname>Rowley</surname><given-names>Elizabeth</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role></contrib>
<contrib contrib-type="author"><name><surname>Weber</surname><given-names>Zachary A.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role></contrib>
<contrib contrib-type="author"><name><surname>Reichle</surname><given-names>Lawrence</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role></contrib>
<contrib contrib-type="author"><name><surname>Ball</surname><given-names>Sarah W.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role></contrib>
<contrib contrib-type="author"><name><surname>Payne</surname><given-names>Amanda B.</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role></contrib>
<contrib contrib-type="author"><name><surname>DeCuir</surname><given-names>Jennifer</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role></contrib>
<contrib contrib-type="author"><name><surname>Link-Gelles</surname><given-names>Ruth</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role></contrib>
<contrib contrib-type="author"><name><surname>Ong</surname><given-names>Toan C.</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3312529/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role><role 
vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role></contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Westat</institution>, <city>Bethesda</city>, <state>MD</state>, <country country="us">United States</country></aff>
<aff id="aff2"><label>2</label><institution>Colorado School of Public Health and ACCORDS School of Medicine, University of Colorado Anschutz Medical Campus</institution>, <city>Aurora</city>, <state>CO</state>, <country country="us">United States</country></aff>
<aff id="aff3"><label>3</label><institution>Coronavirus and Other Respiratory Viruses Division, National Center for Immunization and Respiratory Diseases, Centers for Disease Control and Prevention</institution>, <city>Atlanta</city>, <state>GA</state>, <country country="us">United States</country></aff>
<aff id="aff4"><label>4</label><institution>Influenza Division, National Center for Immunization and Respiratory Diseases, Centers for Disease Control and Prevention</institution>, <city>Atlanta</city>, <state>GA</state>, <country country="us">United States</country></aff>
<aff id="aff5"><label>5</label><institution>United States Public Health Service Commissioned Corps</institution>, <city>Rockville</city>, <state>MD</state>, <country country="us">United States</country></aff>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> Kevin A. Wilson <email xlink:href="mailto:kevinwilson@westat.com">kevinwilson@westat.com</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-02"><day>02</day><month>02</month><year>2026</year></pub-date>
<pub-date publication-format="electronic" date-type="collection"><year>2026</year></pub-date>
<volume>8</volume><elocation-id>1733630</elocation-id>
<history>
<date date-type="received"><day>27</day><month>10</month><year>2025</year></date>
<date date-type="rev-recd"><day>23</day><month>12</month><year>2025</year></date>
<date date-type="accepted"><day>05</day><month>01</month><year>2026</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2026 Wilson, Riddles, Hill, Bassett, Zhou, Barron, Chavez, Shrivastava, Battalahalli, Chacreton, Moran, Rowley, Weber, Reichle, Ball, Payne, DeCuir, Link-Gelles and Ong.</copyright-statement>
<copyright-year>2026</copyright-year><copyright-holder>Wilson, Riddles, Hill, Bassett, Zhou, Barron, Chavez, Shrivastava, Battalahalli, Chacreton, Moran, Rowley, Weber, Reichle, Ball, Payne, DeCuir, Link-Gelles and Ong</copyright-holder><license><ali:license_ref start_date="2026-02-02">https://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p></license>
</permissions>
<abstract><sec><title>Introduction</title>
<p>Electronic health record (EHR)-based research often relies on structured data elements, such as ICD-10-CM and CPT codes, to identify clinical diagnoses and procedures. However, some information, such as the administration of immunizations, may be captured more reliably in the text-based narrative sections of the patient&#x0027;s record. We developed a rule-based natural language processing (NLP) algorithm to identify the administration of immunizations for COVID-19, influenza, and RSV using a combination of synthetic and publicly available data.</p>
</sec><sec><title>Methods</title>
<p>After applying standard NLP processing techniques to clean and standardize the text, we implemented a multi-stage, rule-based algorithm. We applied a dictionary of general keywords to identify potential immunizations, and a set of specific keywords, which leveraged grammatical dependencies in the text, to increase accuracy. We implemented additional rules to account for negation and immunization recommendations. The algorithm was applied to a sample of 20,000 patients from the study population. We measured performance by conducting a manual review of 400 individual notes and assessing concurrence with structured data, using precision and recall as evaluation metrics.</p>
</sec><sec><title>Results</title>
<p>In the first evaluation, which compared the performance of the algorithm with an independent test dataset using manual clinical review, precision was 71&#x0025; and recall was 97&#x0025; for COVID-19 immunization; 91&#x0025; and 92&#x0025; for Influenza; and 57&#x0025; and 96&#x0025; for RSV. In a second evaluation using structured data as the gold standard (i.e., ICD-10-CM, CPT, and CVX codes), precision was 72&#x0025; and recall was 9&#x0025; for COVID-19 immunization; 71&#x0025; and 12&#x0025; for Influenza; and for RSV, precision was 78&#x0025; and recall was 10&#x0025;.</p>
</sec><sec><title>Discussion</title>
<p>We demonstrated the effectiveness of NLP methods in identifying immunizations from EHR. High precision and recall for COVID-19 and influenza immunizations suggest that the algorithm can effectively identify immunization references when they are present in the text; however, low recall when compared to the structured data suggests that there are many more immunizations in the structured data not present in the text. Thus, the algorithm has specialized utility for augmenting immunization records using text data from individual notes; however, the algorithm&#x0027;s extensibility and generalizability can serve as a framework for future EHR-based research.</p>
</sec>
</abstract>
<kwd-group>
<kwd>COVID-19</kwd>
<kwd>electronic health records</kwd>
<kwd>immunization verification</kwd>
<kwd>influenza</kwd>
<kwd>natural language processing (NLP)</kwd>
<kwd>respiratory syncytial virus (RSV)</kwd>
<kwd>rule-based methods</kwd>
<kwd>vaccine verification</kwd>
</kwd-group><funding-group><funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This study was supported by the Centers for Disease Control and Prevention (CDC) contract 75D30121D12779 to Westat.</funding-statement></funding-group><counts>
<fig-count count="4"/>
<table-count count="5"/><equation-count count="0"/><ref-count count="39"/><page-count count="13"/><word-count count="5486"/></counts><custom-meta-group><custom-meta><meta-name>section-at-acceptance</meta-name><meta-value>Health Informatics</meta-value></custom-meta></custom-meta-group>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><label>1</label><title>Introduction</title>
<p>Natural language processing (NLP) is a growing area of research that aims to develop algorithms and models capable of processing and decoding unstructured, human-generated text. In recent years, there has been interest in using NLP to analyze unstructured data within Electronic Health Record (EHR) systems (<xref ref-type="bibr" rid="B1">1</xref>). Unlike structured data, which consists of data organized into predefined data fields, typically categorical or numerical values, unstructured data consists of free text or fields without a predefined format. Although structured EHR sections contain most health information for individuals, often defined by standardized data elements such as ICD-10-CM diagnosis and procedure codes, a significant proportion of this information is contained within unstructured sections, such as physician notes (<xref ref-type="bibr" rid="B2">2</xref>). This poses a challenge to researchers who rely solely on structured EHR sections for data analysis. We developed NLP methods to extract critical data from text-based narrative sections of the patient&#x0027;s record, using immunization<xref ref-type="fn" rid="n3"><sup>1</sup></xref> administration as a specific use case.</p>
<p>NLP methods take various forms and can employ a rule-based approach, machine learning (ML) models, or other artificial intelligence (AI) techniques, depending on the research needs of a specific project (<xref ref-type="bibr" rid="B2">2</xref>). In a rule-based model, experts develop a set of keywords and a series of rules that contextually detect terms in a sentence. In contrast, an ML model uses a preexisting training dataset to predict criteria or labels in a test dataset (<xref ref-type="bibr" rid="B3">3</xref>). Rule-based systems have been widely implemented for NLP tasks, such as computational phenotyping and clinical decision support; rule-based systems have been successfully developed for information extraction (<xref ref-type="bibr" rid="B3">3</xref>&#x2013;<xref ref-type="bibr" rid="B9">9</xref>). Notably, Deady et al. (<xref ref-type="bibr" rid="B10">10</xref>) employed rule-based NLP methods to extract influenza vaccination data from clinical notes, resulting in a 16.8&#x0025; increase in captured vaccinations.</p>
<p>Researchers have applied rule-based and model-based NLP methods across a range of therapeutic areas, including identification of bleeding events (<xref ref-type="bibr" rid="B11">11</xref>&#x2013;<xref ref-type="bibr" rid="B13">13</xref>, <xref ref-type="bibr" rid="B37">37</xref>), chronic cough (<xref ref-type="bibr" rid="B14">14</xref>), surgical site infections (<xref ref-type="bibr" rid="B15">15</xref>), prediction of falls (<xref ref-type="bibr" rid="B16">16</xref>), back pain (<xref ref-type="bibr" rid="B17">17</xref>), Hepatitis C (<xref ref-type="bibr" rid="B18">18</xref>), and a range of mental health issues (<xref ref-type="bibr" rid="B19">19</xref>, <xref ref-type="bibr" rid="B20">20</xref>). The application of NLP methods has been valuable in situations where traditional diagnosis codes do not capture the disease of interest, such as the identification of diseases in undiagnosed populations and in situations where ICD-10-CM diagnosis and procedure codes are not specific enough to accurately identify the disease population (<xref ref-type="bibr" rid="B14">14</xref>, <xref ref-type="bibr" rid="B18">18</xref>, <xref ref-type="bibr" rid="B38">38</xref>). NLP methods have also been effective in identifying early predictors and longitudinal progression of disease and provide an alternative to labor-intensive chart review (<xref ref-type="bibr" rid="B16">16</xref>, <xref ref-type="bibr" rid="B18">18</xref>). Rule-based methods are popular and effective because they do not require labeled training data (<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B14">14</xref>) and are able to algorithmically detect modifiers, such as negation, self-diagnosis, and affirmation (<xref ref-type="bibr" rid="B14">14</xref>, <xref ref-type="bibr" rid="B16">16</xref>, <xref ref-type="bibr" rid="B19">19</xref>). 
Methods range from relatively simple regular expressions to algorithms that incorporate semantic mappings, grammatical structure, and clinical terminologies (<xref ref-type="bibr" rid="B15">15</xref>). Rule-based methods have also been used in combination with ML approaches, including as a preprocessing step to generate labeled training data (<xref ref-type="bibr" rid="B17">17</xref>) and for identification of modifiers (<xref ref-type="bibr" rid="B12">12</xref>).</p>
<p>The success of previous studies using a rule-based NLP model to extract immunizations from text-based EHR sections guided our research efforts. Moreover, an analysis of NLP research on unstructured EHR data revealed that rule-based NLP models significantly outperform ML approaches while also providing increased simplicity, transparency, and interpretability (<xref ref-type="bibr" rid="B8">8</xref>). During an exploratory phase, preceding the extraction of EHR study data, we developed preliminary models based on publicly available EHR data using the Medical Information Mart for Intensive Care, version four dataset (MIMIC-IV) (<xref ref-type="bibr" rid="B9">9</xref>). Compared to a less malleable ML-based model, a rule-based approach provided increased flexibility in further developing our approach in this vaccine administration setting.</p>
<p>Respiratory viruses, such as SARS-CoV-2, influenza, and respiratory syncytial virus (RSV), remain persistent contributors of morbidity and mortality within the United States (<xref ref-type="bibr" rid="B21">21</xref>&#x2013;<xref ref-type="bibr" rid="B23">23</xref>). As such, tracking data related to immunization coverage, effectiveness, and severe disease outcomes by immunization status are crucial in supporting public health efforts targeting these viruses (<xref ref-type="bibr" rid="B24">24</xref>, <xref ref-type="bibr" rid="B25">25</xref>, <xref ref-type="bibr" rid="B27">27</xref>). The need for a rapid public health response increases the reliance on timely and accurate EHR data. Vaccine effectiveness (VE) estimates support public health programs and policies by routinely collecting and synthesizing data on respiratory illnesses and associated outcomes in vaccinated and unvaccinated subjects, and publishing timely statistical analyses (<xref ref-type="bibr" rid="B26">26</xref>, <xref ref-type="bibr" rid="B28">28</xref>&#x2013;<xref ref-type="bibr" rid="B30">30</xref>). As pandemic-era regulations requiring providers to report immunization data to state and local immunization information systems have expired, there may be an impact on the reliability and completeness of immunization history data. Furthermore, structured EHR data may not reflect immunizations received in locations outside of healthcare networks, including unaffiliated clinical practices, pharmacies, and workplaces (<xref ref-type="bibr" rid="B10">10</xref>). Due to the importance of consistent and reliable immunization data for estimating VE and assessing immunization coverage, we investigated and developed methods to extract additional immunization data from relevant text-based EHR sections for study participants.</p>
<p>This study specifically examines the use and optimization of a rule-based NLP algorithm to identify and characterize immunization events in unstructured EHR sections, and broadly considers the application of a rule-based NLP algorithm to capture other measures and characteristics that are not reliant on ICD-10-CM diagnosis and procedure codes.</p>
</sec>
<sec id="s2" sec-type="methods"><label>2</label><title>Materials and methods</title>
<sec id="s2a"><label>2.1</label><title>Study population</title>
<p>The population for this study consisted of patients who received care within the University of Colorado Healthcare System and had available clinical note data within the study period, from August 1, 2023, to December 31, 2023. This patient population (<italic>N</italic>&#x2009;&#x003D;&#x2009;40,588) was initially identified as part of the VISION network, for which the methodology has been well established in the literature (<xref ref-type="bibr" rid="B28">28</xref>, <xref ref-type="bibr" rid="B31">31</xref>, <xref ref-type="bibr" rid="B32">32</xref>). The population was associated with 357,877 notes across three care settings. Specifically, 254,926 notes (71&#x0025;) were recorded in the inpatient setting, 55,901 (16&#x0025;) in the emergency department/urgent care setting, and 47,050 (13&#x0025;) in the outpatient setting. This activity was reviewed by CDC and was conducted consistent with applicable federal law and CDC policy.<xref ref-type="fn" rid="n4"><sup>2</sup></xref></p>
</sec>
<sec id="s2b"><label>2.2</label><title>Sample</title>
<p>We selected a stratified, systematic sample of 20,000 patients from the study population (<italic>N</italic>&#x2009;&#x003D;&#x2009;40,588). The sample was stratified by presence of structured immunization records, testing results, and immunocompromised status. Within each stratum, patients were sorted by age and sampled at even intervals. To ensure representativeness and reasonable sample sizes within different demographics, we stratified the sample by several variables of interest: immunization status in the structured EHR data for COVID-19, influenza, and RSV; pathogenic test result; age group; RSV immunization eligibility; and immunocompromised status. <xref ref-type="table" rid="T1">Table&#x00A0;1</xref> presents the definitions for each variable. Notably, to be included in the sample, a patient had to have test results for at least one pathogen (i.e., SARS-CoV-2, influenza, RSV), although they could have missing test results for one or two pathogens. A preliminary review indicated that progress notes were likely to contain more relevant information than other note types, so notes were restricted to progress notes. Of 20,000 patients, we included 18,488 patients in the downstream analysis due to the availability of progress notes. From this sample, we selected three overlapping sub-samples to implement and evaluate separate rules for COVID-19, influenza, and RSV immunization. For the sub-sample, a random selection of patients was drawn from different strata defined by the presence of structured vaccination records, as well as presence/absence of vaccinations identified by an early prototype of the parser. This stratification was conducted to ensure sufficient cases for assessing various performance metrics, particularly precision and recall. All progress notes from all available encounters were included in the sample.</p>
<table-wrap id="T1" position="float"><label>Table&#x00A0;1</label>
<caption><p>Description of the stratification variables applied to study sample.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Variable</th>
<th valign="top" align="center">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">COVID-19 vaccination</td>
<td valign="top" align="left">Evidence of COVID-19 vaccination in structured data up to 12 months prior to the start of the study period or during the study period<xref ref-type="table-fn" rid="TF1"><sup>a</sup></xref></td>
</tr>
<tr>
<td valign="top" align="left">Influenza vaccination</td>
<td valign="top" align="left">Evidence of influenza vaccination in structured data during the study period</td>
</tr>
<tr>
<td valign="top" align="left">RSV immunization</td>
<td valign="top" align="left">Evidence of RSV immunization in structured data during the study period</td>
</tr>
<tr>
<td valign="top" align="left">SARS-CoV-2 test result</td>
<td valign="top" align="left">Presence of a positive SARS-CoV-2 test result during the study period</td>
</tr>
<tr>
<td valign="top" align="left">Influenza test result</td>
<td valign="top" align="left">Presence of a positive influenza test result during the study period</td>
</tr>
<tr>
<td valign="top" align="left">RSV test result</td>
<td valign="top" align="left">Presence of a positive RSV test result during the study period</td>
</tr>
<tr>
<td valign="top" align="left">Age group</td>
<td valign="top" align="left">Individual&#x0027;s age group (years): 0&#x2013;9, 10&#x2013;17, 18&#x2013;64, 65&#x002B;</td>
</tr>
<tr>
<td valign="top" align="left">Immunocompromised</td>
<td valign="top" align="left">Discharge summary contains ICD-10 code(s) for one or more of: hematologic malignancy; solid malignancy; transplant; rheumatologic/inflammatory disorder; or other intrinsic immune condition or immunodeficiency</td>
</tr>
<tr>
<td valign="top" align="left">RSV immunization eligibility<xref ref-type="table-fn" rid="TF2"><sup>b</sup></xref></td>
<td valign="top" align="left">Patients eligible to receive RSV immunization during the study period: age &#x2265;60 years or age &#x2264;9 months or pregnant during the study period</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TF1"><label>a</label>
<p>The look-back window for COVID-19 vaccination extends 12 months prior to the start of the study period because SARS-CoV-2 is a non-seasonal virus.</p></fn>
<fn id="TF2"><label>b</label>
<p>RSV immunization eligibility refers to eligibility during the study period.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Stratification by the presence of structured immunization records, testing results, and immunocompromised status was done to assess whether missingness rates &#x2014; and potential biases &#x2014; varied across these variables. <xref ref-type="sec" rid="s3d">Section 3.4</xref> discusses a few identified differences in missingness rates. Methods for estimation and adjustment for potential biases are topics for future work.</p>
</sec>
<sec id="s2c"><label>2.3</label><title>Algorithm development</title>
<p>We developed the initial algorithm iteratively using a combination of synthetic data generated by ChatGPT and publicly available, de-identified clinical data from the MIMIC-IV database (<xref ref-type="bibr" rid="B9">9</xref>). The synthetic data were generated using a prompt requesting artificial medical records including patient medical histories, assessment notes, medications, and social history. Further, we requested that some records contain relevant vaccine histories and that other records should have vaccine histories for other viruses, such as HPV. The prompts and example results are provided in the <xref ref-type="sec" rid="s11">Supplementary Table S3</xref>. This data set was manually annotated. The initial algorithm was developed for influenza only due to the absence of COVID-19 and RSV data from the MIMIC-IV database. We extended this algorithm to identify COVID-19 and RSV immunizations and then applied it to the full sample.</p>
<p>Our rule-based NLP algorithm used predefined rules and domain expertise to extract immunization history from clinical notes. The algorithm comprised four key components, starting with the preprocessing of clinical notes, which included sentence segmentation, tokenization of sentences into individual words, and lemmatization of each word to its root form (e.g., &#x201C;vaccinate&#x201D; to &#x201C;vaccine&#x201D;) [Step 1]. Initial evidence of immunization was first identified through a set of general keywords (e.g., &#x201C;booster&#x201D;, &#x201C;shot&#x201D;, &#x201C;vaccine&#x201D;), which were then connected to a &#x201C;specific term&#x201D; through grammatical dependencies [Step 2]. The complete list of &#x201C;general&#x201D; and &#x201C;specific&#x201D; terms is available in <xref ref-type="sec" rid="s11">Supplementary Table S1</xref>. The keywords were then assessed for negation (e.g., &#x201C;declined&#x201D;, &#x201C;refuse&#x201D;) and hypothetical (e.g., &#x201C;recommend&#x201D;, &#x201C;discuss&#x201D;) keywords [Step 3]. If none were identified, the sentence was then classified as &#x201C;positive&#x201D; [Step 4]. The keyword list for negation and hypotheticals is presented in <xref ref-type="sec" rid="s11">Supplementary Table S2</xref>. <xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref> describes the NLP process, and <xref ref-type="table" rid="T2">Table&#x00A0;2</xref> presents the detailed steps of the algorithm.</p>
<fig id="F1" position="float"><label>Figure&#x00A0;1</label>
<caption><p>Methodological flow of the study rule-based NLP algorithm to identify immunization evidence in clinical notes.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-08-1733630-g001.tif"><alt-text content-type="machine-generated">Flowchart illustrating the process of classifying clinical notes. It includes sections for preprocessing by segmenting sentences and tokenizing them into root forms, then standardizing terms. It proceeds to identify general and specific phrases, checks phrase length, and filters phrases. It searches for negation or hypothetical keywords. If negation is present, the phrase is filtered out. The final step classifies the note as positive or negative based on the presence of phrases.</alt-text>
</graphic>
</fig>
<table-wrap id="T2" position="float"><label>Table&#x00A0;2</label>
<caption><p>Description of the NLP algorithm procedures for the identification of immunization evidence in clinical notes.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Number</th>
<th valign="top" align="center">Step</th>
<th valign="top" align="center">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left">Preprocess Clinical Note</td>
<td valign="top" align="left">Each clinical note was segmented into sentences using the <italic>MedSpaCy library</italic>. After sentence segmentation, text preprocessing was applied to each sentence, which included lowercasing and removing special characters, redundant spaces, and new lines. To prepare for keyword searches in the following steps, the algorithm also tokenized each sentence into individual words and lemmatized each word to its root form (e.g., &#x201C;vaccinated&#x201D; to &#x201C;vaccine&#x201D;).</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left">Perform Named Entity Recognition of Relevant Vaccine Terms</td>
<td valign="top" align="left">The algorithm applied the <italic>DependencyMatcher</italic> component in <italic>spaCy</italic> to identify instances where a &#x201C;general&#x201D; term (e.g., vaccine, shot, booster) serves as the syntactic head or dependent in a dependency chain that connects it to a &#x201C;specific&#x201D; term (e.g., COVID-19, influenza, RSV). To account for lengthy phrases that are difficult to parse correctly, &#x201C;specific&#x201D; terms more than nine words apart from the &#x201C;general&#x201D; term were ignored.</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left">Identify Negation and Hypothetical Keywords</td>
<td valign="top" align="left">Once the algorithm identified an instance in step 2, it searched for negation (e.g., decline, refuse, no) or hypothetical (e.g., recommend, discuss, plan) keywords that are associated with the instance in a dependency chain.</td>
</tr>
<tr>
<td valign="top" align="left">4</td>
<td valign="top" align="left">Classify Note</td>
<td valign="top" align="left">If no negation or hypothetical keyword was identified for at least one instance, the sentence was labeled as a positive sentence. If at least one sentence within the note was marked as positive, the algorithm classified the note as a positive case.</td>
</tr>
</tbody>
</table>
</table-wrap>
<p><xref ref-type="fig" rid="F2">Figure&#x00A0;2</xref> illustrates an example of the process with the dependency parser. In the example below, the algorithm identifies the general term &#x201C;vaccine&#x201D; and then uses the dependency graph to locate the specific term &#x201C;COVID&#x201D; and the negation term &#x201C;declined&#x201D;. Thus, this sentence would indicate a negative vaccination. The NLP algorithm was developed using Python version 3.10.12 (<xref ref-type="bibr" rid="B33">33</xref>). Text data were processed using Python&#x0027;s <italic>spaCy</italic> library version 3.5.4, an NLP library (<xref ref-type="bibr" rid="B34">34</xref>). The <italic>DependencyParser</italic> component of <italic>spaCy</italic> was used to identify syntactic dependencies, and <italic>MedSpaCy</italic> version 1.1.5 was used for sentence segmentation (<xref ref-type="bibr" rid="B34">34</xref>).</p>
<fig id="F2" position="float"><label>Figure&#x00A0;2</label>
<caption><p>Example of the NLP algorithm dependency parser to capture syntactic dependencies.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-08-1733630-g002.tif"><alt-text content-type="machine-generated">Diagram analyzing the sentence &#x0022;Patient declined the COVID vaccine.&#x0022; The sentence is broken down into parts of speech and grammatical connections. &#x0022;Patient&#x0022; is the nominal subject, classified as a proper noun. &#x0022;Declined&#x0022; serves as the verb, highlighted as a negation term. &#x0022;The&#x0022; is the definite article. &#x0022;COVID&#x0022; is an adjective, and &#x0022;vaccine&#x0022; is a noun, acting as the direct object and modifier. The diagram uses colored boxes and arrows to show connections, with a legend explaining general terms, specific terms, negation terms, parts of speech, and grammatical connections.</alt-text>
</graphic>
</fig>
<p>We developed the algorithm, as described in <xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>, for COVID-19, influenza, and RSV immunizations and evaluated the results in two ways:
<list list-type="simple">
<list-item>
<p>To understand the degree to which the NLP algorithm correctly identified immunization references in the clinical notes, thus assessing the algorithm&#x0027;s overall ability in effectively identifying immunization, we compared the NLP results to immunization history data from a manual review of 400 clinical notes, limited to one note per patient, and calculated performance measures. In this evaluation, the manual review was treated as the gold standard.</p></list-item>
<list-item>
<p>To understand the degree to which the NLP algorithm identified immunization administrations absent from the structured data and thus assess the algorithm&#x0027;s overall utility in enhancing immunization detection, we compared the NLP results to immunization history data from the structured sections of the EHR. In this evaluation, the structured data was treated as the gold standard.</p></list-item>
</list>We calculated precision and recall for each evaluation. Precision is the probability that an identified immunization is indeed an immunization. High precision suggests the algorithm has a high probability of correctly identifying immunization references when they are present in the notes. Recall is defined as the probability that, if there is an immunization reference, the algorithm identified it. High recall suggests the algorithm has a high probability of capturing immunization references that are present in the notes.</p>
</sec>
<sec id="s2d"><label>2.4</label><title>Evaluation based on manual note review</title>
<p>We manually reviewed a stratified random sample of 400 notes (one note for each of 400 patients) selected from the 18,488 patients described in <xref ref-type="sec" rid="s2b">Section 2.2</xref>, constructed two-by-two confusion matrices, and calculated precision and recall with associated Wilson confidence intervals to identify two types of errors: (1) labeling a patient as having information of interest when the patient&#x0027;s note does not contain that information (false positive), and (2) not identifying information of interest that exists in the note (false negative). Of the 400 patients, 109 were not eligible to receive the RSV vaccine and were therefore not included in the confusion matrices or statistics for RSV. To ensure the reliability of the manual review, two reviewers independently reviewed all progress notes from the 400 patients. To assess reviewer agreement, we calculated Cohen&#x0027;s kappa statistics, which were 0.77, 0.93, and 0.97 for COVID-19, influenza, and RSV, respectively, indicating a relatively strong agreement between reviewers. A third reviewer resolved disagreements between the two reviewers.</p>
<p>We developed a software tool to aid in the manual review of the selected notes. The tool&#x0027;s interface presented the reviewer with the full text of the note, including visual aids and/or annotations based on the NLP algorithm&#x0027;s output. First, the interface would provide the complete clinical note; any immunization administration references identified by the NLP algorithm would be highlighted. For example, if a sentence referenced a COVID-19 vaccination, the phrase would be highlighted. If the NLP algorithm did not identify an immunization reference in the note, the tool highlighted instances of the words &#x201C;COVID&#x201D;, &#x201C;RSV&#x201D;, &#x201C;influenza&#x201D;, &#x201C;vaccination&#x201D;, &#x201C;vaccine&#x201D;, and &#x201C;shot&#x201D; within the note text to help the reviewer identify potential immunization references. To review the note and output, the tool&#x0027;s prompt requested that the reviewer answer questions about each note and leave free-text comments. For example, one prompt asked, &#x201C;Does the text indicate the patient received COVID vaccination (Yes/No)?&#x201D; When the reviewer finished with a note, they could continue to the next unreviewed note or return to a previously viewed note. The sample screenshots of the tool&#x0027;s user interface are provided in <xref ref-type="sec" rid="s11">Supplementary Figures S1a,b</xref>.</p>
</sec>
<sec id="s2e"><label>2.5</label><title>Comparison with structured EHR data</title>
<p>We constructed two-by-two confusion matrices to assess concordance of the algorithm by respiratory virus and gain insight into the overlap in immunization references between the structured clinical data and information contained in the clinical notes. Concordance was assessed using precision and recall with the structured data as the gold standard. We also calculated the proportion of additional immunizations identified by the NLP algorithm to provide a direct measure of utility. Reported <italic>p</italic>-values were calculated using Pearson chi-square tests.</p>
</sec>
</sec>
<sec id="s3" sec-type="results"><label>3</label><title>Results</title>
<sec id="s3a"><label>3.1</label><title>Characteristics of the sample</title>
<p>Within the sample (<italic>n</italic>&#x2009;&#x003D;&#x2009;20,000 patients), there was nearly equal representation of patients with and without evidence of vaccination in the structured EHR data for COVID-19 and influenza (<xref ref-type="table" rid="T3">Table&#x00A0;3</xref>). More than half of the sample patients did not have evidence of RSV immunization status (55&#x0025;) due to the limited number of eligible patients in the sampling frame. Individuals aged 18&#x2013;64 years comprised approximately half of the sample (42&#x0025;), while individuals aged 10&#x2013;17 accounted for the smallest age category in the sample (8&#x0025;). A majority of the sample was composed of immunocompetent individuals (79&#x0025;). Of those sampled with testing information, most individuals had a reported negative SARS-CoV-2 molecular test record (75&#x0025;). Similarly, those with influenza testing information mostly tested negative (85&#x0025;), and those with RSV testing information also mostly tested negative (95&#x0025;).</p>
<table-wrap id="T3" position="float"><label>Table&#x00A0;3</label>
<caption><p>Characteristics of the sample population (<italic>n</italic>&#x2009;&#x003D;&#x2009;20,000) by study stratification variables, the University of Colorado Healthcare System, August 1, 2023, to December 31, 2023.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Variable</th>
<th valign="top" align="center">Level</th>
<th valign="top" align="center">Count (row &#x0025;)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" rowspan="2">COVID-19 vaccination</td>
<td valign="top" align="left">Present</td>
<td valign="top" align="center">9,997 (50&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Absent</td>
<td valign="top" align="center">10,003 (50&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="2">Influenza vaccination</td>
<td valign="top" align="left">Present</td>
<td valign="top" align="center">11,127 (56&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Absent</td>
<td valign="top" align="center">8,873 (44&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="3">RSV immunization</td>
<td valign="top" align="left">Present</td>
<td valign="top" align="center">1,767 (9&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Absent</td>
<td valign="top" align="center">7,302 (37&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Ineligible</td>
<td valign="top" align="center">10,931 (55&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="3">SARS-CoV-2 test result</td>
<td valign="top" align="left">Positive</td>
<td valign="top" align="center">4,974 (25&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Negative</td>
<td valign="top" align="center">15,026 (75&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Absent</td>
<td valign="top" align="center">0 (0&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="3">Influenza test result</td>
<td valign="top" align="left">Positive</td>
<td valign="top" align="center">2,360 (12&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Negative</td>
<td valign="top" align="center">13,464 (67&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Absent</td>
<td valign="top" align="center">4,176 (21&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="4">RSV test result</td>
<td valign="top" align="left">Positive</td>
<td valign="top" align="center">315 (2&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Negative</td>
<td valign="top" align="center">7,088 (35&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Absent</td>
<td valign="top" align="center">1,666 (8&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Immunization ineligible</td>
<td valign="top" align="center">10,931 (55&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="4">Age group</td>
<td valign="top" align="left">0&#x2013;9 years old</td>
<td valign="top" align="center">3,298 (16&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">10&#x2013;17 years old</td>
<td valign="top" align="center">1,529 (8&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">18&#x2013;64 years old</td>
<td valign="top" align="center">8,491 (42&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">65&#x002B; years old</td>
<td valign="top" align="center">6,682 (33&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="2">Immunocompromised</td>
<td valign="top" align="left">Immunocompetent</td>
<td valign="top" align="center">15,758 (79&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Likely immunocompromised</td>
<td valign="top" align="center">4,242 (21&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="4">RSV immunization eligibility</td>
<td valign="top" align="left">Infant (&#x003C;9 months old)</td>
<td valign="top" align="center">505 (3&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Pregnant</td>
<td valign="top" align="center">1,882 (9&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Older (60&#x002B; years old)</td>
<td valign="top" align="center">6,682 (33&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Immunization ineligible</td>
<td valign="top" align="center">10,931 (55&#x0025;)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Additionally, absent testing result information was highest in the influenza stratum (21&#x0025;), followed by RSV (8&#x0025;). There were no absent testing records for SARS-CoV-2, as the presence of this testing information was prioritized during sampling. The sampling stratification variables and reported frequencies are presented in <xref ref-type="table" rid="T3">Table&#x00A0;3</xref>, while variable definitions are provided in <xref ref-type="table" rid="T1">Table&#x00A0;1</xref>.</p>
<p>We attempted to minimize the lack of testing information and RSV immunization ineligibility while targeting an equal distribution of values across other variables; however, due to the limited size of the data frame and the need to balance across several variables, some inequality in representation across strata remained. However, most groups contain over 1,000 patients, and all contain over 300 except for the group with absent SARS-CoV-2 test information (<italic>n</italic>&#x2009;&#x003D;&#x2009;0).</p>
</sec>
<sec id="s3b"><label>3.2</label><title>Evaluation based on manual note review</title>
<p><xref ref-type="fig" rid="F3">Figures&#x00A0;3A&#x2013;C</xref> present confusion matrices for the evaluation sample. We compared the performance of the algorithm with an independent test dataset using manual clinical review. For COVID-19 immunization, precision was 71&#x0025; [95&#x0025; CI: 63&#x0025;, 77&#x0025;] and recall was 97&#x0025; [92&#x0025;, &#x003E;99&#x0025;]; for influenza, precision was 91&#x0025; [85&#x0025;, 94&#x0025;] and recall was 92&#x0025; [87&#x0025;, 96&#x0025;]; and for RSV, precision was 57&#x0025; [46&#x0025;, 67&#x0025;] and recall was 96&#x0025; [84&#x0025;, &#x003E;99&#x0025;]. Comparatively low precision for RSV may be explained by the prevalence of unrelated respiratory conditions leading to situations where &#x201C;respiratory&#x201D; is mentioned in the same context as immunizations but is not in reference to an RSV immunization.</p>
<fig id="F3" position="float"><label>Figure&#x00A0;3</label>
<caption><p>Performance of NLP Algorithm Compared to Manual Clinical Review for the Identification of COVID-19 (A), Influenza (B), and RSV (C) Immunization Evidence&#x002A;.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-08-1733630-g003.tif"><alt-text content-type="machine-generated">Three confusion matrices comparing manual review and NLP labeling for COVID-19, Influenza, and RSV. (A) COVID-19: True Positive 141 (97.2%), False Positive 4 (2.8%), False Negative 59 (23.1%), True Negative 196 (76.9%). (B) Influenza: True Positive 181 (92.3%), False Positive 15 (7.7%), False Negative 19 (9.3%), True Negative 185 (90.7%). (C) RSV: True Positive 43 (95.6%), False Positive 2 (4.4%), False Negative 32 (13.0%), True Negative 214 (87.0%). Grayscale bar indicates percentage.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3c"><label>3.3</label><title>Comparison with structured EHR data</title>
<p>To understand the degree to which the NLP algorithm identified immunization administrations absent from the structured data and thus assess the algorithm&#x0027;s overall utility in enhancing immunization detection, we treated the structured data as the gold standard. <xref ref-type="fig" rid="F4">Figures&#x00A0;4A&#x2013;C</xref> summarize these results.</p>
<fig id="F4" position="float"><label>Figure&#x00A0;4</label>
<caption><p>Performance of NLP Algorithm Compared to Structured Electronic Health Record Data for the Identification of COVID-19 (A), Influenza (B), and RSV (C) Immunization Evidence&#x002A;.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-08-1733630-g004.tif"><alt-text content-type="machine-generated">Three confusion matrices comparing the NLP algorithm to structured EHR data. (A) COVID-19: 8,807 records present (91.4%), 829 referenced (8.6%). 8,537 absent, 315 referenced (3.6%). (B) Flu: 9,322 present (88.2%), 1,244 referenced (11.8%). 7,419 absent, 503 referenced (6.3%). (C) RSV: 1,548 present (90.1%), 171 referenced (9.9%). 6,921 absent, 47 referenced (0.7%). Each chart uses a grayscale gradient legend indicating percentage.</alt-text>
</graphic>
</fig>
<p>For COVID-19 immunization, precision was 72&#x0025; [95&#x0025; CI: 69&#x0025;, 75&#x0025;] and recall was 9&#x0025; [8&#x0025;, 10&#x0025;]; for influenza, precision was 71&#x0025; [69&#x0025;, 74&#x0025;] and recall was 12&#x0025; [11&#x0025;, 13&#x0025;]; and for RSV, precision was 78&#x0025; [72&#x0025;, 84&#x0025;] and recall was 10&#x0025; [8&#x0025;, 12&#x0025;]. The NLP algorithm identified patients whose immunization was recorded in the free-text clinical notes but absent from the structured EHR data. The NLP algorithm identified vaccine administrations in 6.3&#x0025; of cases for influenza, 3.6&#x0025; of cases for COVID-19, and 0.7&#x0025; of cases for RSV, among records with no vaccine administration of that type present in structured EHR. Supplementing structured immunization records with these records increased estimated immunization coverage from 52.1&#x0025; to 53.8&#x0025; for COVID-19, from 57.1&#x0025; to 59.8&#x0025; for influenza, and from 19.8&#x0025; to 20.3&#x0025; for RSV.</p>
</sec>
<sec id="s3d"><label>3.4</label><title>Immunization identification by strata</title>
<p><xref ref-type="table" rid="T4">Table&#x00A0;4</xref> shows the rate of immunization detection across different patient demographics. For patients without a corresponding immunization in the structured EHR data, COVID-19 and influenza vaccinations were more frequently identified among patients aged 65 years or older compared with other age groups (<italic>p</italic>&#x2009;&#x003C;&#x2009;0.001), with detection rates of 5.4&#x0025; vs. 3.0&#x0025; for COVID-19 and 11.4&#x0025; vs. 4.3&#x0025; for influenza. For influenza, vaccinations were more frequently identified among immunocompromised patients than immunocompetent patients (<italic>p</italic>&#x2009;&#x003C;&#x2009;0.001), with detection rates of 10.8&#x0025; for immunocompromised patients and 5.3&#x0025; for immunocompetent patients. The NLP algorithm identified RSV immunization references with similar frequencies within each stratum.</p>
<table-wrap id="T4" position="float"><label>Table&#x00A0;4</label>
<caption><p>Patient-level detection of immunization evidence by respiratory virus and patient demographics.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">Variable</th>
<th valign="top" align="center" colspan="2">Evidence of immunization in structured EHR data</th>
<th valign="top" align="center" colspan="2">No evidence of immunization in structured EHR data</th>
</tr>
<tr>
<th valign="top" align="center">Total<break/><italic>N</italic></th>
<th valign="top" align="center">Immunization reference found<break/><italic>N</italic> (row &#x0025;)</th>
<th valign="top" align="center">Total<break/><italic>N</italic></th>
<th valign="top" align="center">Immunization reference found<break/><italic>N</italic> (row &#x0025;)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="5">SARS-CoV-2</td>
</tr>
<tr>
<td valign="top" align="left">Overall</td>
<td valign="top" align="center">9,636</td>
<td valign="top" align="center">829 (8.6&#x0025;)</td>
<td valign="top" align="center">8,852</td>
<td valign="top" align="center">315 (3.6&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="5">Immunocompromised status</td>
</tr>
<tr>
<td valign="top" align="left">Likely immunocompromised</td>
<td valign="top" align="center">2,542</td>
<td valign="top" align="center">255 (10.0&#x0025;)</td>
<td valign="top" align="center">1,526</td>
<td valign="top" align="center">81 (5.3&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Immunocompetent</td>
<td valign="top" align="center">7,094</td>
<td valign="top" align="center">574 (8.1&#x0025;)</td>
<td valign="top" align="center">7,326</td>
<td valign="top" align="center">234 (3.2&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="5">Age</td>
</tr>
<tr>
<td valign="top" align="left">0&#x2013;9 years</td>
<td valign="top" align="center">778</td>
<td valign="top" align="center">57 (7.3&#x0025;)</td>
<td valign="top" align="center">2,046</td>
<td valign="top" align="center">32 (1.6&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">10&#x2013;17 years</td>
<td valign="top" align="center">491</td>
<td valign="top" align="center">32 (6.5&#x0025;)</td>
<td valign="top" align="center">827</td>
<td valign="top" align="center">18 (2.2&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">18&#x2013;64 years</td>
<td valign="top" align="center">4,038</td>
<td valign="top" align="center">310 (7.7&#x0025;)</td>
<td valign="top" align="center">3,801</td>
<td valign="top" align="center">147 (3.9&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2265;65 years</td>
<td valign="top" align="center">4,329</td>
<td valign="top" align="center">430 (9.9&#x0025;)</td>
<td valign="top" align="center">2,178</td>
<td valign="top" align="center">118 (5.4&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="5">Testing</td>
</tr>
<tr>
<td valign="top" align="left">Only negative tests in period</td>
<td valign="top" align="center">8,060</td>
<td valign="top" align="center">684 (8.5&#x0025;)</td>
<td valign="top" align="center">5,935</td>
<td valign="top" align="center">186 (3.1&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Positive test in period</td>
<td valign="top" align="center">1,576</td>
<td valign="top" align="center">145 (9.2&#x0025;)</td>
<td valign="top" align="center">2,917</td>
<td valign="top" align="center">129 (4.4&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">No test in period</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0 (N/A)</td>
<td valign="top" align="center">0</td>
<td valign="top" align="center">0 (N/A)</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="5">Influenza</td>
</tr>
<tr>
<td valign="top" align="left">Overall</td>
<td valign="top" align="center">10,566</td>
<td valign="top" align="center">1,244 (11.8&#x0025;)</td>
<td valign="top" align="center">7,922</td>
<td valign="top" align="center">503 (6.3&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="5">Immunocompromised status</td>
</tr>
<tr>
<td valign="top" align="left">Likely immunocompromised</td>
<td valign="top" align="center">2,533</td>
<td valign="top" align="center">361 (14.3&#x0025;)</td>
<td valign="top" align="center">1,535</td>
<td valign="top" align="center">166 (10.8&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Immunocompetent</td>
<td valign="top" align="center">8,033</td>
<td valign="top" align="center">883 (11.0&#x0025;)</td>
<td valign="top" align="center">6,387</td>
<td valign="top" align="center">337 (5.3&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="5">Age</td>
</tr>
<tr>
<td valign="top" align="left">0&#x2013;9 years</td>
<td valign="top" align="center">1,488</td>
<td valign="top" align="center">147 (9.9&#x0025;)</td>
<td valign="top" align="center">1,336</td>
<td valign="top" align="center">45 (3.4&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">10&#x2013;17 years</td>
<td valign="top" align="center">605</td>
<td valign="top" align="center">66 (10.9&#x0025;)</td>
<td valign="top" align="center">713</td>
<td valign="top" align="center">9 (1.3&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">18&#x2013;64 years</td>
<td valign="top" align="center">4,241</td>
<td valign="top" align="center">459 (10.8&#x0025;)</td>
<td valign="top" align="center">3,598</td>
<td valign="top" align="center">190 (5.3&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2265;65 years</td>
<td valign="top" align="center">4,232</td>
<td valign="top" align="center">572 (13.5&#x0025;)</td>
<td valign="top" align="center">2,275</td>
<td valign="top" align="center">259 (11.4&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="5">Testing</td>
</tr>
<tr>
<td valign="top" align="left">Only negative tests in period</td>
<td valign="top" align="center">7,914</td>
<td valign="top" align="center">949 (12.0&#x0025;)</td>
<td valign="top" align="center">4,735</td>
<td valign="top" align="center">343 (7.2&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Positive test in period</td>
<td valign="top" align="center">931</td>
<td valign="top" align="center">124 (13.3&#x0025;)</td>
<td valign="top" align="center">1,162</td>
<td valign="top" align="center">66 (5.7&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">No test in period</td>
<td valign="top" align="center">1,721</td>
<td valign="top" align="center">171 (9.9&#x0025;)</td>
<td valign="top" align="center">2,025</td>
<td valign="top" align="center">94 (4.6&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="5">RSV</td>
</tr>
<tr>
<td valign="top" align="left">Overall</td>
<td valign="top" align="center">1,719</td>
<td valign="top" align="center">171 (9.9&#x0025;)</td>
<td valign="top" align="center">6,968</td>
<td valign="top" align="center">47 (0.7&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="5">Immunocompromised status</td>
</tr>
<tr>
<td valign="top" align="left">Likely immunocompromised</td>
<td valign="top" align="center">1,138</td>
<td valign="top" align="center">123 (10.8&#x0025;)</td>
<td valign="top" align="center">4,859</td>
<td valign="top" align="center">30 (0.6&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Immunocompetent</td>
<td valign="top" align="center">581</td>
<td valign="top" align="center">48 (8.3&#x0025;)</td>
<td valign="top" align="center">2,109</td>
<td valign="top" align="center">17 (0.8&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="5">Age</td>
</tr>
<tr>
<td valign="top" align="left">0&#x2013;9 years</td>
<td valign="top" align="center">42</td>
<td valign="top" align="center">9 (21.4&#x0025;)</td>
<td valign="top" align="center">397</td>
<td valign="top" align="center">1 (0.3&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">10&#x2013;17 years</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0 (0&#x0025;)</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">0 (0&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">18&#x2013;64 years</td>
<td valign="top" align="center">173</td>
<td valign="top" align="center">18 (10.4&#x0025;)</td>
<td valign="top" align="center">1,564</td>
<td valign="top" align="center">9 (0.6&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">&#x2265;65 years</td>
<td valign="top" align="center">1,503</td>
<td valign="top" align="center">144 (9.6&#x0025;)</td>
<td valign="top" align="center">5,004</td>
<td valign="top" align="center">37 (0.7&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="5">Testing</td>
</tr>
<tr>
<td valign="top" align="left">Only negative tests in period</td>
<td valign="top" align="center">1,485</td>
<td valign="top" align="center">150 (10.1&#x0025;)</td>
<td valign="top" align="center">5,410</td>
<td valign="top" align="center">41 (0.8&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Positive test in period</td>
<td valign="top" align="center">17</td>
<td valign="top" align="center">3 (17.6&#x0025;)</td>
<td valign="top" align="center">280</td>
<td valign="top" align="center">0 (0.0&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">No test in period</td>
<td valign="top" align="center">217</td>
<td valign="top" align="center">18 (8.3&#x0025;)</td>
<td valign="top" align="center">1,278</td>
<td valign="top" align="center">6 (0.5&#x0025;)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The results in <xref ref-type="table" rid="T4">Table&#x00A0;4</xref> show that the algorithm&#x0027;s effectiveness varied across patient subgroups. The proportion of uncaptured immunizations is higher for certain groups, such as immunocompromised and older patients. <xref ref-type="table" rid="T5">Table&#x00A0;5</xref> shows that mentions of vaccine administrations in unstructured clinical notes are more common in outpatient settings than inpatient or urgent care settings.</p>
<table-wrap id="T5" position="float"><label>Table&#x00A0;5</label>
<caption><p>Visit-level detection of immunization evidence by respiratory virus and care setting.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left" rowspan="3">Setting<xref ref-type="table-fn" rid="TF3"><sup>a</sup></xref></th>
<th valign="top" align="center" colspan="2">Evidence of immunization in structured EHR data</th>
<th valign="top" align="center" colspan="2">No evidence of immunization in structured <break/>EHR data</th>
</tr>
<tr>
<th valign="top" align="center">Total</th>
<th valign="top" align="center">Immunization reference found</th>
<th valign="top" align="center">Total</th>
<th valign="top" align="center">Immunization reference found</th>
</tr>
<tr>
<th valign="top" align="center"><italic>N</italic></th>
<th valign="top" align="center"><italic>N</italic> (row &#x0025;)</th>
<th valign="top" align="center"><italic>N</italic></th>
<th valign="top" align="center"><italic>N</italic> (row &#x0025;)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="5">SARS-CoV-2</td>
</tr>
<tr>
<td valign="top" align="left">Inpatient</td>
<td valign="top" align="center">89,880</td>
<td valign="top" align="center">778 (0.9&#x0025;)</td>
<td valign="top" align="center">48,182</td>
<td valign="top" align="center">635 (1.3&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Outpatient</td>
<td valign="top" align="center">16,370</td>
<td valign="top" align="center">1,079 (6.6&#x0025;)</td>
<td valign="top" align="center">12,532</td>
<td valign="top" align="center">445 (3.6&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Urgent care</td>
<td valign="top" align="center">10,019</td>
<td valign="top" align="center">468 (4.7&#x0025;)</td>
<td valign="top" align="center">13,608</td>
<td valign="top" align="center">405 (3.0&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="5">Influenza</td>
</tr>
<tr>
<td valign="top" align="left">Inpatient</td>
<td valign="top" align="center">66,020</td>
<td valign="top" align="center">1,575 (2.4&#x0025;)</td>
<td valign="top" align="center">72,051</td>
<td valign="top" align="center">2,710 (3.8&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Outpatient</td>
<td valign="top" align="center">19,196</td>
<td valign="top" align="center">1,879 (9.8&#x0025;)</td>
<td valign="top" align="center">10,140</td>
<td valign="top" align="center">522 (5.1&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Urgent care</td>
<td valign="top" align="center">11,330</td>
<td valign="top" align="center">426 (3.8&#x0025;)</td>
<td valign="top" align="center">12,213</td>
<td valign="top" align="center">184 (1.5&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#d9d9d9" colspan="5">RSV</td>
</tr>
<tr>
<td valign="top" align="left">Inpatient</td>
<td valign="top" align="center">13,562</td>
<td valign="top" align="center">284 (2.1&#x0025;)</td>
<td valign="top" align="center">98,043</td>
<td valign="top" align="center">1,058 (1.1&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Outpatient</td>
<td valign="top" align="center">4,072</td>
<td valign="top" align="center">170 (4.2&#x0025;)</td>
<td valign="top" align="center">13,012</td>
<td valign="top" align="center">223 (1.7&#x0025;)</td>
</tr>
<tr>
<td valign="top" align="left">Urgent care</td>
<td valign="top" align="center">1,308</td>
<td valign="top" align="center">78 (6.0&#x0025;)</td>
<td valign="top" align="center">5,461</td>
<td valign="top" align="center">37 (0.7&#x0025;)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TF3"><label>a</label>
<p>In contrast to other variables which were computed at the patient-level, setting was tabulated at the visit level.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
<sec id="s4" sec-type="discussion"><label>4</label><title>Discussion</title>
<p>We developed rule-based NLP methods to enhance the identification of COVID-19, influenza, and RSV immunizations from clinical notes, with the goal of supplementing structured EHR data and thereby improving immunization estimates. Based on manual review results, the algorithm identified 97.2&#x0025; of COVID-19 vaccinations, 92.3&#x0025; of influenza vaccinations, and 95.6&#x0025; of RSV immunizations among immunizations recorded in progress notes, with few false positives and slightly more false negatives. Precision varied (71&#x0025; for COVID-19, 91&#x0025; for influenza, and 57&#x0025; for RSV). For COVID-19 and influenza specifically, high precision indicated that when we identified vaccinations in the record, there was a higher probability of being correct. Similarly, high recall suggested that we can identify a high proportion of immunizations in the notes (97&#x0025; for COVID-19, 92&#x0025; for influenza, and 96&#x0025; for RSV). Differences in detection rates across subgroups suggest that our algorithm can identify disparities in how immunization history is documented within structured EHR fields. Conversely, the comparison between immunizations detected by the algorithm and immunizations that are already present in structured data suggests the unstructured text contributed little additional vaccine information beyond what is already captured in the structured data, with potential increases in vaccination coverage limited to 1.7&#x0025; for COVID-19, 2.7&#x0025; for influenza, and 0.5&#x0025; for RSV. This reinforces our assumptions around the clinical workflow and our understanding of how immunization data are added to unstructured notes. These results are explained by the low recall when treating the structured data as the gold standard, which suggests that there are many more vaccinations in the structured data than are represented in the notes. 
Taken together, these results suggest that while the algorithm performs well in identifying instances of immunizations in the text, there were relatively few to be found in this application.</p>
<p>As with prior studies, our algorithm performed well and was able to identify relevant clinical terms (i.e., vaccine administrations) when they were present in the data; however, the utility of the algorithm was dependent on the prevalence of those terms in the clinical notes. In contrast to our approach, Deady et al. (<xref ref-type="bibr" rid="B10">10</xref>) applied a simpler algorithm limited to the detection of COVID-19, which resulted in a higher number of additional vaccinations being identified (16.8&#x0025;) despite overall lower performance. We hypothesize that this was due to more robust recording of vaccination in the structured data in the intervening time since that work was done, but this assumption requires more detailed investigation. Similar levels of performance have been achieved in other therapeutic areas. Bucher et al. (<xref ref-type="bibr" rid="B15">15</xref>) achieved 95&#x0025; sensitivity and 86&#x0025; specificity when using a rules-based approach to identify post-operative surgical site infections. Comparable performance was achieved by Taggart et al. (<xref ref-type="bibr" rid="B11">11</xref>), with sensitivity of 85&#x0025; and positive predictive value of 63&#x0025;, while Murff et al. (<xref ref-type="bibr" rid="B35">35</xref>) in an earlier study achieved slightly lower performance with 77&#x0025; sensitivity and 63&#x0025; specificity. Paterson et al. (<xref ref-type="bibr" rid="B16">16</xref>) achieved a high level of performance in identifying falls without chart review with precision of 92&#x0025; and recall of 95&#x0025;. Rule-based methods have also been shown to be effective in identifying broader mental health and social conditions. Gray et al. (<xref ref-type="bibr" rid="B20">20</xref>) identified patients with residential stability issues with 92&#x0025; precision and 84&#x0025; recall, while Chandran et al. 
(<xref ref-type="bibr" rid="B19">19</xref>) were successful in disentangling obsessive compulsive disorder from related comorbidities, with 77&#x0025; precision and 67&#x0025; recall. These results are consistent with our finding that rule-based NLP methods are effective in identifying clinical terms when they are present in the EHR.</p>
<p>The performance of our method can be attributed to several key innovations. Our method used grammatical dependencies to understand the relationships between and functions of words in complex sentences, where traditional keyword-based approaches might fail. This allowed us to identify immunization administration in sentences of arbitrary length where the indicator that an immunization was given could be relatively distant from the name of the virus. This advancement proved valuable in accurately interpreting negations and distinguishing between recommended vs. administered immunizations. The effectiveness of our method was further validated by comparing immunizations detected by the algorithm against immunizations recorded in structured data in the EHR. Across the full 20,000 patient sample, the NLP algorithm identified 315 COVID-19 vaccinations, 503 influenza vaccinations, and 47 RSV immunizations that were not captured in the structured data. Due to anticipated differences in immunization uptake across demographic groups, we stratified by immunization status, presence of test result, and immunosuppressed status when calculating immunization detection rates across the full sample. This process enabled us to generate a sample for each immunization that accurately reflected the underlying population, helping us assess the algorithm&#x0027;s performance in a real-world setting. For example, among patients with no evidence of immunization in the structured EHR data, the algorithm detected a higher proportion of influenza vaccinations in immunocompromised patients and older adults, suggesting that it was effective in identifying differences in immunization administration under-capture when they were represented in the clinical notes.</p>
<p>While we have demonstrated our method&#x0027;s performance in identifying immunizations in clinical notes that were not included in structured EHR data, we have not formally evaluated how this performance varies across key analytic variables, including demographics. We hope to perform that evaluation and combine estimates with detection rates across the same variables to estimate the level of completeness in immunization records. One limitation is that most immunized individuals do not have immunization references in clinical notes; however, in addition to supplementing structured EHR data, results on the proportion of patients with immunization references in both notes and structured records can be used to adjust estimates of missingness among patients without immunization references in structured records. Further research is needed to investigate the impact on VE estimates and explore ways to mitigate biases in these estimates. Failure to capture all vaccine administrations means that some vaccinated individuals would be wrongly included in unvaccinated reference groups during analyses, with the expected effect of underestimating vaccine effectiveness. We also performed preliminary work on identifying the date of immunization; however, due to the appearance of multiple dates in a highly unstructured format, the performance was variable. The accuracy of extracted dates ranged from 20.8&#x0025; for influenza to 66.7&#x0025; for RSV. The date of immunization can be a crucial variable in analyses; however, further research is needed to identify administration dates accurately.</p>
<p>In this research, we have developed a generic, keyword-based algorithm that can be applied to multiple clinical data extraction tasks. While our approach has been effective in identifying vaccine administrations when present in clinical text and has addressed some of the limitations of prior methods, more work is needed to extract more complex entities, such as the date of immunization. We have only evaluated it at a single participating institution and applied it solely to identifying immunization characteristics; therefore, its performance remains unclear in more general settings. Future work could apply the algorithm across multiple sites and expand to include other patient characteristics. Other emerging methods that utilize generative AI may capture more complex grammatical structures and semantic similarities necessary to isolate nuanced factors associated with immunization (e.g., date of administration, manufacturer) (<xref ref-type="bibr" rid="B36">36</xref>). Therefore, additional work is also needed to quantify and validate the results across different demographics to assess potential bias. If this can be accomplished, our method can be used to estimate missingness in the structured EHR data, which would enhance broader EHR-based analyses.</p>
<p>Due to our specific focus on vaccine administration, we limited our approach to analysis of clinical notes. For more general applications, a potential enhancement would be the incorporation of a broader range of data, such as lab values, imaging studies, and prescription records, which would have the potential to support more complex, multi-modal modelling approaches. Our methods would have more utility if the quality and consistency of clinical documentation were improved, especially for clinical conditions and events that are not well represented by ICD-10-CM or CPT codes, thus suggesting an opportunity to engage with clinicians to improve documentation protocols. A key advantage of our approach is that implementation within the clinical setting is relatively straightforward, requiring only the identification of terms that are typically well-represented in the notes (e.g., disease symptoms), defining of key words through clinical consultation, and the application of the algorithm to the clinical data warehouse. In conclusion, we have shown that rule-based NLP methods are effective in identifying vaccine administrations from clinical text. Due to the ease of implementation, they could have utility in general clinical data extraction tasks, especially those that are not reliant on ICD-10-CM diagnosis and procedure codes, which are easily captured in the structured EHR, such as in the identification of adverse events, transfers or referrals, use of treatments and medications, and more nuanced descriptions of disease symptoms.</p>
</sec>
</body>
<back>
<sec id="s5" sec-type="data-availability"><title>Data availability statement</title>
<p>The datasets presented in this article are not readily available because data sharing agreements between the CDC and VISION Network partner institutions prohibit making the data publicly available. Requests to access the datasets should be directed to Kevin Wilson, <email>kevinwilson@westat.com</email>.</p>
</sec>
<sec id="s6" sec-type="ethics-statement"><title>Ethics statement</title>
<p>The studies involving humans were approved by Westat Institutional Review Board (IRB). The studies were conducted in accordance with the local legislation and institutional requirements. Written informed consent for participation was not required from the participants or the participants&#x0027; legal guardians/next of kin in accordance with the national legislation and institutional requirements.</p>
</sec>
<sec id="s7" sec-type="author-contributions"><title>Author contributions</title>
<p>KW: Methodology, Writing &#x2013; original draft, Investigation, Visualization, Conceptualization, Writing &#x2013; review &#x0026; editing, Supervision. JR: Conceptualization, Data curation, Writing &#x2013; review &#x0026; editing, Methodology, Investigation, Writing &#x2013; original draft, Formal analysis, Software, Visualization. AH: Formal analysis, Writing &#x2013; review &#x0026; editing, Methodology, Investigation, Writing &#x2013; original draft, Software, Validation, Resources, Visualization, Data curation. EB: Writing &#x2013; review &#x0026; editing, Methodology, Writing &#x2013; original draft, Investigation, Project administration, Visualization, Conceptualization. MZ: Software, Methodology, Visualization, Investigation, Data curation, Writing &#x2013; review &#x0026; editing, Formal analysis. MB: Investigation, Validation, Writing &#x2013; review &#x0026; editing. CC: Formal analysis, Methodology, Writing &#x2013; original draft, Validation, Investigation, Writing &#x2013; review &#x0026; editing, Resources. RS: Data curation, Writing &#x2013; review &#x0026; editing, Methodology, Writing &#x2013; original draft, Software, Investigation. AB: Writing &#x2013; review &#x0026; editing, Methodology, Data curation, Investigation, Software. DC: Methodology, Investigation, Software, Writing &#x2013; review &#x0026; editing. EM: Writing &#x2013; review &#x0026; editing, Investigation, Writing &#x2013; original draft, Visualization, Methodology. ER: Writing &#x2013; review &#x0026; editing, Investigation. ZW: Writing &#x2013; review &#x0026; editing, Investigation. LR: Investigation, Writing &#x2013; review &#x0026; editing, Supervision, Writing &#x2013; original draft. SB: Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing, Conceptualization, Funding acquisition, Investigation, Supervision. AP: Supervision, Writing &#x2013; review &#x0026; editing, Investigation, Funding acquisition, Project administration. 
JD: Supervision, Writing &#x2013; review &#x0026; editing, Investigation, Funding acquisition. RL-G: Writing &#x2013; review &#x0026; editing, Investigation, Supervision, Funding acquisition. TO: Supervision, Methodology, Formal analysis, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing, Visualization, Project administration, Conceptualization, Resources, Investigation, Validation, Funding acquisition.</p>
</sec>
<ack><title>Acknowledgments</title>
<p>The authors would like to acknowledge Gizem Korkmaz, Westat, for her technical input in the initial phases of the algorithm development and testing. The authors would like to further acknowledge Beyonce Carrington, Westat, for administrative support during the pilot study that led to the development of this manuscript.</p>
</ack>
<sec id="s9" sec-type="COI-statement"><title>Conflict of interest</title>
<p>Authors KW, JR, EB, MZ, RS, AB, DC, EM, ER, ZW, LR and SB were employed by company Westat.</p>
<p>The remaining author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="ai-statement"><title>Generative AI statement</title>
<p>The author(s) declared that generative AI was used in the creation of this manuscript. Synthetic data was generated using ChatGPT (<ext-link ext-link-type="uri" xlink:href="https://chatgpt.com/">https://chatgpt.com/</ext-link>) and was utilized in combination with publicly available data for the initial algorithm development and testing. The synthetic data were generated using a prompt requesting artificial medical records including patient medical histories, assessment notes, medications, and social history. Further prompts requested that some records contain relevant vaccine histories and that other records should have vaccine histories for other viruses, such as HPV. See <xref ref-type="sec" rid="s11">Supplementary Table S3</xref> for additional information.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="s12" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s11" sec-type="supplementary-material"><title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fdgth.2026.1733630/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fdgth.2026.1733630/full&#x0023;supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
<supplementary-material xlink:href="Table3.xlsx" id="SM2" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"/>
<supplementary-material xlink:href="Table2.docx" id="SM3" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
<supplementary-material xlink:href="Datasheet1.docx" id="SM4" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
</sec>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gao</surname> <given-names>Y</given-names></name> <name><surname>Mahajan</surname> <given-names>D</given-names></name> <name><surname>Uzuner</surname> <given-names>&#x00D6;</given-names></name> <name><surname>Yetisgen</surname> <given-names>M</given-names></name></person-group>. <article-title>Clinical natural language processing for secondary uses</article-title>. <source>J Biomed Inform</source>. (<year>2024</year>) <volume>150</volume>:<fpage>104596</fpage>. <pub-id pub-id-type="doi">10.1016/j.jbi.2024.104596</pub-id><pub-id pub-id-type="pmid">38278312</pub-id></mixed-citation></ref>
<ref id="B2"><label>2.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Juhn</surname> <given-names>Y</given-names></name> <name><surname>Liu</surname> <given-names>H</given-names></name></person-group>. <article-title>Artificial intelligence approaches using natural language processing to advance EHR-based clinical research</article-title>. <source>J Allergy Clin Immunol</source>. (<year>2020</year>) <volume>145</volume>(<issue>2</issue>):<fpage>463</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1016/j.jaci.2019.12.897</pub-id><pub-id pub-id-type="pmid">31883846</pub-id></mixed-citation></ref>
<ref id="B3"><label>3.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zeng</surname> <given-names>Z</given-names></name> <name><surname>Deng</surname> <given-names>Y</given-names></name> <name><surname>Li</surname> <given-names>X</given-names></name> <name><surname>Naumann</surname> <given-names>T</given-names></name> <name><surname>Luo</surname> <given-names>Y</given-names></name></person-group>. <article-title>Natural language processing for EHR-based computational phenotyping</article-title>. <source>IEEE/ACM Trans Comput Biol Bioinform</source>. (<year>2019</year>) <volume>16</volume>(<issue>1</issue>):<fpage>139</fpage>&#x2013;<lpage>53</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2018.2849968</pub-id><pub-id pub-id-type="pmid">29994486</pub-id></mixed-citation></ref>
<ref id="B4"><label>4.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Redjdal</surname> <given-names>A</given-names></name> <name><surname>Novikava</surname> <given-names>N</given-names></name> <name><surname>Kempf</surname> <given-names>E</given-names></name> <name><surname>Bouaud</surname> <given-names>J</given-names></name> <name><surname>Seroussi</surname> <given-names>B</given-names></name></person-group>. <article-title>Leveraging rule-based NLP to translate textual reports as structured inputs automatically processed by a clinical decision support system</article-title>. <source>Stud Health Technol Inform</source>. (<year>2024</year>) <volume>316</volume>:<fpage>1861</fpage>&#x2013;<lpage>5</lpage>. <pub-id pub-id-type="doi">10.3233/SHTI240794</pub-id><pub-id pub-id-type="pmid">39176854</pub-id></mixed-citation></ref>
<ref id="B5"><label>5.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gunter</surname> <given-names>D</given-names></name> <name><surname>Puac-Polanco</surname> <given-names>P</given-names></name> <name><surname>Miguel</surname> <given-names>O</given-names></name> <name><surname>Thornhill</surname> <given-names>RE</given-names></name> <name><surname>Yu</surname> <given-names>AYX</given-names></name> <name><surname>Liu</surname> <given-names>ZA</given-names></name><etal/></person-group> <article-title>Rule-based natural language processing for automation of stroke data extraction: a validation study</article-title>. <source>Neuroradiology</source>. (<year>2022</year>) <volume>64</volume>(<issue>12</issue>):<fpage>2357</fpage>&#x2013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.1007/s00234-022-03029-1</pub-id><pub-id pub-id-type="pmid">35913525</pub-id></mixed-citation></ref>
<ref id="B6"><label>6.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zheng</surname> <given-names>C</given-names></name> <name><surname>Yu</surname> <given-names>W</given-names></name> <name><surname>Xie</surname> <given-names>F</given-names></name> <name><surname>Chen</surname> <given-names>W</given-names></name> <name><surname>Mercado</surname> <given-names>C</given-names></name> <name><surname>Sy</surname> <given-names>LS</given-names></name><etal/></person-group> <article-title>The use of natural language processing to identify Tdap-related local reactions at five health care systems in the vaccine safety datalink</article-title>. <source>Int J Med Inf</source>. (<year>2019</year>) <volume>127</volume>:<fpage>27</fpage>&#x2013;<lpage>34</lpage>. <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2019.04.009</pub-id></mixed-citation></ref>
<ref id="B7"><label>7.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>W</given-names></name> <name><surname>Zheng</surname> <given-names>C</given-names></name> <name><surname>Xie</surname> <given-names>F</given-names></name> <name><surname>Chen</surname> <given-names>W</given-names></name> <name><surname>Mercado</surname> <given-names>C</given-names></name> <name><surname>Sy</surname> <given-names>LS</given-names></name><etal/></person-group> <article-title>The use of natural language processing to identify vaccine-related anaphylaxis at five health care systems in the vaccine safety datalink</article-title>. <source>Pharmacoepidemiol Drug Saf</source>. (<year>2020</year>) <volume>29</volume>(<issue>2</issue>):<fpage>182</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1002/pds.4919</pub-id><pub-id pub-id-type="pmid">31797475</pub-id></mixed-citation></ref>
<ref id="B8"><label>8.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kreimeyer</surname> <given-names>K</given-names></name> <name><surname>Foster</surname> <given-names>M</given-names></name> <name><surname>Pandey</surname> <given-names>A</given-names></name> <name><surname>Arya</surname> <given-names>N</given-names></name> <name><surname>Halford</surname> <given-names>G</given-names></name> <name><surname>Jones</surname> <given-names>SF</given-names></name><etal/></person-group> <article-title>Natural language processing systems for capturing and standardizing unstructured clinical information: a systematic review</article-title>. <source>J Biomed Inform</source>. (<year>2017</year>) <volume>73</volume>:<fpage>14</fpage>&#x2013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1016/j.jbi.2017.07.012</pub-id><pub-id pub-id-type="pmid">28729030</pub-id></mixed-citation></ref>
<ref id="B9"><label>9.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Johnson</surname> <given-names>AEW</given-names></name> <name><surname>Bulgarelli</surname> <given-names>L</given-names></name> <name><surname>Shen</surname> <given-names>L</given-names></name> <name><surname>Gayles</surname> <given-names>A</given-names></name> <name><surname>Shammout</surname> <given-names>A</given-names></name> <name><surname>Horng</surname> <given-names>S</given-names></name><etal/></person-group> <article-title>MIMIC-IV, a freely accessible electronic health record dataset</article-title>. <source>Sci Data</source>. (<year>2023</year>) <volume>10</volume>(<issue>1</issue>):<fpage>1</fpage>. <pub-id pub-id-type="doi">10.1038/s41597-022-01899-x</pub-id><pub-id pub-id-type="pmid">36596836</pub-id></mixed-citation></ref>
<ref id="B10"><label>10.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Deady</surname> <given-names>M</given-names></name> <name><surname>Ezzeldin</surname> <given-names>H</given-names></name> <name><surname>Cook</surname> <given-names>K</given-names></name> <name><surname>Billings</surname> <given-names>D</given-names></name> <name><surname>Pizarro</surname> <given-names>J</given-names></name> <name><surname>Plotogea</surname> <given-names>AA</given-names></name><etal/></person-group> <article-title>The food and drug administration biologics effectiveness and safety initiative facilitates detection of vaccine administrations from unstructured data in medical records through natural language processing</article-title>. <source>Front Digit Health</source>. (<year>2021</year>) <volume>3</volume>:<fpage>777905</fpage>. <pub-id pub-id-type="doi">10.3389/fdgth.2021.777905</pub-id><pub-id pub-id-type="pmid">35005697</pub-id></mixed-citation></ref>
<ref id="B11"><label>11.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Taggart</surname> <given-names>M</given-names></name> <name><surname>Chapman</surname> <given-names>WW</given-names></name> <name><surname>Steinberg</surname> <given-names>BA</given-names></name> <name><surname>Ruckel</surname> <given-names>S</given-names></name> <name><surname>Pregenzer-Wenzler</surname> <given-names>A</given-names></name> <name><surname>Du</surname> <given-names>Y</given-names></name><etal/></person-group> <article-title>Comparison of 2 natural language processing methods for identification of bleeding among critically ill patients</article-title>. <source>JAMA Netw Open</source>. (<year>2018</year>) <volume>1</volume>(<issue>6</issue>):<fpage>e183451</fpage>. <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2018.3451</pub-id><pub-id pub-id-type="pmid">30646240</pub-id></mixed-citation></ref>
<ref id="B12"><label>12.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Calleja-Panero</surname> <given-names>JL</given-names></name> <name><surname>Esteban Mur</surname> <given-names>R</given-names></name> <name><surname>Jarque</surname> <given-names>I</given-names></name> <name><surname>Romero-G&#x00F3;mez</surname> <given-names>M</given-names></name> <name><surname>Group</surname> <given-names>SR</given-names></name> <name><surname>Garc&#x00ED;a Labrador</surname> <given-names>L</given-names></name><etal/></person-group> <article-title>Chronic liver disease-associated severe thrombocytopenia in Spain: results from a retrospective study using machine learning and natural language processing</article-title>. <source>Gastroenterol Hepatol</source>. (<year>2024</year>) <volume>47</volume>(<issue>3</issue>):<fpage>236</fpage>&#x2013;<lpage>45</lpage>. <pub-id pub-id-type="doi">10.1016/j.gastrohep.2023.05.010</pub-id><pub-id pub-id-type="pmid">37236305</pub-id></mixed-citation></ref>
<ref id="B13"><label>13.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gaspar</surname> <given-names>F</given-names></name> <name><surname>Zayene</surname> <given-names>M</given-names></name> <name><surname>Coumau</surname> <given-names>C</given-names></name> <name><surname>Bertrand</surname> <given-names>E</given-names></name> <name><surname>Bettex</surname> <given-names>M</given-names></name> <name><surname>Le Pogam</surname> <given-names>MA</given-names></name><etal/></person-group> <article-title>Natural language processing and ICD-10 coding for detecting bleeding events in discharge summaries: comparative cross-sectional study</article-title>. <source>JMIR Med Inform</source>. (<year>2025</year>) <volume>13</volume>:<fpage>e67837</fpage>. <pub-id pub-id-type="doi">10.2196/67837</pub-id><pub-id pub-id-type="pmid">40882207</pub-id></mixed-citation></ref>
<ref id="B14"><label>14.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bali</surname> <given-names>V</given-names></name> <name><surname>Weaver</surname> <given-names>J</given-names></name> <name><surname>Turzhitsky</surname> <given-names>V</given-names></name> <name><surname>Schelfhout</surname> <given-names>J</given-names></name> <name><surname>Paudel</surname> <given-names>ML</given-names></name> <name><surname>Hulbert</surname> <given-names>E</given-names></name><etal/></person-group> <article-title>Development of a natural language processing algorithm to detect chronic cough in electronic health records</article-title>. <source>BMC Pulm Med</source>. (<year>2022</year>) <volume>22</volume>(<issue>1</issue>):<fpage>256</fpage>. <pub-id pub-id-type="doi">10.1186/s12890-022-02035-6</pub-id><pub-id pub-id-type="pmid">35764999</pub-id></mixed-citation></ref>
<ref id="B15"><label>15.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bucher</surname> <given-names>BT</given-names></name> <name><surname>Shi</surname> <given-names>J</given-names></name> <name><surname>Ferraro</surname> <given-names>JP</given-names></name> <name><surname>Skarda</surname> <given-names>DE</given-names></name> <name><surname>Samore</surname> <given-names>MH</given-names></name> <name><surname>Hurdle</surname> <given-names>JF</given-names></name><etal/></person-group> <article-title>Portable automated surveillance of surgical site infections using natural language processing: development and validation</article-title>. <source>Ann Surg</source>. (<year>2020</year>) <volume>272</volume>(<issue>4</issue>):<fpage>629</fpage>&#x2013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.1097/SLA.0000000000004133</pub-id><pub-id pub-id-type="pmid">32773639</pub-id></mixed-citation></ref>
<ref id="B16"><label>16.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Patterson</surname> <given-names>BW</given-names></name> <name><surname>Jacobsohn</surname> <given-names>GC</given-names></name> <name><surname>Shah</surname> <given-names>MN</given-names></name> <name><surname>Song</surname> <given-names>Y</given-names></name> <name><surname>Maru</surname> <given-names>A</given-names></name> <name><surname>Venkatesh</surname> <given-names>AK</given-names></name><etal/></person-group> <article-title>Development and validation of a pragmatic natural language processing approach to identifying falls in older adults in the emergency department</article-title>. <source>BMC Med Inform Decis Mak</source>. (<year>2019</year>) <volume>19</volume>(<issue>1</issue>):<fpage>138</fpage>. <pub-id pub-id-type="doi">10.1186/s12911-019-0843-7</pub-id><pub-id pub-id-type="pmid">31331322</pub-id></mixed-citation></ref>
<ref id="B17"><label>17.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tan</surname> <given-names>WK</given-names></name> <name><surname>Hassanpour</surname> <given-names>S</given-names></name> <name><surname>Heagerty</surname> <given-names>PJ</given-names></name> <name><surname>Rundell</surname> <given-names>SD</given-names></name> <name><surname>Suri</surname> <given-names>P</given-names></name> <name><surname>Huhdanpaa</surname> <given-names>HT</given-names></name><etal/></person-group> <article-title>Comparison of natural language processing rules-based and machine-learning systems to identify lumbar spine imaging findings related to low back pain</article-title>. <source>Acad Radiol</source>. (<year>2018</year>) <volume>25</volume>(<issue>11</issue>):<fpage>1422</fpage>&#x2013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1016/j.acra.2018.03.008</pub-id><pub-id pub-id-type="pmid">29605561</pub-id></mixed-citation></ref>
<ref id="B18"><label>18.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Calleja Panero</surname> <given-names>JL</given-names></name> <name><surname>de la Poza</surname> <given-names>G</given-names></name> <name><surname>Hidalgo</surname> <given-names>L</given-names></name> <name><surname>Aguilera Sancho-Tello</surname> <given-names>MV</given-names></name> <name><surname>Torras</surname> <given-names>X</given-names></name> <name><surname>Santos de Lamadrid</surname> <given-names>R</given-names></name><etal/></person-group> <article-title>Patient journey of individuals tested for HCV in Spain: liverTAI, a retrospective analysis of EHRs through natural language processing</article-title>. <source>Gastroenterol Hepatol</source>. (<year>2023</year>) <volume>46</volume>(<issue>7</issue>):<fpage>491</fpage>&#x2013;<lpage>503</lpage>. <pub-id pub-id-type="doi">10.1016/j.gastrohep.2022.10.012</pub-id><pub-id pub-id-type="pmid">36273653</pub-id></mixed-citation></ref>
<ref id="B19"><label>19.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chandran</surname> <given-names>D</given-names></name> <name><surname>Robbins</surname> <given-names>DA</given-names></name> <name><surname>Chang</surname> <given-names>CK</given-names></name> <name><surname>Shetty</surname> <given-names>H</given-names></name> <name><surname>Sanyal</surname> <given-names>J</given-names></name> <name><surname>Downs</surname> <given-names>J</given-names></name><etal/></person-group> <article-title>Use of natural language processing to identify obsessive compulsive symptoms in patients with schizophrenia, schizoaffective disorder or bipolar disorder</article-title>. <source>Sci Rep</source>. (<year>2019</year>) <volume>9</volume>(<issue>1</issue>):<fpage>14146</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-019-49165-2</pub-id><pub-id pub-id-type="pmid">31578348</pub-id></mixed-citation></ref>
<ref id="B20"><label>20.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gray</surname> <given-names>GM</given-names></name> <name><surname>Zirikly</surname> <given-names>A</given-names></name> <name><surname>Ahumada</surname> <given-names>LM</given-names></name> <name><surname>Rouhizadeh</surname> <given-names>M</given-names></name> <name><surname>Richards</surname> <given-names>T</given-names></name> <name><surname>Kitchen</surname> <given-names>C</given-names></name><etal/></person-group> <article-title>Application of natural language processing to identify social needs from patient medical notes: development and assessment of a scalable, performant, and rule-based model in an integrated healthcare delivery system</article-title>. <source>JAMIA Open</source>. (<year>2023</year>) <volume>6</volume>(<issue>4</issue>):<fpage>ooad085</fpage>. <pub-id pub-id-type="doi">10.1093/jamiaopen/ooad085</pub-id><pub-id pub-id-type="pmid">37799347</pub-id></mixed-citation></ref>
<ref id="B21"><label>21.</label><mixed-citation publication-type="other"><collab>Centers for Disease Control and Prevention</collab>. (<year>2025</year>). <comment>Preliminary Estimated Flu Disease Burden 2024-2025 Flu Season. Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/flu-burden/php/data-vis/2024-2025.html">https://www.cdc.gov/flu-burden/php/data-vis/2024-2025.html</ext-link> <comment>(Accessed June 23, 2025)</comment></mixed-citation></ref>
<ref id="B22"><label>22.</label><mixed-citation publication-type="other"><collab>Centers for Disease Control and Prevention</collab>. (<year>2024</year>). <comment>Preliminary Estimates of COVID-19 Burden for 2024-2025. Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/covid/php/surveillance/burden-estimates.html">https://www.cdc.gov/covid/php/surveillance/burden-estimates.html</ext-link> <comment>(Accessed June 23, 2025)</comment></mixed-citation></ref>
<ref id="B23"><label>23.</label><mixed-citation publication-type="other"><collab>Centers for Disease Control and Prevention</collab>. (<year>2024</year>). <comment>Preliminary Estimates of RSV Burden for 2024&#x2013;2025. Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/rsv/php/surveillance/burden-estimates.html">https://www.cdc.gov/rsv/php/surveillance/burden-estimates.html</ext-link> <comment>(Accessed June 23, 2025)</comment></mixed-citation></ref>
<ref id="B24"><label>24.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Oliver</surname> <given-names>SE</given-names></name> <name><surname>Wallace</surname> <given-names>M</given-names></name> <name><surname>Twentyman</surname> <given-names>E</given-names></name> <name><surname>Moulia</surname> <given-names>DL</given-names></name> <name><surname>Godfrey</surname> <given-names>M</given-names></name> <name><surname>Link-Gelles</surname> <given-names>R</given-names></name><etal/></person-group> <article-title>Development of COVID-19 vaccine policy&#x2014;United States, 2020&#x2013;2023</article-title>. <source>Vaccine</source>. (<year>2024</year>) <volume>42 Suppl 3</volume>(<issue>Suppl 3</issue>):<fpage>125512</fpage>. <pub-id pub-id-type="doi">10.1016/j.vaccine.2023.12.022</pub-id><pub-id pub-id-type="pmid">38158297</pub-id></mixed-citation></ref>
<ref id="B25"><label>25.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Link-Gelles</surname> <given-names>R</given-names></name> <name><surname>Britton</surname> <given-names>A</given-names></name> <name><surname>Fleming-Dutra</surname> <given-names>KE</given-names></name></person-group>, <collab>CDC COVID-19 Vaccine Effectiveness Team</collab>. <article-title>Building the U.S. COVID-19 vaccine effectiveness program: past successes and future directions</article-title>. <source>Vaccine</source>. (<year>2024</year>) <volume>42 Suppl 3</volume>(<issue>Suppl 3</issue>):<fpage>125492</fpage>. <pub-id pub-id-type="doi">10.1016/j.vaccine.2023.12.002</pub-id><pub-id pub-id-type="pmid">38129285</pub-id></mixed-citation></ref>
<ref id="B26"><label>26.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Link-Gelles</surname> <given-names>R</given-names></name> <name><surname>Rowley</surname> <given-names>EA</given-names></name> <name><surname>DeSilva</surname> <given-names>MB</given-names></name> <name><surname>Dascomb</surname> <given-names>K</given-names></name> <name><surname>Irving</surname> <given-names>SA</given-names></name> <name><surname>Klein</surname> <given-names>NP</given-names></name><etal/></person-group> <article-title>Interim effectiveness of updated 2023&#x2013;2024 (monovalent XBB.1.5) COVID-19 vaccines against COVID-19&#x2013;associated hospitalization among adults aged &#x2265;18 years with immunocompromising conditions &#x2014; VISION network</article-title>. <source>MMWR Morb Mortal Wkly Rep</source>. (<year>2024</year>) <volume>73</volume>(<issue>12</issue>):<fpage>271</fpage>&#x2013;<lpage>6</lpage>. <pub-id pub-id-type="doi">10.15585/mmwr.mm7312a5</pub-id></mixed-citation></ref>
<ref id="B27"><label>27.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Roper</surname> <given-names>LE</given-names></name> <name><surname>Link-Gelles</surname> <given-names>R</given-names></name> <name><surname>Surie</surname> <given-names>D</given-names></name> <name><surname>DeCuir</surname> <given-names>J</given-names></name> <name><surname>Zambrano</surname> <given-names>LD</given-names></name> <name><surname>Prill</surname> <given-names>MM</given-names></name><etal/></person-group> <article-title>A framework for monitoring RSV prevention product effectiveness in the United States</article-title>. <source>Vaccine</source>. (<year>2025</year>) <volume>45</volume>:<fpage>126633</fpage>. <pub-id pub-id-type="doi">10.1016/j.vaccine.2024.126633</pub-id><pub-id pub-id-type="pmid">39755055</pub-id></mixed-citation></ref>
<ref id="B28"><label>28.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Thompson</surname> <given-names>MG</given-names></name> <name><surname>Stenehjem</surname> <given-names>E</given-names></name> <name><surname>Grannis</surname> <given-names>S</given-names></name> <name><surname>Ball</surname> <given-names>SW</given-names></name> <name><surname>Naleway</surname> <given-names>AL</given-names></name> <name><surname>Ong</surname> <given-names>TC</given-names></name><etal/></person-group> <article-title>Effectiveness of COVID-19 vaccines in ambulatory and inpatient care settings</article-title>. <source>N Engl J Med</source>. (<year>2021</year>) <volume>385</volume>(<issue>15</issue>):<fpage>1355</fpage>&#x2013;<lpage>71</lpage>. <pub-id pub-id-type="doi">10.1056/NEJMoa2110362</pub-id><pub-id pub-id-type="pmid">34496194</pub-id></mixed-citation></ref>
<ref id="B29"><label>29.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>DeCuir</surname> <given-names>J</given-names></name> <name><surname>Payne</surname> <given-names>AB</given-names></name> <name><surname>Self</surname> <given-names>WH</given-names></name> <name><surname>Rowley</surname> <given-names>EAK</given-names></name> <name><surname>Dascomb</surname> <given-names>K</given-names></name> <name><surname>DeSilva</surname> <given-names>MB</given-names></name><etal/></person-group> <article-title>Interim effectiveness of updated 2023&#x2013;2024 (monovalent XBB.1.5) COVID-19 vaccines against COVID-19-associated emergency department and urgent care encounters and hospitalization among immunocompetent adults aged &#x2265;18 years&#x2014;VISION and IVY networks, September 2023&#x2013;January 2024</article-title>. <source>MMWR Morb Mortal Wkly Rep</source>. (<year>2024</year>) <volume>73</volume>(<issue>8</issue>):<fpage>180</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.15585/mmwr.mm7308a5</pub-id><pub-id pub-id-type="pmid">38421945</pub-id></mixed-citation></ref>
<ref id="B30"><label>30.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Frutos</surname> <given-names>AM</given-names></name> <name><surname>Price</surname> <given-names>AM</given-names></name> <name><surname>Harker</surname> <given-names>E</given-names></name> <name><surname>Reeves</surname> <given-names>EL</given-names></name> <name><surname>Ahmad</surname> <given-names>HM</given-names></name> <name><surname>Murugan</surname> <given-names>V</given-names></name><etal/></person-group> <article-title>Interim estimates of 2023&#x2013;24 seasonal influenza vaccine effectiveness&#x2014;United States</article-title>. <source>MMWR Morb Mortal Wkly Rep</source>. (<year>2024</year>) <volume>73</volume>(<issue>8</issue>):<fpage>168</fpage>&#x2013;<lpage>74</lpage>. <pub-id pub-id-type="doi">10.15585/mmwr.mm7308a3</pub-id><pub-id pub-id-type="pmid">38421935</pub-id></mixed-citation></ref>
<ref id="B31"><label>31.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Link-Gelles</surname> <given-names>R</given-names></name> <name><surname>Chickery</surname> <given-names>S</given-names></name> <name><surname>Webber</surname> <given-names>A</given-names></name> <name><surname>Ong</surname> <given-names>T</given-names></name> <name><surname>Rowley</surname> <given-names>EA</given-names></name> <name><surname>DeSilva</surname> <given-names>MB</given-names></name><etal/></person-group> <article-title>Interim estimates of 2024&#x2013;2025 COVID-19 vaccine effectiveness among adults aged &#x2265;18 years&#x2014;VISION and IVY networks, September 2024&#x2013;January 2025</article-title>. <source>MMWR Morb Mortal Wkly Rep</source>. (<year>2025</year>) <volume>74</volume>(<issue>6</issue>):<fpage>73</fpage>&#x2013;<lpage>82</lpage>. <pub-id pub-id-type="doi">10.15585/mmwr.mm7406a1</pub-id><pub-id pub-id-type="pmid">40014628</pub-id></mixed-citation></ref>
<ref id="B32"><label>32.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bozio</surname> <given-names>CH</given-names></name> <name><surname>Butterfield</surname> <given-names>K</given-names></name> <name><surname>Irving</surname> <given-names>SA</given-names></name> <name><surname>Vazquez-Benitez</surname> <given-names>G</given-names></name> <name><surname>Ong</surname> <given-names>TC</given-names></name> <name><surname>Zheng</surname> <given-names>K</given-names></name><etal/></person-group> <article-title>Relative risks of COVID-19-associated hospitalizations and clinical outcomes by age and race/ethnicity&#x2014;March 2020&#x2013;March 2021</article-title>. <source>Open Forum Infect Dis</source>. (<year>2022</year>) <volume>9</volume>(<issue>10</issue>):<fpage>ofac376</fpage>. <pub-id pub-id-type="doi">10.1093/ofid/ofac376</pub-id><pub-id pub-id-type="pmid">36204160</pub-id></mixed-citation></ref>
<ref id="B33"><label>33.</label><mixed-citation publication-type="other"><collab>Python Software Foundation</collab>. <comment>Python Language Reference [computer program]. Version 3.10.12</comment> (<year>2024</year>).</mixed-citation></ref>
<ref id="B34"><label>34.</label><mixed-citation publication-type="confproc"><person-group person-group-type="author"><name><surname>Eyre</surname> <given-names>H</given-names></name> <name><surname>Chapman</surname> <given-names>AB</given-names></name> <name><surname>Peterson</surname> <given-names>KS</given-names></name> <name><surname>Shi</surname> <given-names>J</given-names></name> <name><surname>Alba</surname> <given-names>PR</given-names></name> <name><surname>Jones</surname> <given-names>MM</given-names></name><etal/></person-group> <article-title>Launching into clinical space with medspaCy: a new clinical text processing toolkit in python</article-title>. <conf-name>AMIA &#x2026; Annual Symposium Proceedings. AMIA Symposium</conf-name>; <conf-date>2021</conf-date> (<year>2022</year>). p. <fpage>438</fpage>&#x2013;<lpage>47</lpage></mixed-citation></ref>
<ref id="B35"><label>35.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Murff</surname> <given-names>HJ</given-names></name> <name><surname>FitzHenry</surname> <given-names>F</given-names></name> <name><surname>Matheny</surname> <given-names>ME</given-names></name> <name><surname>Gentry</surname> <given-names>N</given-names></name> <name><surname>Kotter</surname> <given-names>KL</given-names></name> <name><surname>Crimin</surname> <given-names>K</given-names></name><etal/></person-group> <article-title>Automated identification of postoperative complications within an electronic medical record using natural language processing</article-title>. <source>JAMA</source>. (<year>2011</year>) <volume>306</volume>(<issue>8</issue>):<fpage>848</fpage>&#x2013;<lpage>55</lpage>. <pub-id pub-id-type="doi">10.1001/jama.2011.1204</pub-id><pub-id pub-id-type="pmid">21862746</pub-id></mixed-citation></ref>
<ref id="B36"><label>36.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Y</given-names></name> <name><surname>Li</surname> <given-names>J</given-names></name> <name><surname>He</surname> <given-names>J</given-names></name> <name><surname>Tao</surname> <given-names>C</given-names></name></person-group>. <article-title>AE-GPT: using large language models to extract adverse events from surveillance reports-A use case with influenza vaccine adverse events</article-title>. <source>PLoS One</source>. (<year>2024</year>) <volume>19</volume>(<issue>3</issue>):<fpage>e0300919</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0300919</pub-id><pub-id pub-id-type="pmid">38512919</pub-id></mixed-citation></ref>
<ref id="B37"><label>37.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Alnor</surname> <given-names>AB</given-names></name> <name><surname>Lynggaard</surname> <given-names>RB</given-names></name> <name><surname>Laursen</surname> <given-names>MS</given-names></name> <name><surname>Vinholt</surname> <given-names>PJ</given-names></name></person-group>. <article-title>Natural language processing for identifying major bleeding risk in hospitalised medical patients</article-title>. <source>Comput Biol Med</source>. (<year>2025</year>) <volume>190</volume>:<fpage>110093</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2025.110093</pub-id><pub-id pub-id-type="pmid">40164027</pub-id></mixed-citation></ref>
<ref id="B38"><label>38.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bowles</surname> <given-names>A</given-names></name> <name><surname>Perez</surname> <given-names>C</given-names></name> <name><surname>Vachani</surname> <given-names>A</given-names></name> <name><surname>Steltz</surname> <given-names>J</given-names></name> <name><surname>Rose</surname> <given-names>B</given-names></name> <name><surname>Bryant</surname> <given-names>AK</given-names></name><etal/></person-group> <article-title>An NLP framework for the extraction of concept measurements from radiology and pathology notes</article-title>. <source>Stud Health Technol Inform</source>. (<year>2024</year>) <volume>310</volume>:<fpage>1446</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.3233/SHTI231237</pub-id><pub-id pub-id-type="pmid">38269689</pub-id></mixed-citation></ref></ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by"><p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1022178/overview">Emmanouil Spanakis</ext-link>, Foundation for Research and Technology Hellas (FORTH), Greece</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by"><p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1682631/overview">Gloria Sanchez Antolin</ext-link>, Hospital Universitario R&#x00ED;o Hortega, Spain</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2881697/overview">Mohamed Kamal</ext-link>, Children&#x0027;s Cancer Hospital, Egypt</p></fn>
</fn-group>
<fn-group>
<fn id="n3"><p><sup>1</sup>Immunization is used throughout this paper to broadly describe vaccination. As non-vaccine products (e.g., monoclonal antibodies) were considered for RSV, when discussing RSV, COVID-19, and influenza, immunization is used; when discussing COVID-19 and influenza, either together or separately, vaccination is used.</p></fn>
<fn id="n4"><p><sup>2</sup>See e.g., 45 C.F.R. part 46, 21 C.F.R. part 56; 42 U.S.C. &#x00A7;241(d); 5 U.S.C. &#x00A7;552a; 44 U.S.C. &#x00A7;3501 et seq.</p></fn>
</fn-group>
</back>
</article>