<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Cell. Infect. Microbiol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Cellular and Infection Microbiology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Cell. Infect. Microbiol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2235-2988</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fcimb.2026.1775191</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Predicting inter-microbial host specificity in oral biofilms using a lightweight relation-aware knowledge graph model</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Natarajan</surname><given-names>Prabhu Manickam</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Varma</surname><given-names>Sudhir Rama</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2358893/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Kodangattil Narayanan</surname><given-names>Jayaraj</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2569589/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Odeh</surname><given-names>Ruba</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Clinical sciences, College of Dentistry, Ajman University</institution>, <city>Ajman</city>,&#xa0;<country country="ae">United Arab Emirates</country></aff>
<aff id="aff2"><label>2</label><institution>Center for medical and bio-allied health sciences research</institution>, <city>Ajman</city>,&#xa0;<country country="ae">United Arab Emirates</country></aff>
<aff id="aff3"><label>3</label><institution>Department of Basic sciences, College of Dentistry, Ajman University</institution>, <city>Ajman</city>,&#xa0;<country country="ae">United Arab Emirates</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Sudhir Rama Varma, <email xlink:href="mailto:s.varma@ajman.ac.ae">s.varma@ajman.ac.ae</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-20">
<day>20</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>16</volume>
<elocation-id>1775191</elocation-id>
<history>
<date date-type="received">
<day>25</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>05</day>
<month>02</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>02</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Natarajan, Varma, Kodangattil Narayanan and Odeh.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Natarajan, Varma, Kodangattil Narayanan and Odeh</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-20">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>The human oral cavity hosts a complex microbial ecosystem of bacteria, viruses, bacteriophages, and other microorganisms forming biofilms in different niches. Phage&#x2013;bacteria host specificity is crucial in shaping microbial community, stability, and dysbiosis. Mapping this specificity is limited by experimental constraints, and traditional methods cannot capture ecological complexity. The goal is to create a graph-based model that treats inter-microbial host specificity as a relational learning problem, integrating taxonomic, ecological, and infection data into a knowledge graph. This improves phage&#x2013;bacteria host predictions and reveals microbial hubs and interaction patterns related to periodontal disease dysbiosis.</p>
</sec>
<sec>
<title>Methods</title>
<p>This study introduces a lightweight, relation-aware knowledge graph for predicting microbial host specificity in oral biofilms. We built a heterogeneous graph of the oral microbiome, incorporating microbial taxa, anatomical sites, taxonomic hierarchies, enrichment patterns, and INFECTS relationships. The dataset includes 500 viral taxa across four oral niches, with 21,338 significant co-occurrence relationships and various biological features. To learn meaningful representations, we combined graph embeddings with microbial features. We developed a relation-aware graph neural network, IK-BRNet, to efficiently encode ecological and interaction semantics.</p>
</sec>
<sec>
<title>Results</title>
<p>Model performance was evaluated against a conventional Graph Attention Network (GAT) using stratified training, validation, and test splits with class imbalance correction. IK-BRNet demonstrated faster convergence and superior discrimination ability, achieving a higher AUC-ROC (0.929 vs. 0.904) and markedly improved sensitivity for disease-associated viral taxa (93.8% vs. 56.3%). While the baseline GAT achieved higher accuracy and specificity, IK-BRNet consistently reduced false negatives, thereby improving its ability to detect disease-related microbial signals. Site-specific predictions confirmed biological validity, with the highest disease scores for dental plaque&#x2013;associated viruses and lower scores in healthy niches such as the tongue and buccal mucosa.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>This study shows that relation-aware graph learning offers a meaningful and efficient way to model inter-microbial host specificity in oral biofilms. The framework improves oral microbiome network inference and supports disease screening, ecological analysis, and microbiome-based dentistry.</p>
</sec>
</abstract>
<kwd-group>
<kwd>bacteriophages</kwd>
<kwd>host specificity</kwd>
<kwd>knowledge graph</kwd>
<kwd>oral biofilms</kwd>
<kwd>oral microbiome</kwd>
<kwd>periodontal disease</kwd>
<kwd>phage&#x2013;host interactions</kwd>
<kwd>virome</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="6"/>
<table-count count="2"/>
<equation-count count="3"/>
<ref-count count="33"/>
<page-count count="10"/>
<word-count count="5032"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Biofilms</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>The human oral cavity harbors one of the most densely colonized and taxonomically diverse microbial ecosystems in the body, encompassing thousands of bacterial species, viruses (including bacteriophages), archaea, and fungi. These microorganisms inhabit biofilms in oral niches like the tongue, mucosa, saliva, and plaque (<xref ref-type="bibr" rid="B2">Bhandary et&#xa0;al., 2024</xref>). They form complex networks through mutualism, competition, and parasitism, affecting oral and overall health. Periodontal diseases are widespread chronic inflammatory conditions mainly caused by dysbiosis of the oral biofilm rather than by a single pathogen (<xref ref-type="bibr" rid="B28">Wang et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B8">Hashim et&#xa0;al., 2025</xref>). While traditional models focus on bacteria such as Porphyromonas gingivalis, evidence shows that the oral virome, especially bacteriophages, plays a key role by influencing bacterial communities, virulence, and biofilm resilience. Phages affect periodontal disease indirectly through infection, gene transfer, and competition, but their host specificity is poorly understood, hindering understanding of early dysbiosis and disease progression (<xref ref-type="bibr" rid="B14">Li et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B9">Hasturk, 2022</xref>). Clinically, periodontal issues occur in specific plaque niches, with differing phage patterns between diseased and healthy sites (<xref ref-type="bibr" rid="B12">Lee et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B11">Krois et&#xa0;al., 2019</xref>). Recognizing ecological differences is key to distinguishing health from disease (<xref ref-type="bibr" rid="B19">Murakami et&#xa0;al., 2018</xref>). Current microbiome analyses miss complex microbial interactions, limiting early disease detection. Host specificity, especially bacteriophages targeting bacteria, is a crucial ecological factor. 
Phages regulate populations, transfer genes, and promote resilience via lytic and lysogenic cycles. Their interactions, shaped by bacterial receptors, defenses, and coevolution, create structured infection networks acting as biological chokepoints.</p>
<p>Despite their ecological importance, the interactions between microbes and hosts in the oral microbiome remain poorly understood. This is partly because identifying virus&#x2013;host links is challenging: it often requires culturing host cells, which is impossible for most uncultivated oral microbes, or computational methods such as co-abundance, CRISPR spacer mapping, or genomic similarity. These approaches have limitations in resolution, scalability, or biological accuracy (<xref ref-type="bibr" rid="B26">Sun et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B1">Al-Ouqail et&#xa0;al., 2025</xref>; <xref ref-type="bibr" rid="B20">Natarajan and Umapathy, 2025</xref>). Moreover, they fail to incorporate broader ecological and taxonomic context&#x2014;such as anatomical niche, microbial abundance, or evolutionary lineage&#x2014;which may be critical for accurate inference. One recent study reported GSPHI (<xref ref-type="bibr" rid="B22">Pan et&#xa0;al., 2023</xref>), which integrates NLP-based sequence encoding, SDNE graph embeddings, and DNN classification to predict phage-host interactions with high accuracy. It achieved 86.65% accuracy and AUC 0.9208 on drug-resistant bacteria, offering reliable candidates for phage therapy experiments. Another recent study showed that CHERRY (<xref ref-type="bibr" rid="B25">Shang and Sun, 2022</xref>) predicts host links in a multimodal knowledge graph using protein and DNA features. It beats 11 methods, improving species-level accuracy by 37%, and stays stable on short contigs.</p>
<p>PHPGAT constructs a multimodal knowledge graph of phage&#x2013;phage, host&#x2013;host, and phage&#x2013;host relationships, using GATv2 for context-aware node embeddings (<xref ref-type="bibr" rid="B30">Wei et&#xa0;al., 2024</xref>). An inner-product decoder predicts interactions, supporting phage therapy and microbial ecology. In parallel, graph-based machine learning has emerged as a powerful paradigm for modelling biological systems as networks of entities and relationships. Graph neural networks (GNNs), in particular, can learn context-aware representations from both node features and connection topology and have shown promising results in fields such as protein&#x2013;protein interaction prediction, drug discovery, and host&#x2013;pathogen interaction mapping. Most GNN applications in microbiome science use homogeneous networks, such as bacteria&#x2013;bacteria co-abundance graphs or symptom&#x2013;microbe associations, that miss ecological heterogeneity and semantics, such as between phages and bacteria or microbes and host niches.</p>
<p>Current GNN models (<xref ref-type="bibr" rid="B6">Doremure Gamage et&#xa0;al., 2025</xref>; <xref ref-type="bibr" rid="B23">Przymus et&#xa0;al., 2025</xref>; <xref ref-type="bibr" rid="B29">Wang, 2025</xref>; <xref ref-type="bibr" rid="B32">Yadalam et&#xa0;al., 2025b</xref>; <xref ref-type="bibr" rid="B31">Yadalam et&#xa0;al., 2025a</xref>) for microbial systems lack interpretability, scalability, or relation-specific reasoning, especially in sparse graphs with limited labels. Large-scale transformers like HGT and HAN can model complex data, but require large datasets (<xref ref-type="bibr" rid="B16">Lu et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B21">Nerella et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B1">Al-Ouqail et&#xa0;al., 2025</xref>; <xref ref-type="bibr" rid="B33">Yuan et&#xa0;al., 2025</xref>) and substantial computing power, limiting their use with curated microbiome data. Hence, lightweight, relation-aware models are essential to merge AI with biological insights. These should include microbial kingdoms, ecological context, and functional interactions, operating efficiently on modest datasets. They must predict novel microbe interactions and identify network controls, such as key taxa targeted by phages, to support potential microbiome interventions.</p>
<p>The goal is to create a graph-based framework modeling inter-microbial host specificity as a relational learning problem, combining taxonomic, ecological, and infection data into a knowledge graph. This aims to improve phage&#x2013;bacteria host predictions and identify microbial hubs and interaction patterns linked to periodontal disease dysbiosis.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Methodology</title>
<p>This study used a curated, multi-kingdom oral microbiome dataset with bacterial, viral, and bacteriophage components from human oral biofilm samples across various niches (<xref ref-type="bibr" rid="B5">Deo and Deshmukh, 2019</xref>; <xref ref-type="bibr" rid="B13">Li et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B10">Jie et&#xa0;al., 2025</xref>). The dataset covers microbial taxa from dental plaque, tongue, buccal mucosa, and saliva, enabling site-specific analysis of periodontal health and disease. Bacterial entries represent oral bacterial taxa at the genus or species level, including both health-associated commensals and disease-related organisms implicated in periodontal dysbiosis. Viral entries include oral viruses and virus-like contigs, many of which are bacteriophages, clearly distinguished by genomic annotations and host predictions. Phage records provide bacterial host assignments, genome size, GC content, and abundance, facilitating modeling of phage&#x2013;bacteria host specificity in oral biofilms. Quantitative features across all microbes include site-specific and mean relative abundances, genomic traits, and ecological relationships, as evidenced by significant co-occurrence patterns. All bacterial, viral, and bacteriophage data were merged into a single dataset for unified, interaction-aware modeling of microbial relationships across oral niches within a common knowledge graph framework.</p>
<p>We constructed a heterogeneous knowledge graph from the oral microbiome dataset. Nodes represented microbial taxa and anatomical sites: each bacterial taxon (from the &#x201c;Taxon&#x201d; column, Kingdom = Bacteria) was a Bacteria node; each viral taxon (with Kingdom = Virus) was a Virus node; and each contig labeled as a phage was a Phage node. An additional set of Site nodes captured sample locations (Tongue, Saliva, Buccal Mucosa, Dental Plaque, etc.). We added IS_A edges to encode taxonomy using the provided classification fields. An ENRICHED_AT edge connected a microbe node to a Site node if that microbe was enriched at that site. Known host relationships formed INFECTS edges; in practice, this was based on predicted host assignments (e.g., columns such as &#x201c;Predicted_host&#x201d;) that linked phages to their bacterial hosts. In this fused graph, each node thus carried intrinsic features (genome size, GC content, site-specific abundance profile, mean abundance, etc.) (<xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>). Let <inline-formula>
<mml:math display="inline" id="im1"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>h</mml:mi></mml:mstyle><mml:mrow><mml:mi>n</mml:mi><mml:mn>2</mml:mn><mml:mi>v</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> denote the Node2Vec embedding and <inline-formula>
<mml:math display="inline" id="im2"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>h</mml:mi></mml:mstyle><mml:mrow><mml:mi>f</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> the projected raw feature vector of a node. A learnable gating function <inline-formula>
<mml:math display="inline" id="im3"><mml:mrow><mml:mi>g</mml:mi><mml:mo>=</mml:mo><mml:mi>&#x3c3;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>W</mml:mi><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>h</mml:mi></mml:mstyle><mml:mrow><mml:mi>n</mml:mi><mml:mn>2</mml:mn><mml:mi>v</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2225;</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>h</mml:mi></mml:mstyle><mml:mrow><mml:mi>f</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">]</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> is used to compute the fused embedding <inline-formula>
<mml:math display="inline" id="im4"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>h</mml:mi></mml:mstyle><mml:mrow><mml:mi>f</mml:mi><mml:mi>u</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>g</mml:mi><mml:mo>&#xb7;</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>h</mml:mi></mml:mstyle><mml:mrow><mml:mi>n</mml:mi><mml:mn>2</mml:mn><mml:mi>v</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mi>g</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#xb7;</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>h</mml:mi></mml:mstyle><mml:mrow><mml:mi>f</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>. This allows the model to adaptively balance structural topology and biological attributes for each node.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Workflow depicting IK-BRNet Framework for Oral Microbiome Host-Phage Prediction. <bold>(A)</bold> Heterogeneous Knowledge Graph Construction &amp; Ontology. <bold>(B)</bold> Multi-View Representation Learning &amp; Gated Fusion. <bold>(C)</bold> IK-BRNet Architecture (Relation-Aware GNN). <bold>(D)</bold> Host-Phage Link Prediction.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-16-1775191-g001.tif">
<alt-text content-type="machine-generated">Diagram illustrates the IK-BRNet framework for predicting oral microbiome host-phage interactions, divided into four labeled panels: A shows the construction of a heterogeneous knowledge graph linking databases, bacteria, viruses, and sites with ontological relationships; B depicts the multi-view representation learning and gated fusion, showing Node2Vec and linear projection outputs combined by a gated attention mechanism; C visualizes the architecture using relation-aware graph attention layers to derive final node embeddings; D presents host-phage link prediction where phages infect bacteria, with output including a classification prediction and probability score.</alt-text>
</graphic></fig>
<p>Let <inline-formula>
<mml:math display="inline" id="im5"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>z</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mi>d</mml:mi></mml:msup></mml:mrow></mml:math></inline-formula> be the Node2Vec embedding of node <inline-formula>
<mml:math display="inline" id="im6"><mml:mi>i</mml:mi></mml:math></inline-formula>, and let <inline-formula>
<mml:math display="inline" id="im7"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>x</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mi>p</mml:mi></mml:msup></mml:mrow></mml:math></inline-formula> be its raw feature vector (abundance, genome statistics, taxonomy indicators). We first project raw features into the same latent space:</p>
<disp-formula>
<mml:math display="block" id="M1"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>h</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3d5;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mi>x</mml:mi></mml:msub><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>x</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>b</mml:mi></mml:mstyle><mml:mi>x</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>h</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mi>d</mml:mi></mml:msup></mml:mrow></mml:math>
</disp-formula>
<p>A learnable gate <inline-formula>
<mml:math display="inline" id="im8"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>g</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>d</mml:mi></mml:msup></mml:mrow></mml:math></inline-formula> is then computed as:</p>
<disp-formula>
<mml:math display="block" id="M2"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>g</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3c3;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mi>g</mml:mi></mml:msub><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>z</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2225;</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>h</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">]</mml:mo><mml:mo>+</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>b</mml:mi></mml:mstyle><mml:mi>g</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>The fused node representation is obtained by element-wise interpolation:</p>
<disp-formula>
<mml:math display="block" id="M3"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>f</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>g</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2299;</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>z</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>g</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2299;</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>h</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im9"><mml:mrow><mml:mi>&#x3c3;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mo>&#xb7;</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> is the sigmoid function, <inline-formula>
<mml:math display="inline" id="im10"><mml:mrow><mml:mi>&#x3d5;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mo>&#xb7;</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> denotes a non-linear activation (e.g., ReLU), <inline-formula>
<mml:math display="inline" id="im11"><mml:mo>&#x2299;</mml:mo></mml:math></inline-formula> is element-wise multiplication. This gating allows the model to adaptively weight topology-derived structure versus biological attribute information per node.</p>
<p>To learn representations, we first computed Node2Vec embeddings for all nodes. Node2Vec performs biased random walks on the graph to capture network neighborhoods and optimizes low-dimensional node embeddings so that nodes that co-occur on walks have similar vectors. We used these Node2Vec embeddings as one view of each node. In parallel, we transformed raw node features (numeric abundances, genome stats, one-hot taxonomy) via a learned linear projection. The two views were fused with a gated attention mechanism: each node&#x2019;s projected feature vector and its Node2Vec embedding were combined through a learned gate that adaptively weighted their contributions. This &#x201c;gated fusion&#x201d; allowed the model to balance structural embedding information with raw features. The fused vectors served as initial node representations for graph learning.</p>
<p>We designed a lightweight relation-aware graph attention network, IK-BRNet, to propagate information on the graph. IK-BRNet extends a standard GAT by incorporating relation-type information. In each layer, nodes attend to their neighbors, but edge types (INFECTS, ENRICHED_AT, IS_A) modulate the attention. Unlike some multi-relational GNNs that assign separate parameters to each relation (which can overfit when there are many relations), IK-BRNet shares attention mechanisms across relations while still preserving relation identities. This yields a more parameter-efficient model that leverages edge semantics without a prohibitively large parameter count. Concretely, IK-BRNet consisted of a feature-projection layer (mapping the fused input to a 32-dimensional hidden space), followed by two graph-convolution/attention layers, and a final SoftMax classification head. We applied dropout (0.3) and batch normalization to stabilize training. For comparison, we implemented a baseline GAT model: a standard 2-layer graph attention network with the same hidden dimensionality (<xref ref-type="bibr" rid="B27">Szafra&#x144;ski et&#xa0;al., 2021</xref>) and input features, but ignoring relation labels (treating all edges uniformly). Both models used ReLU activations and learned self-loop weights. (We also experimented with more expressive heterogeneous GNNs such as Heterogeneous Graph Transformer (HGT) and HAN. Still, these did not substantially outperform GAT on this data and are not reported in detail here.) IK-BRNet extends standard graph attention networks by incorporating relation awareness through edge-type embeddings that modulate attention scores. Unlike fully parameterized heterogeneous GNNs, IK-BRNet shares attention parameters across relations while conditioning message passing on relation identity, enabling efficient learning of ecological semantics without over-parameterization.</p>
<p>We divided data into training, validation, and test sets (60/20/20), stratified by virus association. Twenty percent of nodes were held out for testing; the remaining 80% was split into 75% training and 25% validation, preserving the disease/health ratio (about 16% &#x201c;disease&#x201d; nodes). Training used Adam (learning rate 0.01, weight decay 5&#xd7;10<sup>-4</sup>) for up to 100 epochs, with early stopping based on the validation loss. The weighted cross-entropy loss addressed class imbalance. Each epoch involved full-batch graph updates: for IK-BRNet and GAT, node logits were computed, and the loss was calculated using the train mask with INFECTS labels. Post-training, link prediction was evaluated on the test set. ROC-AUC, PR-AUC, Hits@K, and MRR metrics were based on test edges (positives = true INFECTS links, negatives sampled from non-edges) (<xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>).</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Shows the architecture hyperparameters used in the study.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Parameter</th>
<th valign="middle" align="left">Used setting</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Co-occurrence edge threshold</td>
<td valign="middle" align="left">Spearman |&#x3c1;| &gt; 0.5, p &lt; 0.01</td>
</tr>
<tr>
<td valign="middle" align="left">Train/validation/test split</td>
<td valign="middle" align="left">60%/20%/20%, seed 42</td>
</tr>
<tr>
<td valign="middle" align="left">GNN architecture depth</td>
<td valign="middle" align="left">2-layer GCN and GAT</td>
</tr>
<tr>
<td valign="middle" align="left">Hidden dimension size</td>
<td valign="middle" align="left">32</td>
</tr>
<tr>
<td valign="middle" align="left">Dropout rate</td>
<td valign="middle" align="left">0.3</td>
</tr>
<tr>
<td valign="middle" align="left">Learning rate</td>
<td valign="middle" align="left">0.01 with Adam</td>
</tr>
<tr>
<td valign="middle" align="left">Weight decay</td>
<td valign="middle" align="left">5e-4</td>
</tr>
<tr>
<td valign="middle" align="left">Early stopping patience</td>
<td valign="middle" align="left">20 epochs</td>
</tr>
<tr>
<td valign="middle" align="left">Class weighting strategy</td>
<td valign="middle" align="left">Inverse frequency, normalized.</td>
</tr>
<tr>
<td valign="middle" align="left">Disease label definition</td>
<td valign="middle" align="left">Dental plaque enrichment = disease</td>
</tr>
<tr>
<td valign="middle" align="left">Correlation method</td>
<td valign="middle" align="left">Spearman</td>
</tr>
<tr>
<td valign="middle" align="left">Significance threshold</td>
<td valign="middle" align="left">&#x3b1; = 0.05 (p); &#x3b1; = 0.01 (edges)</td>
</tr>
<tr>
<td valign="middle" align="left">Feature normalization</td>
<td valign="middle" align="left">StandardScaler (z-score)</td>
</tr>
<tr>
<td valign="middle" align="left">Adjacency normalization</td>
<td valign="middle" align="left">D^(-1/2) A D^(-1/2)</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<p>The oral microbiome plays a role in periodontal disease, which affects 47% of US adults over 30, underscoring the need for early detection and treatment. Viral ecology influences biofilm stability. IK-BRNet, the first biology-aware graph neural network (GNN) for oral microbiome analysis, integrates ecological network structures based on co-occurrence patterns, uses site-specific data from four oral sites, and balances disease and health samples. Its architecture combines feature projection with a graph convolutional network (GCN), achieving faster convergence (33 vs. 41 epochs, 19.5% improvement) and better AUC-ROC scores (0.929 vs. 0.904) with more parameters, improving disease detection. IK-BRNet offers high sensitivity (93.8%) for screening, whereas GAT offers high specificity (95.2%) for monitoring; both models demonstrate excellent discrimination (AUC &gt; 0.90), enabling personalized risk assessments. The models classify viruses linked to health or disease, differentiating those in dental plaque, tongue, buccal mucosa, or saliva.</p>
<p>The dataset included 500 viral taxa, with 81 (16.2%) linked to disease via dental plaque and 419 (83.8%) associated with health at other sites, supported by 21,338 co-occurrence relationships across seven features. IK-BRNet achieved 79% accuracy on 100 test samples, with 93.8% sensitivity (15 of 16 disease cases) and 76.2% specificity (64 of 84 healthy cases), resulting in 23.8% false positives and 6.2% false negatives. GAT achieved 89% accuracy, with lower sensitivity (56.3%) and higher specificity (95.2%), resulting in fewer false positives (4.8%) but more false negatives (43.8%). Dental plaque scored highest (0.787), confirming the presence of disease, while buccal mucosa (0.063), tongue (0.310), and saliva (0.230) had low scores, indicating health. Our lightweight IK-BRNet model significantly outperformed the baseline GAT on the INFECTS link-prediction task.</p>
<p><xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref> shows the following: (a) IK-BRNet achieved smoother loss convergence and lower validation loss than GAT, indicating better generalization. (b) Validation accuracy was higher and more stable for IK-BRNet, suggesting stronger model robustness. (c) IK-BRNet outperformed GAT in F1-score, recall, and AUC-ROC &#x2014; key metrics for imbalanced biological classification. (d) Confusion matrices show that IK-BRNet had fewer false negatives, thereby accurately capturing more disease cases. (e) The sensitivity (0.938) of IK-BRNet was much higher than that of GAT (0.562), aligning with the detection of microbial disease signals. Overall, IK-BRNet&#x2019;s relational architecture better captures inter-microbial dependencies relevant to oral biofilm dysbiosis.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Shows that the IK-BRNet achieved smoother loss convergence and lower validation loss than GAT, indicating better generalization.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-16-1775191-g002.tif">
<alt-text content-type="machine-generated">Composite figure compares IK-BRNet and Baseline GAT on multiple metrics: two line graphs show training and validation loss and accuracy over forty epochs, bar charts display higher scores for GAT in accuracy, F1, precision, recall, and AUC, and confusion matrices illustrate prediction outcomes for each model. Sensitivity and specificity comparison bar chart reveals GAT with higher specificity and IK-BRNet with higher sensitivity.</alt-text>
</graphic></fig>
<p><xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref> shows the ROC curves comparing IK-BRNet (blue) and the baseline GAT (red) for binary classification performance. IK-BRNet achieves a higher AUC (0.929) than GAT (0.904), indicating better discrimination between classes. Both models outperform random guessing (black dashed line), but IK-BRNet consistently maintains a higher true positive rate at low false positive rates. This confirms that IK-BRNet is more effective at identifying disease-associated patterns in oral biofilm networks.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Shows the ROC curves comparing IK-BRNet (blue) and the baseline GAT (red) for binary classification performance.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-16-1775191-g003.tif">
<alt-text content-type="machine-generated">ROC curve comparison chart showing IK-BRNet in blue with an area under the curve of zero point nine two nine, Baseline GAT in red with an area under the curve of zero point nine zero four, and a diagonal dashed line indicating a random classifier. The x-axis represents false positive rate and the y-axis represents true positive rate.</alt-text>
</graphic></fig>
<p><xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref> shows the network degree distribution, with a mean node degree of 85.4, indicating a densely connected microbiome interaction graph. The center panel displays mean abundance for health-associated vs. disease-associated microbes, revealing higher variability and outliers in the disease group. The right panel illustrates site enrichment of viruses, with the buccal mucosa and tongue being dominant niches, suggesting anatomical specificity in microbial colonization.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Shows the network degree distribution.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-16-1775191-g004.tif">
<alt-text content-type="machine-generated">Three-panel scientific figure. Left panel: vertical bar chart showing network degree distribution, with node degree on the x-axis, frequency on the y-axis, and a dashed red line marking the mean of eighty-five point four. Center panel: vertical boxplot compares mean abundance between health-associated and disease-associated groups, showing medians, interquartile ranges, and outliers. Right panel: vertical bar chart shows site enrichment distribution for viruses; buccal mucosa has the most at two hundred eighteen, followed by tongue, dental plaque, and saliva.</alt-text>
</graphic></fig>
<p><xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref> shows the following: (a) Model Complexity: IK-BRNet has a slightly higher parameter count (4,050) than the baseline GAT (2,994), trading a minor increase in complexity for richer relational modeling. (b) Training Efficiency: IK-BRNet converges faster (33 epochs) than GAT (41 epochs), indicating more stable and efficient learning dynamics. (c) Prediction Confidence: IK-BRNet achieves tighter, more confident correct predictions, suggesting robustness despite lower median confidence driven by edge regularization. (d) Error Analysis: IK-BRNet reduces false negatives (1 vs. 7) but increases false positives (20 vs. 4), emphasizing its prioritization of sensitivity in disease-associated predictions.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Shows comparison between various parameters.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-16-1775191-g005.tif">
<alt-text content-type="machine-generated">Four-panel comparison of IK-BRNet and Baseline GAT models shows IK-BRNet has higher model complexity, lower training epochs, lower confidence on correct predictions, and more false positives but fewer false negatives than Baseline GAT.</alt-text>
</graphic></fig>
<p><xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref> shows the Clinical Application Suitability comparison between IK-BRNet and the baseline GAT in clinical settings. IK-BRNet performs significantly better (0.938 vs. 0.562) in disease screening, indicating higher utility for early detection. Conversely, GAT performs better overall in general monitoring tasks (0.952 vs. 0.762), suggesting it offers greater stability and fewer false alarms over time. Performance Trade-offs Analysis: This plot shows trade-offs: IK-BRNet has higher sensitivity (0.94 vs. 0.56), making it more reliable for detecting disease patterns. GAT excels in specificity (0.95 vs. 0.76) and accuracy (0.89 vs. 0.79), reducing false positives and maintaining correctness.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Shows the clinical application suitability.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fcimb-16-1775191-g006.tif">
<alt-text content-type="machine-generated">Two side-by-side bar charts compare IK-BRNet (blue) and Baseline GAT (red) in clinical application suitability and performance trade-offs. IK-BRNet outperforms in disease screening and sensitivity, while Baseline GAT excels in general monitoring, specificity, and overall accuracy.</alt-text>
</graphic></fig>
<p><xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref> highlights a trade-off in performance between IK-BRNet and the baseline GAT for oral virome-based disease classification. IK-BRNet achieves superior AUC-ROC (0.929) and sensitivity (0.938), and faster convergence (33 vs. 41 epochs), indicating better disease case detection and improved training efficiency. In contrast, GAT shows higher overall accuracy (0.890), F1-score (0.885 vs. 0.816), specificity (0.952), and fewer false positives, favoring precision in identifying healthy cases. The results suggest that IK-BRNet is better suited for disease screening, while GAT may be preferable for general monitoring with fewer false alarms.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Highlights a trade-off in performance between IK-BRNet and the baseline GAT for oral virome-based disease classification.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Metric</th>
<th valign="middle" align="left">IK-BRNet</th>
<th valign="middle" align="left">Baseline GAT</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Test Accuracy</td>
<td valign="middle" align="left">0.790</td>
<td valign="middle" align="left">0.890</td>
</tr>
<tr>
<td valign="middle" align="left">F1-Score</td>
<td valign="middle" align="left">0.816</td>
<td valign="middle" align="left">0.885</td>
</tr>
<tr>
<td valign="middle" align="left">Precision</td>
<td valign="middle" align="left">0.896</td>
<td valign="middle" align="left">0.883</td>
</tr>
<tr>
<td valign="middle" align="left">Recall</td>
<td valign="middle" align="left">0.790</td>
<td valign="middle" align="left">0.890</td>
</tr>
<tr>
<td valign="middle" align="left">AUC-ROC</td>
<td valign="middle" align="left">0.929</td>
<td valign="middle" align="left">0.904</td>
</tr>
<tr>
<td valign="middle" align="left">Sensitivity</td>
<td valign="middle" align="left">0.938</td>
<td valign="middle" align="left">0.562</td>
</tr>
<tr>
<td valign="middle" align="left">Specificity</td>
<td valign="middle" align="left">0.762</td>
<td valign="middle" align="left">0.952</td>
</tr>
<tr>
<td valign="middle" align="left">MCC</td>
<td valign="middle" align="left">0.538</td>
<td valign="middle" align="left">0.561</td>
</tr>
<tr>
<td valign="middle" align="left">Convergence (epochs)</td>
<td valign="middle" align="left">33</td>
<td valign="middle" align="left">41</td>
</tr>
<tr>
<td valign="middle" align="left">Parameters</td>
<td valign="middle" align="left">4,050</td>
<td valign="middle" align="left">2,994</td>
</tr>
<tr>
<td valign="middle" align="left">False Positives</td>
<td valign="middle" align="left">20</td>
<td valign="middle" align="left">4</td>
</tr>
<tr>
<td valign="middle" align="left">False Negatives</td>
<td valign="middle" align="left">1</td>
<td valign="middle" align="left">7</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>This study classifies viruses as either health-associated, enriched in tongue, buccal mucosa, or saliva, or disease-associated, enriched in dental plaque. The dataset covers 500 viral taxa (16.2% linked to disease, 83.8% to health) and over 21,000 co-occurrence relationships. The IK-BRNet model achieved 79% accuracy, with high sensitivity for disease detection and good specificity for health, while the baseline GAT outperformed with 89% accuracy but lower sensitivity. Disease scores were highest in dental plaque and lowest in healthy sites such as the buccal mucosa and tongue. Dysbiosis of the oral microbiome is associated with periodontal disease, which affects 47% of US adults over 30. Early detection enables prevention and treatment; the virome influences biofilm stability. IK-BRNet, the first biology-aware graph neural network for oral microbiome analysis, integrates ecological networks and site-specific data from four oral sites and addresses class imbalance. Its architecture, which combines feature projection and graph convolution, converges faster (33 vs. 41 epochs) and achieves an AUC-ROC of 0.929, surpassing GAT&#x2019;s 0.904 and providing better disease detection, despite having more parameters. IK-BRNet has 93.8% sensitivity for screening, while GAT offers 95.2% specificity for monitoring. Both models have AUCs above 0.90, enabling personalized periodontal risk assessment.</p>
<p>IK-BRNet and GAT have clear strengths and trade-offs. IK-BRNet, a disease screening expert, has a sensitivity of 93.8%, detecting nearly all cases, and an AUC-ROC of 92.9%, indicating better discrimination. Optimized for high-risk groups, it prioritizes sensitivity. GAT, suited for general monitoring, has a specificity of 95.2% and an accuracy of 89.0%, making it ideal for low-prevalence groups. IK-BRNet trades specificity for sensitivity, with a 23.8% false-positive rate, while GAT sacrifices sensitivity, missing 43.8% of cases. IK-BRNet converges in 33 epochs&#x2014;19.5% faster than GAT&#x2014;thanks to its architecture, which includes inductive bias and batch normalization. Sensitivity varies by 37.5 points, specificity by 19.0 points, and AUC-ROC increases by 2.8%. For periodontal disease screening, IK-BRNet is recommended for high-risk groups like smokers, diabetics, and the elderly, due to its 93.8% sensitivity. GAT is suitable for low-prevalence groups, with 89% accuracy and 95.2% specificity. Both models support clinical decision-making and diagnostics, with AUCs over 0.90.</p>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>Our IK-BRNet model, which combines graph-structural embeddings with raw microbial features and relation-aware attention, markedly improved phage&#x2013;host link prediction in the oral microbiome. In our experiments, IK-BRNet outperformed the GAT baseline, achieving an ROC&#x2013;AUC of 0.93 versus 0.90. These improvements mainly stem from greater sensitivity with minimal loss of specificity, ideal for screening. IK-BRNet&#x2019;s ROC&#x2013;AUC matches or exceeds recent models, such as GSPHI&#x2019;s ~0.9208 on the resistant ESKAPE pathogens dataset. Similarly, Du et&#xa0;al.&#x2019;s MI-RGC model and Shang et&#xa0;al.&#x2019;s CHERRY model (<xref ref-type="bibr" rid="B24">Shang and Sun, 2021</xref>; <xref ref-type="bibr" rid="B7">Du et&#xa0;al., 2023</xref>), both leveraging graph convolutional networks (GCNs) and multimodal data, reported substantial accuracy improvements (e.g., CHERRY attained ~80% species-level accuracy, ~37% better than prior tools). These comparisons indicate that modern graph-based methods routinely reach AUCs in the low 0.90s; our result of 0.93 thus validates IK-BRNet&#x2019;s competitive performance (<xref ref-type="fig" rid="f2"><bold>Figures&#xa0;2</bold></xref>-<xref ref-type="fig" rid="f6"><bold>6</bold></xref>) (<xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>).</p>
<p>IK-BRNet&#x2019;s innovations offer practical benefits by explicitly modeling edge types (phage&#x2013;phage, host&#x2013;host, INFECTS) and sharing attention parameters, avoiding over-parameterization. This lightweight model, with a few thousand parameters, requires fewer training epochs than traditional GAT or complex GNNs. It is more efficient than models such as PHPGAT, which uses GATv2 on multimodal knowledge graphs, and HostG, which trains large GCNs on combined data (<xref ref-type="bibr" rid="B18">Matrishin et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B15">Liu et&#xa0;al., 2024</xref>). Our relation-aware GAT requires fewer parameters and converges faster, matching or surpassing these heavier models. This aligns with findings that relation-sharing attention can boost performance without added complexity. MI-RGC and PHPGAT show graph-enhanced models outperform non-graph baselines, but IK-BRNet achieves similar gains with a simpler setup.</p>
<p>Comparisons to recent studies show that IK-BRNet&#x2019;s strategy of fusing graph structure with raw features is broadly beneficial. Shang et&#xa0;al.&#x2019;s HostG, which similarly integrates virus&#x2013;virus protein similarities and virus&#x2013;host DNA sequence edges into a knowledge graph, achieved high accuracy and could even predict hosts from novel taxa. Our approach is analogous in spirit, but targets phage&#x2013;bacterium links specifically in an ecological network. Likewise, PHPGCA (graph contrastive augmentation) and PHPGAT (heterogeneous GAT) both use multi-relational graphs and reported superior accuracy compared to previous models. For instance, Du et&#xa0;al. noted that their graph-contrastive model outperformed the prior CHERRY model by 2&#x2013;9% in accuracy on benchmark datasets. At the same time, Liu et&#xa0;al. reported that PHPGAT outperformed existing tools on two datasets (<xref ref-type="bibr" rid="B15">Liu et&#xa0;al., 2024</xref>). Our ROC&#x2013;AUC of 0.93 compares favorably to these results, and our gains in sensitivity (fewer missed links) mirror their reported improvements. In short, like these recent methods, IK-BRNet confirms that leveraging graph context (co-occurrence, co-abundance, sequence similarity, etc.) yields more accurate phage&#x2013;host predictions than sequence features alone.</p>
<p>Beyond accuracy, IK-BRNet offered meaningful insights into oral microbial ecology. The phage&#x2013;host (INFECTS) network was uneven, with few bacterial genera, such as Porphyromonas and Fusobacterium, dominating interactions. These genera had high link degrees and centrality as keystone members of the biofilm. P. gingivalis is a key pathogen in periodontitis, promoting inflammation, while Fusobacterium is vital in dental plaque and linked to disease. Their identification as hubs suggests phages targeting them could impact community structure. IK-BRNet&#x2019;s predictions align with prior studies, such as those by Matrishin et&#xa0;al. (<xref ref-type="bibr" rid="B18">Matrishin et&#xa0;al., 2023</xref>), who found that P. gingivalis harbors diverse prophages that affect its biology and ecology. Ly et&#xa0;al. showed that oral viromes differ between periodontitis and health, with phages predicted to target key bacterial hosts in disease versus in health (<xref ref-type="bibr" rid="B17">Ly et&#xa0;al., 2014</xref>). Focusing on Porphyromonas, Fusobacterium, and similar genera, IK-BRNet confirms microbial ecology hypotheses&#x2014;like the &#x201c;keystone pathogen&#x201d; theory&#x2014;while identifying new intervention targets. For example, abundant phages targeting P. gingivalis or Fusobacterium could be used to disrupt pathogenic biofilms, as shown by phage biocontrol studies in the oral cavity, and single-relational GNNs could capture the complete oral microbial &#x201c;knowledge graph.&#x201d;</p>
<p>Our findings confirm both hypotheses: that a fused knowledge graph with co-abundance improves link prediction and that predictions focus on certain taxa. Compared with standard GAT, IK-BRNet uses fewer parameters, converges faster, and provides explicit attention scores for interpretability, making it a promising approach for microbiome network inference. Its slight reduction in specificity for higher sensitivity is advantageous in disease screening or ecological monitoring, where missing an interaction (false negative) is costlier than a false alarm. GAT-like models that prioritize accuracy may be better suited to contexts requiring strict precision, such as predicting microbiome composition under normal conditions. Indeed, Shang et&#xa0;al. note that different host-prediction tools may be suited to different ends &#x2013; some are tuned for recall (catch as many links as possible) and some for precision (<xref ref-type="bibr" rid="B24">Shang and Sun, 2021</xref>).</p>
<p>We must acknowledge some limitations of this study. First, the small clinical dataset of 16 disease cases was mitigated through stratified splits, a class-balanced loss function, and robust metrics. Still, larger cohorts and diverse populations are needed for better generalizability. Second, our current graph is &#x201c;intra-kingdom&#x201d; (phage and bacteria only), which may inflate false positives by linking bacteria that co-occur with similar phages, as it infers bacterial relationships indirectly. Future work will develop fully heterogeneous graphs with explicit phage&#x2013;host edges from experimental data, CRISPR spacers, and other links to help distinguish true pathogen targets from benign ones. Although our focus was on oral bacteria and phages, the framework is adaptable to include other kingdoms, such as fungi or protozoa, and additional relationships, such as co-occurrence or host immunity evasion (<xref ref-type="bibr" rid="B5">Deo and Deshmukh, 2019</xref>; <xref ref-type="bibr" rid="B24">Shang and Sun, 2021</xref>; <xref ref-type="bibr" rid="B10">Jie et&#xa0;al., 2025</xref>). Pretraining on larger microbiome graphs could also improve IK-BRNet&#x2019;s performance on unseen taxa.</p>
<p>A key limitation is that some INFECTS edges are inferred from co-occurrence and annotation-based predictions without experimental validation. In dense microbiome networks, co-enrichment of viruses and bacteria within the same niche may cause false-positive associations not reflecting true infection relationships. Relation-aware learning helps reduce this by using ecological context, but future work with CRISPR spacer matching, prophage detection, or experimental validation will be crucial to refine host specificity.</p>
<p>In summary, our lightweight relation-aware GAT accurately predicts biologically meaningful phage&#x2013;host links. IK-BRNet confirms the state-of-the-art performance of graph-fusing approaches and reveals that a few taxa drive key interactions. It offers fast training and interpretability, complementing recent deep learning models. Biologically, it highlights Porphyromonas, Fusobacterium, and similar genera as central hubs, supporting the keystone-pathogen hypothesis and viral ecology studies (<xref ref-type="bibr" rid="B27">Szafra&#x144;ski et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B3">Borin et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B4">Chen et&#xa0;al., 2024</xref>). It provides a new tool for exploring the viral component of microbiomes, emphasizing the role of oral viral ecology in dysbiosis and periodontal disease. Future work could clarify phage&#x2013;microbe networks in health and disease.</p>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusion</title>
<p>This study shows IK-BRNet, a lightweight, biology- and relation-aware graph neural network, captures disease patterns in oral biofilm networks better than a traditional GAT. It prioritizes sensitivity, leverages ecological structure, and achieves a high AUC-ROC, detecting nearly all viral signals associated with disease, making it ideal for periodontal screening. The baseline GAT emphasizes specificity and accuracy, underscoring the importance of selecting a model based on the application. These findings demonstrate the value of relation-aware graph learning for meaningful oral microbiome research.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material. Further inquiries can be directed to the corresponding author/s.</p></sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>PN: Investigation, Methodology, Resources, Software, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. SRV: Investigation, Methodology, Resources, Software, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing, Conceptualization, Data curation, Validation. JKN: Investigation, Resources, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing, Software. RO: Investigation, Resources, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing, Methodology, Validation.</p></sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s10" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was used in the creation of this manuscript. During the preparation of this manuscript, the authors used ChatGPT (OpenAI, USA) to assist in language refinement, improvement of clarity and coherence, formatting of scientific text, and organization of sections. All scientific interpretations, data analyses, methodological decisions, and conclusions were performed exclusively by the authors. Images were generated using the Mind the Graph tool (Cactus Communications, USA). The authors critically reviewed, verified, and edited all AI-assisted content and take full responsibility for the integrity, originality, and accuracy of the work.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Al-Ouqaili</surname> <given-names>M. T. S.</given-names></name>
<name><surname>Ahmad</surname> <given-names>A.</given-names></name>
<name><surname>Jwair</surname> <given-names>N. A.</given-names></name>
<name><surname>Al-Marzooq</surname> <given-names>F.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Harnessing bacterial immunity: CRISPR-Cas system as a versatile tool in combating pathogens and revolutionizing medicine</article-title>. <source>Front. Cell. Infect. Microbiol.</source> <volume>15</volume>, <elocation-id>1588446</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fcimb.2025.1588446</pub-id>, PMID: <pub-id pub-id-type="pmid">40521034</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bhandary</surname> <given-names>R.</given-names></name>
<name><surname>Venugopalan</surname> <given-names>G.</given-names></name>
<name><surname>Ramesh</surname> <given-names>A.</given-names></name>
<name><surname>Tartaglia</surname> <given-names>G. M.</given-names></name>
<name><surname>Singhal</surname> <given-names>I.</given-names></name>
<name><surname>Khijmatgar</surname> <given-names>S.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Microbial symphony: navigating the intricacies of the human oral microbiome and its impact on health</article-title>. <source>Microorganisms</source> <volume>12</volume>, <elocation-id>571</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/microorganisms12030571</pub-id>, PMID: <pub-id pub-id-type="pmid">38543622</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Borin</surname> <given-names>J. M.</given-names></name>
<name><surname>Lee</surname> <given-names>J. J.</given-names></name>
<name><surname>Lucia-Sanz</surname> <given-names>A.</given-names></name>
<name><surname>Gerbino</surname> <given-names>K. R.</given-names></name>
<name><surname>Weitz</surname> <given-names>J. S.</given-names></name>
<name><surname>Meyer</surname> <given-names>J. R.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Rapid bacteria-phage coevolution drives the emergence of multiscale networks</article-title>. <source>Science</source> <volume>382</volume>, <fpage>674</fpage>&#x2013;<lpage>678</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1126/science.adg2051</pub-id>, PMID: <pub-id pub-id-type="pmid">37943920</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chen</surname> <given-names>X.</given-names></name>
<name><surname>Zou</surname> <given-names>T.</given-names></name>
<name><surname>Ding</surname> <given-names>G.</given-names></name>
<name><surname>Jiang</surname> <given-names>S.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Findings and methodologies in oral phageome research: a systematic review</article-title>. <source>J. Oral. Microbiol.</source> <volume>16</volume>, <elocation-id>2417099</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/20002297.2024.2417099</pub-id>, PMID: <pub-id pub-id-type="pmid">39420944</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Deo</surname> <given-names>P. N.</given-names></name>
<name><surname>Deshmukh</surname> <given-names>R.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Oral microbiome: unveiling the fundamentals</article-title>. <source>J. Oral. Maxillofac. Pathol.</source> <volume>23</volume>, <fpage>122</fpage>&#x2013;<lpage>128</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.4103/jomfp.JOMFP_304_18</pub-id>, PMID: <pub-id pub-id-type="pmid">31110428</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Doremure Gamage</surname> <given-names>T. P.</given-names></name>
<name><surname>Gutierrez</surname> <given-names>J. A.</given-names></name>
<name><surname>Ray</surname> <given-names>S. K.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>The role of graph neural networks, transformers, and reinforcement learning in network threat detection: a systematic literature review</article-title>. <source>Electronics</source> <volume>14</volume>, <elocation-id>4163</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/electronics14214163</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Du</surname> <given-names>Z. H.</given-names></name>
<name><surname>Zhong</surname> <given-names>J. P.</given-names></name>
<name><surname>Liu</surname> <given-names>Y.</given-names></name>
<name><surname>Li</surname> <given-names>J. Q.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Prokaryotic virus host prediction with graph contrastive augmentation</article-title>. <source>PLoS Comput. Biol.</source> <volume>19</volume>, <elocation-id>e1011671</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pcbi.1011671</pub-id>, PMID: <pub-id pub-id-type="pmid">38039280</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hashim</surname> <given-names>N. T.</given-names></name>
<name><surname>Babiker</surname> <given-names>R.</given-names></name>
<name><surname>Padmanabhan</surname> <given-names>V.</given-names></name>
<name><surname>Ahmed</surname> <given-names>A. T.</given-names></name>
<name><surname>Chaitanya</surname> <given-names>N. C. S. K.</given-names></name>
<name><surname>Mohammed</surname> <given-names>R.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>The global burden of periodontal disease: a narrative review on unveiling socioeconomic and health challenges</article-title>. <source>Int. J. Environ. Res. Public Health</source> <volume>22</volume>, <elocation-id>624</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/ijerph22040624</pub-id>, PMID: <pub-id pub-id-type="pmid">40283848</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hasturk</surname> <given-names>H.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Inflammation and periodontal regeneration</article-title>. <source>Dent. Clin. North Am.</source> <volume>66</volume>, <fpage>39</fpage>&#x2013;<lpage>51</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.cden.2021.08.004</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jie</surname> <given-names>Z.</given-names></name>
<name><surname>Liang</surname> <given-names>H.</given-names></name>
<name><surname>Meng</surname> <given-names>Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>J.</given-names></name>
<name><surname>Zhang</surname> <given-names>T.</given-names></name>
<name><surname>Li</surname> <given-names>W.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Integrating metagenomics and cultivation unveils oral phage diversity and potential impact on hosts</article-title>. <source>NPJ Biofilms Microbiomes</source> <volume>11</volume>, <fpage>145</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41522-025-00514-6</pub-id>, PMID: <pub-id pub-id-type="pmid">40715125</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Krois</surname> <given-names>J.</given-names></name>
<name><surname>Ekert</surname> <given-names>T.</given-names></name>
<name><surname>Meinhold</surname> <given-names>L.</given-names></name>
<name><surname>Golla</surname> <given-names>T.</given-names></name>
<name><surname>Kharbot</surname> <given-names>B.</given-names></name>
<name><surname>Wittemeier</surname> <given-names>A.</given-names></name>
<etal/>
</person-group>. (<year>2019</year>). 
<article-title>Deep learning for the radiographic detection of periodontal bone loss</article-title>. <source>Sci. Rep.</source> <volume>9</volume>, <fpage>8495</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-019-44839-3</pub-id>, PMID: <pub-id pub-id-type="pmid">31186466</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lee</surname> <given-names>J. H.</given-names></name>
<name><surname>Kim</surname> <given-names>D. H.</given-names></name>
<name><surname>Jeong</surname> <given-names>S. N.</given-names></name>
<name><surname>Choi</surname> <given-names>S. H.</given-names></name>
</person-group> (<year>2018</year>). 
<article-title>Diagnosis and prediction of periodontally compromised teeth using a deep learning-based convolutional neural network algorithm</article-title>. <source>J. Periodontal Implant Sci.</source> <volume>48</volume>, <fpage>114</fpage>&#x2013;<lpage>123</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5051/jpis.2018.48.2.114</pub-id>, PMID: <pub-id pub-id-type="pmid">29770240</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>S.</given-names></name>
<name><surname>Guo</surname> <given-names>R.</given-names></name>
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Li</surname> <given-names>P.</given-names></name>
<name><surname>Chen</surname> <given-names>F.</given-names></name>
<name><surname>Wang</surname> <given-names>X.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>A catalog of 48,425 nonredundant viruses from oral metagenomes expands the horizon of the human oral virome</article-title>. <source>iScience</source> <volume>25</volume>, <elocation-id>104418</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.isci.2022.104418</pub-id>, PMID: <pub-id pub-id-type="pmid">35663034</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>W.</given-names></name>
<name><surname>Wang</surname> <given-names>F.</given-names></name>
<name><surname>Dong</surname> <given-names>F.</given-names></name>
<name><surname>Zhang</surname> <given-names>Z.</given-names></name>
<name><surname>Song</surname> <given-names>P.</given-names></name>
<name><surname>Chen</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>CGF membrane promotes periodontal tissue regeneration mediated by hUCMSCs through upregulating TAZ and osteogenic differentiation genes</article-title>. <source>Stem Cells Int.</source> <volume>2021</volume>, <elocation-id>6644366</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2021/6644366</pub-id>, PMID: <pub-id pub-id-type="pmid">34394357</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>F.</given-names></name>
<name><surname>Zhao</surname> <given-names>Z.</given-names></name>
<name><surname>Liu</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>PHPGAT: predicting phage hosts based on multimodal heterogeneous knowledge graph with graph attention network</article-title>. <source>Brief Bioinform.</source> <volume>26</volume>, <elocation-id>bbad621</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bib/bbad621</pub-id>, PMID: <pub-id pub-id-type="pmid">39833104</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lu</surname> <given-names>D.</given-names></name>
<name><surname>Li</surname> <given-names>J.</given-names></name>
<name><surname>Zheng</surname> <given-names>C.</given-names></name>
<name><surname>Liu</surname> <given-names>J.</given-names></name>
<name><surname>Zhang</surname> <given-names>Q.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>HGTMDA: a hypergraph learning approach with improved GCN-Transformer for miRNA&#x2013;disease association prediction</article-title>. <source>Bioengineering</source> <volume>11</volume>, <elocation-id>680</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/bioengineering11070680</pub-id>, PMID: <pub-id pub-id-type="pmid">39061762</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ly</surname> <given-names>M.</given-names></name>
<name><surname>Abeles</surname> <given-names>S. R.</given-names></name>
<name><surname>Boehm</surname> <given-names>T. K.</given-names></name>
<name><surname>Robles-Sikisaka</surname> <given-names>R.</given-names></name>
<name><surname>Naidu</surname> <given-names>M.</given-names></name>
<name><surname>Santiago-Rodriguez</surname> <given-names>T.</given-names></name>
<etal/>
</person-group>. (<year>2014</year>). 
<article-title>Altered oral viral ecology in association with periodontal disease</article-title>. <source>mBio</source> <volume>5</volume>, <elocation-id>e01133-14</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1128/mBio.01133-14</pub-id>, PMID: <pub-id pub-id-type="pmid">24846382</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Matrishin</surname> <given-names>C. B.</given-names></name>
<name><surname>Haase</surname> <given-names>E. M.</given-names></name>
<name><surname>Dewhirst</surname> <given-names>F. E.</given-names></name>
<name><surname>Mark Welch</surname> <given-names>J. L.</given-names></name>
<name><surname>Miranda-Sanchez</surname> <given-names>F.</given-names></name>
<name><surname>Chen</surname> <given-names>T.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Phages are unrecognized players in the ecology of the oral pathogen <italic>Porphyromonas gingivalis</italic></article-title>. <source>Microbiome</source> <volume>11</volume>, <fpage>161</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s40168-023-01607-w</pub-id>, PMID: <pub-id pub-id-type="pmid">37491415</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Murakami</surname> <given-names>S.</given-names></name>
<name><surname>Mealey</surname> <given-names>B. L.</given-names></name>
<name><surname>Mariotti</surname> <given-names>A.</given-names></name>
<name><surname>Chapple</surname> <given-names>I. L. C.</given-names></name>
</person-group> (<year>2018</year>). 
<article-title>Dental plaque-induced gingival conditions</article-title>. <source>J. Clin. Periodontol.</source> <volume>45</volume>, <fpage>S17</fpage>&#x2013;<lpage>S27</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/jcpe.12937</pub-id>, PMID: <pub-id pub-id-type="pmid">29926503</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Natarajan</surname> <given-names>P. M.</given-names></name>
<name><surname>Umapathy</surname> <given-names>V. R.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Role of transcriptomics in the study of oral cancer</article-title>. <source>Front. Oral. Health</source> <volume>6</volume>, <elocation-id>1524364</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/froh.2025.1524364</pub-id>, PMID: <pub-id pub-id-type="pmid">40791779</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Nerella</surname> <given-names>S.</given-names></name>
<name><surname>Bandyopadhyay</surname> <given-names>S.</given-names></name>
<name><surname>Zhang</surname> <given-names>J.</given-names></name>
<name><surname>Contreras</surname> <given-names>M.</given-names></name>
<name><surname>Siegel</surname> <given-names>S.</given-names></name>
<name><surname>Bumin</surname> <given-names>A.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Transformers and large language models in healthcare: a review</article-title>. <source>Artif. Intell. Med.</source> <volume>154</volume>, <elocation-id>102900</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.artmed.2024.102900</pub-id>, PMID: <pub-id pub-id-type="pmid">38878555</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Pan</surname> <given-names>J.</given-names></name>
<name><surname>You</surname> <given-names>W.</given-names></name>
<name><surname>Lu</surname> <given-names>X.</given-names></name>
<name><surname>Wang</surname> <given-names>S.</given-names></name>
<name><surname>You</surname> <given-names>Z.</given-names></name>
<name><surname>Sun</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>GSPHI: a novel deep learning model for predicting phage&#x2013;host interactions via multiple biological information</article-title>. <source>Comput. Struct. Biotechnol. J.</source> <volume>21</volume>, <fpage>3404</fpage>&#x2013;<lpage>3413</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.csbj.2023.06.026</pub-id>, PMID: <pub-id pub-id-type="pmid">37397626</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Przymus</surname> <given-names>P.</given-names></name>
<name><surname>Rykaczewski</surname> <given-names>K.</given-names></name>
<name><surname>Mart&#xed;n-Segura</surname> <given-names>A.</given-names></name>
<name><surname>Truu</surname> <given-names>J.</given-names></name>
<name><surname>Carrillo De Santa Pau</surname> <given-names>E.</given-names></name>
<name><surname>Kolev</surname> <given-names>M.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Deep learning in microbiome analysis: a comprehensive review of neural network models</article-title>. <source>Front. Microbiol.</source> <volume>15</volume>, <elocation-id>1516667</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fmicb.2024.1516667</pub-id>, PMID: <pub-id pub-id-type="pmid">39911715</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Shang</surname> <given-names>J.</given-names></name>
<name><surname>Sun</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Predicting the hosts of prokaryotic viruses using GCN-based semi-supervised learning</article-title>. <source>BMC Biol.</source> <volume>19</volume>, <fpage>250</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12915-021-01166-7</pub-id>, PMID: <pub-id pub-id-type="pmid">34819064</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Shang</surname> <given-names>J.</given-names></name>
<name><surname>Sun</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>CHERRY: a computational method for accurate prediction of virus&#x2013;prokaryotic interactions using a graph encoder-decoder model</article-title>. <source>Brief Bioinform.</source> <volume>23</volume>, <elocation-id>bbac182</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bib/bbac182</pub-id>, PMID: <pub-id pub-id-type="pmid">35595715</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sun</surname> <given-names>C.</given-names></name>
<name><surname>Thomas</surname> <given-names>B. C.</given-names></name>
<name><surname>Barrangou</surname> <given-names>R.</given-names></name>
<name><surname>Banfield</surname> <given-names>J. F.</given-names></name>
</person-group> (<year>2016</year>). 
<article-title>Metagenomic reconstructions of bacterial CRISPR loci constrain population histories</article-title>. <source>ISME J.</source> <volume>10</volume>, <fpage>858</fpage>&#x2013;<lpage>870</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/ismej.2015.162</pub-id>, PMID: <pub-id pub-id-type="pmid">26394009</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Szafra&#x144;ski</surname> <given-names>S. P.</given-names></name>
<name><surname>Slots</surname> <given-names>J.</given-names></name>
<name><surname>Stiesch</surname> <given-names>M.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>The human oral phageome</article-title>. <source>Periodontol. 2000</source> <volume>86</volume>, <fpage>79</fpage>&#x2013;<lpage>96</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/prd.12367</pub-id>, PMID: <pub-id pub-id-type="pmid">33690937</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>C. W.</given-names></name>
<name><surname>Hao</surname> <given-names>Y.</given-names></name>
<name><surname>Di Gianfilippo</surname> <given-names>R.</given-names></name>
<name><surname>Sugai</surname> <given-names>J.</given-names></name>
<name><surname>Li</surname> <given-names>J.</given-names></name>
<name><surname>Gong</surname> <given-names>W.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>Machine learning-assisted immune profiling stratifies peri-implantitis patients with unique microbial colonization and clinical outcomes</article-title>. <source>Theranostics</source> <volume>11</volume>, <fpage>6703</fpage>&#x2013;<lpage>6716</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.7150/thno.57775</pub-id>, PMID: <pub-id pub-id-type="pmid">34093848</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>J.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Multi-modal topology-aware graph neural network for robust chemical&#x2013;protein interaction prediction</article-title>. <source>Int. J. Mol. Sci.</source> <volume>26</volume>, <elocation-id>8666</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/ijms26178666</pub-id>, PMID: <pub-id pub-id-type="pmid">40943585</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wei</surname> <given-names>A.</given-names></name>
<name><surname>Xiao</surname> <given-names>Z.</given-names></name>
<name><surname>Fu</surname> <given-names>L.</given-names></name>
<name><surname>Zhao</surname> <given-names>W.</given-names></name>
<name><surname>Jiang</surname> <given-names>X.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Predicting phage&#x2013;host interactions via feature augmentation and regional graph convolution</article-title>. <source>Brief Bioinform.</source> <volume>26</volume>, <elocation-id>bbad530</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bib/bbad530</pub-id>, PMID: <pub-id pub-id-type="pmid">39727002</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yadalam</surname> <given-names>P. K.</given-names></name>
<name><surname>Anegundi</surname> <given-names>R. V.</given-names></name>
<name><surname>Natarajan</surname> <given-names>P. M.</given-names></name>
<name><surname>Ardila</surname> <given-names>C. M.</given-names></name>
</person-group> (<year>2025</year>a). 
<article-title>Neural networks for predicting and classifying antimicrobial resistance sequences in <italic>Porphyromonas gingivalis</italic></article-title>. <source>Int. Dent. J.</source> <volume>75</volume>, <elocation-id>100890</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.identj.2025.100890</pub-id>, PMID: <pub-id pub-id-type="pmid">40618714</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yadalam</surname> <given-names>P. K.</given-names></name>
<name><surname>Ayyachamy</surname> <given-names>S.</given-names></name>
<name><surname>Barbosa</surname> <given-names>F. T.</given-names></name>
<name><surname>Natarajan</surname> <given-names>P. M.</given-names></name>
</person-group> (<year>2025</year>b). 
<article-title>Kolmogorov&#x2013;Arnold networks for predicting drug&#x2013;gene associations of HDAC1 inhibitors in periodontitis</article-title>. <source>Comput. Biol. Chem.</source> <volume>118</volume>, <elocation-id>108451</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compbiolchem.2025.108451</pub-id>, PMID: <pub-id pub-id-type="pmid">40233475</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yuan</surname> <given-names>K.</given-names></name>
<name><surname>Yoon</surname> <given-names>C. H.</given-names></name>
<name><surname>Gu</surname> <given-names>Q.</given-names></name>
<name><surname>Munby</surname> <given-names>H.</given-names></name>
<name><surname>Walker</surname> <given-names>A. S.</given-names></name>
<name><surname>Zhu</surname> <given-names>T.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Transformers and large language models are efficient feature extractors for electronic health record studies</article-title>. <source>Commun. Med.</source> <volume>5</volume>, <fpage>83</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s43856-025-00790-1</pub-id>, PMID: <pub-id pub-id-type="pmid">40119150</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3287705">Priyanka Debta</ext-link>, Institute of Dental Science, Siksha &#x2018;O&#x2019; Anusandhan, India</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/790832">Yuanyuan Ma</ext-link>, Hubei University of Arts and Science, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3086100">Eman Alqaissi</ext-link>, King Khalid University, Saudi Arabia</p></fn>
</fn-group>
</back>
</article>