<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1511521</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2025.1511521</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Dual graph-embedded fusion network for predicting potential microbe-disease associations with sequence learning</article-title>
<alt-title alt-title-type="left-running-head">Wu et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2025.1511521">10.3389/fgene.2025.1511521</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Wu</surname>
<given-names>Junlong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2868279/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xiao</surname>
<given-names>Liqi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2906226/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Fan</surname>
<given-names>Liu</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Lei</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/664933/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Zhu</surname>
<given-names>Xianyou</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>College of Computer Science and Technology</institution>, <institution>Hengyang Normal University</institution>, <addr-line>Hengyang</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Technology Innovation Center of Changsha</institution>, <institution>Changsha University</institution>, <addr-line>Changsha</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Hunan Engineering Research Center of Cyberspace Security Technology and Applications</institution>, <institution>Hengyang Normal University</institution>, <addr-line>Hengyang</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2516648/overview">Marco Mesiti</ext-link>, University of Milan, Italy</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/887632/overview">Gianvito Pio</ext-link>, University of Bari Aldo Moro, Italy</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1973522/overview">Li ZeJun</ext-link>, Hunan Institute of Technology, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Lei Wang, <email>wanglei@xtu.edu.cn</email>; Xianyou Zhu, <email>zxy@hynu.edu.cn</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>11</day>
<month>02</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1511521</elocation-id>
<history>
<date date-type="received">
<day>15</day>
<month>10</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>15</day>
<month>01</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Wu, Xiao, Fan, Wang and Zhu.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Wu, Xiao, Fan, Wang and Zhu</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Recent studies indicate that microorganisms are crucial for maintaining human health. Dysbiosis, or an imbalance in these microbial communities, is strongly linked to a variety of human diseases. Therefore, understanding the impact of microbes on disease is essential. The DuGEL model leverages the strengths of the graph convolutional neural network (GCN) and the graph attention network (GAT), ensuring that both local and global relationships within the microbe-disease association network are captured. The integration of the Long Short-Term Memory Network (LSTM) further enhances the model&#x2019;s ability to understand sequential dependencies in the feature representations. This comprehensive approach allows DuGEL to achieve a high level of accuracy in predicting potential microbe-disease associations, making it a valuable tool for biomedical research and the discovery of new therapeutic targets. By combining advanced graph-based and sequence-based learning techniques, DuGEL addresses the limitations of existing methods and provides a robust framework for the prediction of microbe-disease associations. To evaluate the performance of DuGEL, we conducted comprehensive comparative experiments and case studies based on two databases, HMDAD and Disbiome, and the results demonstrate that DuGEL can effectively predict potential microbe-disease associations.</p>
</abstract>
<kwd-group>
<kwd>long short-term memory networks</kwd>
<kwd>graph attention networks</kwd>
<kwd>microbe-disease associations</kwd>
<kwd>graph convolutional neural networks</kwd>
<kwd>full connectivity</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Genomics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Microorganisms play an important and far-reaching role in human life and greatly impact our health (<xref ref-type="bibr" rid="B34">Liang et al., 2018</xref>). Recent reports indicate that the human body is host to trillions of microorganisms (<xref ref-type="bibr" rid="B24">Hoffmann et al., 2016</xref>) and that the number of microorganisms in the human body far exceeds the number of human cells (<xref ref-type="bibr" rid="B8">Bocci, 1992</xref>). These microorganisms constitute the microbiota in the human body (<xref ref-type="bibr" rid="B70">Zhu et al., 2010</xref>). The microbiome plays a critical role in human physiology (<xref ref-type="bibr" rid="B23">Heintz-Buschart and Wilmes, 2018</xref>), helping the body&#x2019;s intestinal tract to reduce the growth of pathogenic bacteria and infections and synthesizing some of the vitamins and amino acids needed by the body (<xref ref-type="bibr" rid="B25">Islam et al., 2022</xref>). If the microbial community in the human body is out of balance, it can impair the function of the immune system, increase the risk of infection with pathogens (<xref ref-type="bibr" rid="B48">Pickard et al., 2017</xref>), lead to malnutrition or nutritional deficiencies (<xref ref-type="bibr" rid="B9">Burr et al., 2020</xref>), and contribute to the development of mental health-related problems such as anxiety and depression (<xref ref-type="bibr" rid="B4">Anisman et al., 2018</xref>), as well as metabolic diseases such as obesity and diabetes (<xref ref-type="bibr" rid="B49">Sanz et al., 2015</xref>). Of course, the microbiota can help the body regulate and prevent attacks from bacteria outside the body (<xref ref-type="bibr" rid="B6">Barr, 2017</xref>); for example, actinomycetes are a class of antibiotic-producing bacteria that produce a wide range of antibiotics such as streptomycin and tetracycline (<xref ref-type="bibr" rid="B22">Grasso et al., 2016</xref>). 
These antibiotics inhibit the growth of other pathogenic microorganisms and help protect the body from infection (<xref ref-type="bibr" rid="B26">Jagannathan et al., 2021</xref>). Therefore, predicting potential associations between microorganisms and diseases is vital for unraveling the complex mechanisms of disease occurrence and discovering potential biomarkers (<xref ref-type="bibr" rid="B45">Montaner et al., 2020</xref>). By inferring the interactions between microorganisms and diseases, we can better understand the diagnosis and prognosis of diseases and provide new ideas and methods for preventing, diagnosing, and treating diseases (<xref ref-type="bibr" rid="B40">Malla et al., 2019</xref>). As technology advances, we no longer rely solely on traditional biological methods to explore the association between microbes and disease (<xref ref-type="bibr" rid="B21">Gilbert et al., 2016</xref>). Instead, we are increasingly introducing computational modelling into our research to predict the role of microbes in disease occurrence, development, and treatment through techniques such as big data analytics and deep learning (<xref ref-type="bibr" rid="B42">Marcos-Zambrano et al., 2021</xref>), which are more practical and accurate (<xref ref-type="bibr" rid="B47">Najafabadi et al., 2015</xref>). Researchers have recently established a series of microbe-disease association databases to conduct an in-depth study of the potential link between microbes and diseases (<xref ref-type="bibr" rid="B29">Jin et al., 2022</xref>). These databases combine a large amount of microbial composition data and disease information. For example, the HMDAD database created by Ma et al. became the first to document human microbe-disease associations by manually organizing a large amount of public literature (<xref ref-type="bibr" rid="B38">Ma et al., 2017</xref>). This database covers 483 pieces of information about the association between 39 diseases and 292 microorganisms. 
Second, Janssens et al. created a microbial-disease association database called Disbiome by collecting 10,922 experimental records from 1,191 documents containing 372 diseases and 1,622 microorganisms (<xref ref-type="bibr" rid="B27">Janssens et al., 2018</xref>).</p>
<p>Researchers can explore and discover the relationships between microorganisms and different diseases using the above microbe-disease association database as the primary data. Moreover, these recent technological tools can be broadly categorized into four types, namely, network-based methods (<xref ref-type="bibr" rid="B61">Wu et al., 2018</xref>), matrix decomposition-based methods (<xref ref-type="bibr" rid="B50">Shen et al., 2021</xref>), traditional machine learning-based methods (<xref ref-type="bibr" rid="B2">Afshari et al., 2022</xref>), and graph neural network-based methods (<xref ref-type="bibr" rid="B33">Li et al., 2023</xref>).</p>
<p>In DuGEL, we use both Graph Convolutional Neural Network (GCN) and Graph Attention Network (GAT), where GCN is specifically designed to process graph data (<xref ref-type="bibr" rid="B28">Jin et al., 2021</xref>). GCN can learn feature representations at both node and graph levels, and it achieves the task of learning and predicting the representations of graph data by efficiently exploiting the connectivity relationships between the nodes (<xref ref-type="bibr" rid="B69">Zhou et al., 2023</xref>). By adaptively learning the attention weights between each node and its neighbouring nodes, GAT can better capture local structural information in graph data. The introduction of GAT enriches the representational capabilities of the graph neural network, allowing it to perform well when dealing with complex graph-structured data (<xref ref-type="bibr" rid="B46">Munikoti et al., 2023</xref>). DuGEL can adapt to an extensive range of datasets with solid robustness.</p>
<p>Unlike the above methods, in this paper, we designed a new computational model called DuGEL based on the graph convolutional neural network and the graph attention network to infer possible microbe-disease associations. In DuGEL, we first downloaded known microbe-disease associations to form a heterogeneous microbe-disease network. Then, we input this network into a graph convolutional neural network and a graph attention network separately to obtain the local and global features of nodes in the network. Next, we spliced the outputs of the graph convolutional neural network and the graph attention network and then introduced a Long Short-Term Memory (LSTM) network to process the fused features. Finally, the output of the LSTM would be passed to a fully connected layer to infer potential associations between microbes and diseases. Experiments showed that DuGEL obtained satisfactory predictive performance, with 5-fold cross-validated AUC values of 0.9698 and 0.9119 for the HMDAD and Disbiome datasets, respectively, and may be a potential tool for future microbe-disease association prediction.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Materials and methods</title>
<sec id="s2-1">
<title>2.1 Datasets</title>
<p>HMDAD, constructed by <xref ref-type="bibr" rid="B38">Ma et al. (2017)</xref>, and Disbiome (<xref ref-type="bibr" rid="B27">Janssens et al., 2018</xref>), constructed by Janssens et al., are the main publicly available biomedical databases containing microbe-disease association data. As shown in <xref ref-type="table" rid="T1">Table 1</xref>, the HMDAD database covers 483 known microbe-disease associations; after processing these data, we ended up with 450 known microbe-disease associations. The HMDAD database provides a valuable information resource for studying microbe-disease relationships. In addition, Disbiome, constructed by Janssens et al., is a publicly available database of microbe-disease associations. As shown in <xref ref-type="table" rid="T1">Table 1</xref>, the Disbiome database collects 5,573 known associations from published academic papers for 240 diseases and 1,098 microorganisms. After de-duplication, we had 4,351 known microbe-disease associations covering 218 diseases and 1,052 microorganisms. Due to its extensive data collection and detailed information records, the Disbiome database has become a vital data support for research in this field.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>The statistics of the two databases.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Datasets</th>
<th align="left">Microbes</th>
<th align="left">Diseases</th>
<th align="left">Associations</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">HMDAD</td>
<td align="left">292</td>
<td align="left">39</td>
<td align="left">450</td>
</tr>
<tr>
<td align="left">Disbiome</td>
<td align="left">1,052</td>
<td align="left">218</td>
<td align="left">4,351</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>After acquiring the initial data, we performed data preprocessing steps to ensure the quality of the data and the validity of the model training. First, we removed all duplicate records to ensure that the association of each microbe with a disease was unique. Further, we converted the data into a uniform format to facilitate subsequent processing and model training. For simplicity, for each dataset, let <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denote the set of newly downloaded different microorganisms, and <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represent the set of newly downloaded different diseases. Thus, we can construct a primitive known microbe-disease association network <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2329;</mml:mo>
<mml:mi>M</mml:mi>
<mml:mo>&#x222a;</mml:mo>
<mml:mrow>
<mml:mfenced open="" close="&#x232a;" separators="|">
<mml:mrow>
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>D</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> as follows: for any given <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, if and only if there is a known association between them, we assume that there is an edge belonging to <inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. Obviously, based on the above definition, we can obtain an adjacency matrix <inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> as follows: for any given <inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, if and only if there is an edge between them in <inline-formula id="inf10">
<mml:math id="m10">
<mml:mrow>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, there is <inline-formula id="inf11">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, otherwise, there is <inline-formula id="inf12">
<mml:math id="m12">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</sec>
<sec id="s2-2">
<title>2.2 Multiple similarity calculation of disease</title>
<sec id="s2-2-1">
<title>2.2.1 Gaussian interaction profile kernel similarity of disease</title>
<p>Based on the assumption that two similar diseases will show similar interaction and non-interaction relationships with the same microorganism, in this section, we adapt the Gaussian interaction profile kernel similarity between a pair of diseases <inline-formula id="inf13">
<mml:math id="m13">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf14">
<mml:math id="m14">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> as follows:<disp-formula id="e1">
<mml:math id="m15">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="italic">exp</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3bb;</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:msup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="|">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf15">
<mml:math id="m16">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf16">
<mml:math id="m17">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represent the <inline-formula id="inf17">
<mml:math id="m18">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mrow>
<mml:mtext>th</mml:mtext>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf18">
<mml:math id="m19">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="normal">j</mml:mi>
<mml:mrow>
<mml:mtext>th</mml:mtext>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> rows of the adjacency matrix <inline-formula id="inf19">
<mml:math id="m20">
<mml:mrow>
<mml:mi mathvariant="normal">A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, respectively, and <inline-formula id="inf20">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x3bb;</mml:mi>
<mml:mi mathvariant="normal">d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the normalized kernel bandwidth, which can be calculated as follows:<disp-formula id="e2">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3bb;</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:msubsup>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="|">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
</sec>
<sec id="s2-2-2">
<title>2.2.2 Cosine similarity of disease</title>
<p>Based on the assumption that if two diseases are similar to each other, then their cosine curves will be more coincident, we introduce the cosine similarity between a pair of diseases <inline-formula id="inf21">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf22">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> as follows:<disp-formula id="e3">
<mml:math id="m25">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>The result of cosine similarity has good stability and certainty, the calculation speed is fast, and the result is intuitive, which makes it suitable for large-scale information retrieval. Here, <inline-formula id="inf23">
<mml:math id="m26">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes the dot product of the vectors in row i and row <inline-formula id="inf24">
<mml:math id="m27">
<mml:mrow>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf25">
<mml:math id="m28">
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> represents the modulus of <inline-formula id="inf26">
<mml:math id="m29">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf27">
<mml:math id="m30">
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> represents the modulus of <inline-formula id="inf28">
<mml:math id="m31">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. <inline-formula id="inf29">
<mml:math id="m32">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2a;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mo>:</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the product of the two moduli; the dot product of the two vectors is then divided by this product. The result is the cosine of the angle between the two disease vectors, that is, the cosine similarity. The value of the cosine similarity lies between &#x2212;1 and 1: when the similarity between two diseases is exceptionally high, the result approaches 1, and when the similarity between two diseases is very low, the result approaches &#x2212;1.</p>
</sec>
<sec id="s2-2-3">
<title>2.2.3 Functional similarity of disease</title>
<p>Based on the assumption that similar diseases tend to interact with similar genes, in this section we calculate the disease functional similarity from the functional associations between disease-related genes as follows. First, we download the gene interactions from the HumanNet database, in which every interaction has an associated log-likelihood score (LLS). Then, for any two given diseases <inline-formula id="inf30">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf31">
<mml:math id="m34">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, let <inline-formula id="inf32">
<mml:math id="m35">
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf33">
<mml:math id="m36">
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:msub>
<mml:mi>j</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:msub>
<mml:mi>j</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:msub>
<mml:mi>j</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> be the gene sets of <inline-formula id="inf34">
<mml:math id="m37">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf35">
<mml:math id="m38">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, respectively. We then define the functional similarity between <inline-formula id="inf36">
<mml:math id="m39">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf37">
<mml:math id="m40">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> as follows:<disp-formula id="e4">
<mml:math id="m41">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mstyle>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mstyle>
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf38">
<mml:math id="m42">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="italic">max</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf39">
<mml:math id="m43">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the functional similarity score between the genes <inline-formula id="inf40">
<mml:math id="m44">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">g</mml:mi>
<mml:mi mathvariant="normal">p</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf41">
<mml:math id="m45">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">g</mml:mi>
<mml:mi mathvariant="normal">q</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, which can be calculated as follows:<disp-formula id="e5">
<mml:math id="m46">
<mml:mrow>
<mml:mtext>FSS</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mn>1</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mtext>if&#x2009;</mml:mtext>
<mml:mi>p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mfrac>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>L</mml:mi>
<mml:mi>L</mml:mi>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>min</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>L</mml:mi>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>max</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>L</mml:mi>
<mml:mi>L</mml:mi>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>min</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mtext>if&#x2009;</mml:mtext>
<mml:mi>p</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf42">
<mml:math id="m47">
<mml:mrow>
<mml:msub>
<mml:mtext>LLS</mml:mtext>
<mml:mi>max</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf43">
<mml:math id="m48">
<mml:mrow>
<mml:msub>
<mml:mtext>LLS</mml:mtext>
<mml:mi>min</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represent the maximum value of LLS and the minimum value of <inline-formula id="inf44">
<mml:math id="m49">
<mml:mrow>
<mml:mtext>LLS</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> in HumanNet, respectively.</p>
<p>Thereafter, by combining the above GIP kernel similarity, disease cosine similarity, and functional similarity of disease, we can obtain an integrated similarity matrix of disease as follows:<disp-formula id="e6">
<mml:math id="m50">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>S</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>D</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>C</mml:mi>
<mml:mi>D</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>D</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mn>3</mml:mn>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
</sec>
</sec>
<sec id="s2-3">
<title>2.3 Multiple similarity calculation of microbe</title>
<sec id="s2-3-1">
<title>2.3.1 Gaussian interaction profile kernel similarity of microbe</title>
<p>In the same way, we can calculate the Gaussian interaction profile kernel similarity between any two microbes <inline-formula id="inf45">
<mml:math id="m51">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf46">
<mml:math id="m52">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> as follows:<disp-formula id="e7">
<mml:math id="m53">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="italic">exp</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3bb;</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:msup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="|">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf47">
<mml:math id="m54">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf48">
<mml:math id="m55">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represent the <inline-formula id="inf49">
<mml:math id="m56">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mrow>
<mml:mtext>th</mml:mtext>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf50">
<mml:math id="m57">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="normal">j</mml:mi>
<mml:mrow>
<mml:mtext>th</mml:mtext>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> columns of the adjacency matrix <inline-formula id="inf51">
<mml:math id="m58">
<mml:mrow>
<mml:mi mathvariant="normal">A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> respectively, and <inline-formula id="inf52">
<mml:math id="m59">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3bb;</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the normalized kernel bandwidth, which can be calculated as follows:<disp-formula id="e8">
<mml:math id="m60">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3bb;</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:msubsup>
</mml:mstyle>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="|">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
</sec>
<sec id="s2-3-2">
<title>2.3.2 Cosine similarity of microbe</title>
<p>Similarly, the cosine similarity between any two microbes <inline-formula id="inf53">
<mml:math id="m61">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf54">
<mml:math id="m62">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> can be obtained as follows:<disp-formula id="e9">
<mml:math id="m63">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>The cosine similarity between two microorganisms is calculated in the same way as the disease cosine similarity. Similarly, when the similarity between two microorganisms is exceptionally high, the result approaches 1, and when the similarity between two microorganisms is very low, the result approaches &#x2212;1.</p>
</sec>
<sec id="s2-3-3">
<title>2.3.3 Functional similarity of microbe</title>
<p>The functional similarity of the microbe is calculated by using the following method (<xref ref-type="bibr" rid="B67">Zhang et al., 2018</xref>): for any given disease <inline-formula id="inf55">
<mml:math id="m64">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, we first construct its directed acyclic graph (DAG) <inline-formula id="inf56">
<mml:math id="m65">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf57">
<mml:math id="m66">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">V</mml:mi>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> includes the disease <inline-formula id="inf58">
<mml:math id="m67">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and its ancestor diseases, <inline-formula id="inf59">
<mml:math id="m68">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">E</mml:mi>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> contains all the directed edges from parent nodes to child nodes. The semantic contribution of a disease <inline-formula id="inf60">
<mml:math id="m69">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> in <inline-formula id="inf61">
<mml:math id="m70">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">V</mml:mi>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula id="inf62">
<mml:math id="m71">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is defined as:<disp-formula id="e10">
<mml:math id="m72">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mn>1</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mtext>&#x2009;if&#x2009;</mml:mtext>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi mathvariant="italic">max</mml:mi>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:mn>0.5</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msubsup>
<mml:mi>d</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2223;</mml:mo>
<mml:msubsup>
<mml:mi>d</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>children&#x2009;of&#x2009;</mml:mtext>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mtext>&#x2009;otherwise&#x2009;</mml:mtext>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>The semantic value of disease <inline-formula id="inf63">
<mml:math id="m73">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is formulated by:<disp-formula id="e11">
<mml:math id="m74">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>Then, the semantic similarity between any two diseases <inline-formula id="inf64">
<mml:math id="m75">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf65">
<mml:math id="m76">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> can be defined as follows:<disp-formula id="e12">
<mml:math id="m77">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:msub>
<mml:mo>&#x2229;</mml:mo>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>Relying on the above formulae, we can further define the similarity between the disease <inline-formula id="inf66">
<mml:math id="m78">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and a set of diseases <inline-formula id="inf67">
<mml:math id="m79">
<mml:mrow>
<mml:mi mathvariant="normal">D</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> as follows:<disp-formula id="e13">
<mml:math id="m80">
<mml:mrow>
<mml:mtext>DS</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>max</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mtext>DSS</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>Hence, for any two given microbes <inline-formula id="inf68">
<mml:math id="m81">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf69">
<mml:math id="m82">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, we can calculate the functional similarity between them as follows:<disp-formula id="e14">
<mml:math id="m83">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mstyle>
<mml:mi>D</mml:mi>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mstyle>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf70">
<mml:math id="m84">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">D</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the set of diseases associated with the microbe <inline-formula id="inf71">
<mml:math id="m85">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf72">
<mml:math id="m86">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">D</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the set of diseases associated with the microbe <inline-formula id="inf73">
<mml:math id="m87">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Finally, by combining the above GIP kernel similarity, microbe cosine similarity, and functional similarity of the microbe, we can obtain an integrated similarity matrix of the microbe as follows:<disp-formula id="e15">
<mml:math id="m88">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>D</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>C</mml:mi>
<mml:mi>M</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>F</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mn>3</mml:mn>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
</sec>
</sec>
<sec id="s2-4">
<title>2.4 Construction of the heterogeneous network</title>
<p>Based on the above descriptions, we can construct a heterogeneous network <inline-formula id="inf74">
<mml:math id="m89">
<mml:mrow>
<mml:mi mathvariant="normal">Y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> by combining the integrated similarity matrix <inline-formula id="inf75">
<mml:math id="m90">
<mml:mrow>
<mml:mtext>DS</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> of disease and the integrated similarity matrix <inline-formula id="inf76">
<mml:math id="m91">
<mml:mrow>
<mml:mtext>MS</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> of microbe with the adjacency matrix <inline-formula id="inf77">
<mml:math id="m92">
<mml:mrow>
<mml:mi mathvariant="normal">A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> as follows:<disp-formula id="e16">
<mml:math id="m93">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mtd>
</mml:mtr>
</mml:mtable>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mi>A</mml:mi>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
</sec>
<sec id="s2-5">
<title>2.5 Structure of the DuGEL</title>
<p>As illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>, the DuGEL consists of the following five steps:<list list-type="simple">
<list-item>
<p>&#x2022; Step 1: Construct a heterogeneous microbe-disease network based on newly downloaded known microbe-disease associations and multiple microbe and disease similarity metrics.</p>
</list-item>
<list-item>
<p>&#x2022; Step 2: Feeding the heterogeneous microbe-disease network forward into a dual channel structure consisting of a Graph Convolutional Neural Network (GCN) and a Graph Attention Network (GAT), where the GCN is utilized to extract spatial features of nodes in the heterogeneous microbe-disease network from local to global, and the GAT is adopted to assign different importance to the neighbors of each node in the heterogeneous microbe-disease network as it is processed.</p>
</list-item>
<list-item>
<p>&#x2022; Step 3: Splicing the outputs of GCN and GAT by simply fusing the information captured by GCN and GAT and combining structural and node characteristics and the importance between neighboring nodes.</p>
</list-item>
<list-item>
<p>&#x2022; Step 4: Implementing a Long Short-Term Memory (LSTM) network to process the fused features, then feeding the output of LSTM into a fully connected layer to convert the high-level features captured by LSTM into the target output space.</p>
</list-item>
<list-item>
<p>&#x2022; Step 5: By feeding the newly obtained feature vectors of the target output space into a Sigmoid function for binary prediction, potential associations between microbes and diseases can be finally computed.</p>
</list-item>
</list>
</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>The structure of DuGEL.</p>
</caption>
<graphic xlink:href="fgene-16-1511521-g001.tif"/>
</fig>
</sec>
<sec id="s2-6">
<title>2.6 Microbe-disease representation layer</title>
<p>In DuGEL, the input layer is the Microbe-Disease Association Representation Layer, which is a component used to convert raw data of known microbe-disease associations into a structured data frame that can be processed by subsequent graph neural networks. Firstly, the newly collected microbe-disease association data need to be pre-processed to ensure consistency and accuracy. The preprocessed data will be used in turn to construct a binary microbe-disease association matrix <italic>M</italic>, which implies potential relationships between microorganisms and disease (<xref ref-type="bibr" rid="B43">Marsh and Zaura, 2017</xref>), and can be defined as follows: Given <inline-formula id="inf78">
<mml:math id="m94">
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> microorganisms and <inline-formula id="inf79">
<mml:math id="m95">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> diseases, the known relationships between the microorganisms and the diseases can be characterized by a binary association matrix <inline-formula id="inf80">
<mml:math id="m96">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, where the matrix element <inline-formula id="inf81">
<mml:math id="m97">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is 1 if there is a known association between the microorganism <inline-formula id="inf82">
<mml:math id="m98">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and the disease <inline-formula id="inf83">
<mml:math id="m99">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and 0 otherwise. Each row of the matrix represents a microorganism, and each column represents a disease. The entries in the matrix indicate the presence or absence of an association. The graph structure <inline-formula id="inf84">
<mml:math id="m100">
<mml:mrow>
<mml:mi mathvariant="normal">G</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is obtained by the association matrix <inline-formula id="inf85">
<mml:math id="m101">
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, where the nodes in the graph are either microbe nodes or disease nodes, and if there is an association between a microbe node and a disease node (<xref ref-type="bibr" rid="B1">Abuin-Denis et al., 2024</xref>), i.e., <inline-formula id="inf86">
<mml:math id="m102">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, then an edge exists between the nodes.</p>
<p>To enhance the prediction ability of DuGEL, similarity information is also fused in the representation layer (<xref ref-type="bibr" rid="B18">Feng et al., 2023</xref>), which contains the microbial similarity matrix <inline-formula id="inf87">
<mml:math id="m103">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the disease similarity matrix <inline-formula id="inf88">
<mml:math id="m104">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Among them, the microbial similarity matrix <inline-formula id="inf89">
<mml:math id="m105">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> captures similarities between different microorganisms based on genomic, phenotypic, or ecological characteristics. After the microbial similarity matrix <inline-formula id="inf90">
<mml:math id="m106">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and disease similarity matrix <inline-formula id="inf91">
<mml:math id="m107">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are constructed, we fuse this similarity information with the microbe-disease association matrix <inline-formula id="inf92">
<mml:math id="m108">
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to enhance the performance of the association prediction model. Therefore, at this point, we can obtain the association matrix that incorporates microbial similarity and disease similarity as a graph structure (<xref ref-type="bibr" rid="B64">Yu et al., 2024</xref>), which further serves as the initial input form for the dual graph-based feature extraction module. This graph structure captures direct associations and enables the model to learn a more comprehensive representation of features.</p>
<p>In addition, we will initialize each representative microbe or disease node in the graph with a feature vector. Formally, let <inline-formula id="inf93">
<mml:math id="m109">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represent the initial feature matrix for microbes and <inline-formula id="inf94">
<mml:math id="m110">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represent the initial feature matrix for diseases, where <inline-formula id="inf95">
<mml:math id="m111">
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the dimension of the feature vector. The initial embeddings <inline-formula id="inf96">
<mml:math id="m112">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf97">
<mml:math id="m113">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are combined into a unified feature matrix <inline-formula id="inf98">
<mml:math id="m114">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. <inline-formula id="inf99">
<mml:math id="m115">
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is fed forward as input to the dual graph-based feature extraction module.</p>
<p>The microbe-disease association representation layer lays the foundation for the entire DuGEL model, and by meticulously structuring the input data, the layer ensures that subsequent graph neural networks are able to effectively capture both local and global patterns in the data. The construction of the graph enables the model to fully utilize all available information, thereby improving the accuracy and robustness of microbe-disease association predictions.</p>
</sec>
<sec id="s2-7">
<title>2.7 Dual graph feature extraction module</title>
<p>In this study, the dual graph feature extraction module is a core component of the DuGEL designed to extract deep features from the input microbe-disease association feature matrix. The module combines two parallel graph neural network architectures: graph convolutional network (GCN) and graph attention network (GAT), ensuring effective capture of local and global relationships in the graph.</p>
<sec id="s2-7-1">
<title>2.7.1 Graph convolution sublayer</title>
<p>The Graph Convolutional Network (GCN) module extracts the spatial features of the graph by processing the microbe-disease feature matrix <inline-formula id="inf100">
<mml:math id="m116">
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to capture the intrinsic structural information of the graph. The spatial features represent the connections between nodes, including direct links (edges) and indirect influences through neighboring nodes. For example, in a microbe-disease graph, the direct association between a microbe and a disease, as well as second- or higher-order neighborhood relationships, contribute to the spatial information. The GCN extracts the features of the nodes by applying convolutional operations on the graph structure, and aggregates the features of the local neighborhood of each node by means of the layer-wise propagation rules (<xref ref-type="bibr" rid="B15">Du et al., 2024</xref>). The propagation rules of the GCN layers are defined as follows:<disp-formula id="e17">
<mml:math id="m117">
<mml:mrow>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>D</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:msup>
<mml:mover accent="true">
<mml:mi>A</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:msup>
<mml:mover accent="true">
<mml:mi>D</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf101">
<mml:math id="m118">
<mml:mrow>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the node representation (whose initial input is <inline-formula id="inf102">
<mml:math id="m119">
<mml:mrow>
<mml:mi mathvariant="normal">X</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>) of the <inline-formula id="inf103">
<mml:math id="m120">
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th layer, <inline-formula id="inf104">
<mml:math id="m121">
<mml:mrow>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the weight matrix of the <inline-formula id="inf105">
<mml:math id="m122">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th layer, <inline-formula id="inf106">
<mml:math id="m123">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the nonlinear activation function, <inline-formula id="inf107">
<mml:math id="m124">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>A</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> is the representation of the adjacency matrix <inline-formula id="inf108">
<mml:math id="m125">
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> plus the identity matrix <inline-formula id="inf109">
<mml:math id="m126">
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf110">
<mml:math id="m127">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>D</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> is the degree matrix of <inline-formula id="inf111">
<mml:math id="m128">
<mml:mrow>
<mml:mover accent="true">
<mml:mi mathvariant="normal">A</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>GCN effectively smooths the feature representation over the graph structure and ensures that the representation of each node is influenced by its neighbors (<xref ref-type="bibr" rid="B12">Chen et al., 2021</xref>), thus capturing the local structure information. By aggregating a node&#x2019;s neighbor information, the feature representation of a node is made to reflect its local graph structure. This aggregation operation is performed in each layer, and through the gradual aggregation of multiple layers (<xref ref-type="bibr" rid="B35">Liu et al., 2018</xref>), the GCN can capture a wider range of graph information. This is particularly important for microbe-disease association prediction, as some associations may not be directly visible, but indirectly inferred through multi-hop relationships.</p>
</sec>
<sec id="s2-7-2">
<title>2.7.2 Attention sublayer</title>
<p>The Graph Attention Network (GAT) module introduces an attention mechanism that assigns different importance coefficients to each node&#x2019;s neighbors. For each node <inline-formula id="inf112">
<mml:math id="m129">
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> in the graph, GAT computes an attention coefficient <inline-formula id="inf113">
<mml:math id="m130">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> between node <inline-formula id="inf114">
<mml:math id="m131">
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and its neighbor node <inline-formula id="inf115">
<mml:math id="m132">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, which is learned by a shared attention mechanism:<disp-formula id="e18">
<mml:math id="m133">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="italic">exp</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>LeakyReLU</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi>a</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2225;</mml:mo>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:msub>
</mml:mstyle>
<mml:mrow>
<mml:mi mathvariant="italic">exp</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>LeakyReLU</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi>a</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2225;</mml:mo>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf116">
<mml:math id="m134">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf117">
<mml:math id="m135">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the feature vectors of node <inline-formula id="inf118">
<mml:math id="m136">
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and node <inline-formula id="inf119">
<mml:math id="m137">
<mml:mrow>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, respectively; <inline-formula id="inf120">
<mml:math id="m138">
<mml:mrow>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the learnable weight matrix; <inline-formula id="inf121">
<mml:math id="m139">
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> represents the splicing operation between the vectors, <inline-formula id="inf122">
<mml:math id="m140">
<mml:mrow>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes the set of neighboring nodes of node <inline-formula id="inf123">
<mml:math id="m141">
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf124">
<mml:math id="m142">
<mml:mrow>
<mml:msup>
<mml:mi>a</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the weight vector of the attention mechanism. Further, the updated feature vector <inline-formula id="inf125">
<mml:math id="m143">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> of node <inline-formula id="inf126">
<mml:math id="m144">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is computed as a weighted sum of the features of its neighbors:<disp-formula id="e19">
<mml:math id="m145">
<mml:mrow>
<mml:msubsup>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>The GAT sublayer enables the model to focus on the most relevant parts of the graph, thus capturing the importance of each neighboring node during the feature aggregation process. The advantage is that it can dynamically adjust the contribution of each neighbor to a node&#x2019;s feature update, and by learning different attention weights, GAT can allocate different attention between different parts of the graph structure (<xref ref-type="bibr" rid="B11">Chatzianastasis et al., 2023</xref>). This is particularly useful when dealing with complex biological networks, where associations between microorganisms and diseases may have different biological significance and importance.</p>
<p>The dual graph feature extraction module can capture local and global information in the graph structure by combining GCN and GAT approaches. GCN emphasizes the aggregated features of a node&#x2019;s local neighbors. At the same time, GAT dynamically adjusts the weights of the neighboring nodes through the attentional mechanism (<xref ref-type="bibr" rid="B59">Wang et al., 2022</xref>), thus providing more flexible and fine-grained control in the feature extraction process. This dual approach ensures that the model understands direct associations between microbes and diseases and identifies potential indirect relationships through graph structural features and attention weights. Combining these two approaches enables the DuGEL model to excel in microbe-disease association prediction tasks, providing more accurate and comprehensive predictions.</p>
<p>The dual graph feature extraction module plays a crucial role in the DuGEL model. Capturing complex graph structure information improves the model&#x2019;s predictive ability and enhances its robustness and generalization ability.</p>
</sec>
</sec>
<sec id="s2-8">
<title>2.8 Feature fusion and sequence learning networks</title>
<sec id="s2-8-1">
<title>2.8.1 Fusion layer</title>
<p>After processing in the GCN and GAT layers, we obtain two sets of feature representations <inline-formula id="inf127">
<mml:math id="m146">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf128">
<mml:math id="m147">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. These two sets of feature representations capture the spatial properties of the graph and the weighted properties of the important nodes, respectively. For integrating the feature representations generated by the GCN and the GAT, we introduce the fusion layer. These representations encode complementary information: GCN captures spatial structure, while GAT focuses on the relative importance of neighboring nodes.</p>
<p>In the dual graph feature extraction module, we extract two different feature representations through GCN and GAT. The task of the feature fusion layer is to effectively fuse these different features to obtain a comprehensive feature representation. The fusion operation can be realized in various ways, such as concatenation, weighted summing, or multilayer perceptron (MLP) (<xref ref-type="bibr" rid="B3">Afzal et al., 2023</xref>). Here, we adopt the concatenation operation to stitch together feature representations from different sub-networks to form a richer feature vector. Assuming that the feature representation extracted through GCN is <inline-formula id="inf129">
<mml:math id="m148">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the feature representation extracted through GAT is <inline-formula id="inf130">
<mml:math id="m149">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the fused feature representation <inline-formula id="inf131">
<mml:math id="m150">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mtext>fusion</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is obtained as follows:<disp-formula id="e20">
<mml:math id="m151">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mtext>fusion</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2225;</mml:mo>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf132">
<mml:math id="m152">
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> represents the concatenation operation between the vectors; the fused feature matrix <inline-formula id="inf133">
<mml:math id="m153">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">H</mml:mi>
<mml:mrow>
<mml:mtext>fusion</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> contains features extracted from two different viewpoints, providing a more comprehensive node representation.</p>
</sec>
<sec id="s2-8-2">
<title>2.8.2 Sequence learning layer</title>
<p>After feature fusion, we need to further learn useful information from these high-dimensional features. The sequence learning layer is designed to capture the temporal or sequential dependencies between features to enhance the prediction capability (<xref ref-type="bibr" rid="B65">Yuan et al., 2023</xref>). By treating the node features as a sequence, the order in which node features are fed to the LSTM introduces a dependency chain. The model learns how the features of one node are influenced by or related to those of neighboring nodes. As illustrated in <xref ref-type="fig" rid="F2">Figure 2</xref>, we introduce the Long Short-Term Memory Network (LSTM), which is capable of effectively remembering long-term dependencies and is suitable for processing sequence data (<xref ref-type="bibr" rid="B63">Yoo et al., 2023</xref>). The LSTM processes each node&#x2019;s features across multiple time steps. Here, the &#x201c;time steps&#x201d; correspond to sequential relationships between node features derived from their embedding in the heterogeneous network. We represent the vector corresponding to the <inline-formula id="inf134">
<mml:math id="m154">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th time step in the matrix <inline-formula id="inf135">
<mml:math id="m155">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mtext>fusion</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:msup>
<mml:mrow>
<mml:mo mathvariant="double-struck">&#x2208;</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> as <inline-formula id="inf136">
<mml:math id="m156">
<mml:mrow>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mo mathvariant="double-struck">&#x2208;</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. The computational mechanism of the LSTM can be formally represented as follows:<disp-formula id="e21">
<mml:math id="m157">
<mml:mrow>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>U</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="e22">
<mml:math id="m158">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>U</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="e23">
<mml:math id="m159">
<mml:mrow>
<mml:msub>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>o</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>U</mml:mi>
<mml:mi>o</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>o</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="e24">
<mml:math id="m160">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>c</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="italic">tanh</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>U</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="e25">
<mml:math id="m161">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>c</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="e26">
<mml:math id="m162">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:mi mathvariant="italic">tanh</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf137">
<mml:math id="m163">
<mml:mrow>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf138">
<mml:math id="m164">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf139">
<mml:math id="m165">
<mml:mrow>
<mml:msub>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denote the activation vectors of the input, forget, and output gates, respectively, <inline-formula id="inf140">
<mml:math id="m166">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>c</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the candidate cell state, <inline-formula id="inf141">
<mml:math id="m167">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf142">
<mml:math id="m168">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:msup>
<mml:mrow>
<mml:mo mathvariant="double-struck">&#x2208;</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are the cell state and hidden state at time step <italic>t</italic>, respectively, <inline-formula id="inf143">
<mml:math id="m169">
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf144">
<mml:math id="m170">
<mml:mrow>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> are the learnable weight matrices, and <inline-formula id="inf145">
<mml:math id="m171">
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the bias vector. <inline-formula id="inf146">
<mml:math id="m172">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the dimensionality of the LSTM hidden layer. The output of the LSTM <inline-formula id="inf147">
<mml:math id="m173">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mtext>LSTM</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mo mathvariant="double-struck">&#x2208;</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> combines information provided by the feature fusion layer, and captures through sequence learning the complex dependencies between features.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>The structure of LSTM.</p>
</caption>
<graphic xlink:href="fgene-16-1511521-g002.tif"/>
</fig>
<p>The feature fusion layer effectively integrates feature representations extracted from different perspectives, providing more comprehensive input data (<xref ref-type="bibr" rid="B66">Zhang et al., 2020</xref>). The sequence learning layer, on the other hand, further enhances the model&#x2019;s predictive capability by capturing the temporal dependencies between features. Combining the two ensures the model can fully utilize all available information to achieve higher accuracy and robustness in microbe-disease association prediction tasks.</p>
</sec>
</sec>
<sec id="s2-9">
<title>2.9 Prediction</title>
<p>The feature representation <inline-formula id="inf148">
<mml:math id="m174">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mtext>LSTM</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> output from the sequence learning layer is used as input to the fully connected layer for further processing. The fully connected layer maps the high-dimensional features to the final prediction results through a series of linear transformations and nonlinear activation functions (<xref ref-type="bibr" rid="B18">Feng et al., 2023</xref>). The computational process of the fully connected layer can be formally represented as:<disp-formula id="e27">
<mml:math id="m175">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf149">
<mml:math id="m176">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf150">
<mml:math id="m177">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the weights and bias parameters of the fully connected layer, respectively, and <inline-formula id="inf151">
<mml:math id="m178">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the Sigmoid nonlinear activation function.</p>
<p>Ultimately, the output layer gives the predicted probability of microbe-disease association (<xref ref-type="bibr" rid="B43">Marsh and Zaura, 2017</xref>). By setting an appropriate threshold, it is possible to determine whether there is an association between microbes and diseases.</p>
</sec>
<sec id="s2-10">
<title>2.10 Objective function</title>
<p>To measure the difference between the predicted and true values of the model, in the DuGEL model, we use the cross-entropy loss function to evaluate the effectiveness of microbe-disease association prediction (<xref ref-type="bibr" rid="B41">Mao et al., 2023</xref>). The cross-entropy loss function is commonly used in classification problems, and in microbe-disease association prediction, we modeled the problem as a binary classification task, i.e., predicting whether there is an association between a certain pair of microorganisms and a disease. The cross-entropy loss function is defined as follows:<disp-formula id="e28">
<mml:math id="m179">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:mi mathvariant="italic">log</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi mathvariant="italic">log</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf152">
<mml:math id="m180">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the number of samples; <inline-formula id="inf153">
<mml:math id="m181">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the true label of the <inline-formula id="inf154">
<mml:math id="m182">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th sample, where 1 indicates the presence of an association and 0 denotes the absence of an association. <inline-formula id="inf155">
<mml:math id="m183">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the predicted probability of the model for the <inline-formula id="inf156">
<mml:math id="m184">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th sample, indicating the probability of an association between microorganisms and diseases. The cross-entropy loss function improves the accuracy of the prediction by penalizing the wrong prediction of the model, which makes the model continuously adjust the parameters during the training process.</p>
<p>To prevent model overfitting, we add a regularization term to the loss function. The regularization term improves the generalization ability of the model by adding a penalty to the model complexity in the loss function (<xref ref-type="bibr" rid="B20">Fu et al., 2023</xref>), encouraging the model to choose simpler parameter configurations. In the DuGEL model, we use L2 regularization (weight decay), which is defined as follows:<disp-formula id="e29">
<mml:math id="m185">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mtext>reg</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>k</mml:mi>
</mml:munder>
</mml:mstyle>
<mml:msubsup>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>where <inline-formula id="inf157">
<mml:math id="m186">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the <inline-formula id="inf158">
<mml:math id="m187">
<mml:mrow>
<mml:mi mathvariant="normal">k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th weight matrix of the model; and <inline-formula id="inf159">
<mml:math id="m188">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mo>&#x2225;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2225;</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the squared L2 norm of <inline-formula id="inf160">
<mml:math id="m189">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, i.e., the sum of squares of the entries of the weight matrix. The regularization term prevents the model from overfitting the training data by penalizing excessively large values of the weights, thus improving the model&#x2019;s performance on the test data.</p>
<p>Ultimately, the integrated loss function of the DuGEL model consists of a cross-entropy loss and a regularization term of the following form:<disp-formula id="e30">
<mml:math id="m190">
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mtext>CE</mml:mtext>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mo>&#x2a;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mrow>
<mml:mtext>reg</mml:mtext>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf161">
<mml:math id="m191">
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the regularization coefficient, which controls the weight of the regularization term. This integrated loss function takes into account both the accuracy of the model prediction and the complexity of the model, and by balancing the two (<xref ref-type="bibr" rid="B14">Cui et al., 2019</xref>), it ensures that the model not only accurately fits the training data during training, but also has good generalization ability.</p>
</sec>
</sec>
<sec id="s3">
<title>3 Experiments and results</title>
<p>In this section, we will detail the experimental setup, evaluation metrics, and baseline methodology used to evaluate the performance of the DuGEL model and present the experimental results and analysis. The effectiveness and superiority of the DuGEL model in the microbe-disease association prediction task are verified by comparing it with multiple baseline methods. The corresponding pseudo-code of the DuGEL model is shown in <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Pseudocode of the DuGEL model proposed in this study.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Algorithm 1 Algorithm of the DuGEL.</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Input: Known associations matrix <inline-formula id="inf162">
<mml:math id="m192">
<mml:mrow>
<mml:mi mathvariant="normal">A</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="normal">R</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">N</mml:mi>
<mml:mi mathvariant="normal">m</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">N</mml:mi>
<mml:mi mathvariant="normal">d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>;</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf163">
<mml:math id="m193">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>A</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>: Representation of the adjacency matrix <inline-formula id="inf164">
<mml:math id="m194">
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> plus the identity matrix <inline-formula id="inf165">
<mml:math id="m195">
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">&#x2003;&#x2003;&#x2003;Microbe similarity matrix <inline-formula id="inf166">
<mml:math id="m196">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">&#x2003;&#x2003;&#x2003;Disease similarity matrix <inline-formula id="inf167">
<mml:math id="m197">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">&#x2003;&#x2003;&#x2003;<inline-formula id="inf168">
<mml:math id="m198">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>: the number of iterations for DuGEL</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf169">
<mml:math id="m199">
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>: the number of layers in GCN and GAT</td>
</tr>
<tr>
<td align="left">Output: Reconstructed microbe-disease associations matrix <inline-formula id="inf170">
<mml:math id="m200">
<mml:mrow>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">1&#x2003;&#x2003;&#x2003;Phase 1: Construct the heterogeneous network <inline-formula id="inf171">
<mml:math id="m201">
<mml:mrow>
<mml:mi mathvariant="normal">Y</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">2&#x2003;&#x2003;&#x2003;&#x2003;<inline-formula id="inf172">
<mml:math id="m202">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>;</mml:mo>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">3&#x2003;&#x2003;&#x2003;Phase 2: Initialize the embedding feature matrix <inline-formula id="inf173">
<mml:math id="m203">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">4&#x2003;&#x2003;&#x2003;&#x2003;<inline-formula id="inf174">
<mml:math id="m204">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> &#x3d; Initial embeddings for microbes and diseases</td>
</tr>
<tr>
<td align="left">5&#x2003;&#x2003;&#x2003;Phase 3: for i &#x3d; 1 to N do</td>
</tr>
<tr>
<td align="left">6&#x2003;&#x2003;&#x2003;&#x2003;for l &#x3d; 1 to L do</td>
</tr>
<tr>
<td align="left">7&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;Calculate the embedding feature <inline-formula id="inf175">
<mml:math id="m205">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> in the <italic>l</italic>th layer according to the GCN formula and GAT:</td>
</tr>
<tr>
<td align="left">8&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;<inline-formula id="inf176">
<mml:math id="m206">
<mml:mrow>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>0.5</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2a;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mover accent="true">
<mml:mi>A</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2a;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msup>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>0.5</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2a;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2a;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">9&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;Update <inline-formula id="inf177">
<mml:math id="m207">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> using GAT mechanism:</td>
</tr>
<tr>
<td align="left">10&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;<inline-formula id="inf178">
<mml:math id="m208">
<mml:mrow>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> &#x3d; Attention (<inline-formula id="inf179">
<mml:math id="m209">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>)</td>
</tr>
<tr>
<td align="left">11&#x2003;&#x2003;&#x2003;end for</td>
</tr>
<tr>
<td align="left">12&#x2003;&#x2003;end for</td>
</tr>
<tr>
<td align="left">13&#x2003;&#x2003;Phase 4: Feature Fusion and Sequential Learning</td>
</tr>
<tr>
<td align="left">14&#x2003;&#x2003;Concatenate the final embeddings of microbes and diseases:</td>
</tr>
<tr>
<td align="left">15&#x2003;&#x2003;&#x2003;<inline-formula id="inf180">
<mml:math id="m210">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>Concatenate</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">16&#x2003;&#x2003;Pass <inline-formula id="inf181">
<mml:math id="m211">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> through a BiLSTM layer:</td>
</tr>
<tr>
<td align="left">17&#x2003;&#x2003;&#x2003;<inline-formula id="inf182">
<mml:math id="m212">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> &#x3d; BiLSTM(<inline-formula id="inf183">
<mml:math id="m213">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>)</td>
</tr>
<tr>
<td align="left">18&#x2003;&#x2003;Apply a dense layer to the output of BiLSTM:</td>
</tr>
<tr>
<td align="left">19&#x2003;&#x2003;&#x2003;<inline-formula id="inf184">
<mml:math id="m214">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> &#x3d; Dense (<inline-formula id="inf185">
<mml:math id="m215">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>)</td>
</tr>
<tr>
<td align="left">20&#x2003;Phase 5: Reconstruct Associations</td>
</tr>
<tr>
<td align="left">21&#x2003;&#x2003;Compute the reconstructed association matrix <inline-formula id="inf186">
<mml:math id="m216">
<mml:mrow>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> using the final embeddings:</td>
</tr>
<tr>
<td align="left">22&#x2003;&#x2003;&#x2003;<inline-formula id="inf187">
<mml:math id="m217">
<mml:mrow>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>sigmoid</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
<tr>
<td align="left">23&#x2003;Return <inline-formula id="inf188">
<mml:math id="m218">
<mml:mrow>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<sec id="s3-1">
<title>3.1 Experimental setup</title>
<p>In this study, we extracted disease features, microbial features, and disease-microbe association matrices from the HMDAD and Disbiome databases, and the three feature matrices mentioned above were used to construct heterogeneous networks that reflect the interactions between diseases and microbes for disease characterization and microbial characterization. The number of training rounds was set to 4,000 in the training phase. To optimize the algorithm to adjust the weights, the learning rate was set to 0.01. We set the random deactivation strategy for the adjacency matrix with the dropout ratio set to 0.5, thus preventing overfitting. For the main model, to randomly discard some network connections during the training process and thereby enhance the model&#x2019;s generalization ability, we similarly set the random deactivation strategy with the dropout ratio set to 0.5. In addition, we set the similarity weight to 6 to weigh the similarity features of diseases and microorganisms. From the above description, it is easy to see that there are several hyperparameters in DuGEL, such as the dimension k of the embedded features, the number of layers L, the initial learning rate r of the optimizer, the total number of training epochs &#x3b1;, the node dropout &#x3b2;, and the rule dropout &#x3b3;. <xref ref-type="fig" rid="F3">Figures 3</xref>, <xref ref-type="fig" rid="F4">4</xref> show the results obtained for several combinations of the parameters k and L under 5-fold cross-validation. From the figures, it is easy to see that the optimal combination of k and L is L &#x3d; 2, k &#x3d; 128, which indicates that the first and second-layer embedded features contain more information than the third-layer embedded features. Our analysis suggests that this may be due to excessive smoothing of LSTM.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Model parameters analysis on the HMDAD dataset.</p>
</caption>
<graphic xlink:href="fgene-16-1511521-g003.tif"/>
</fig>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Model parameters analysis on the Disbiome dataset.</p>
</caption>
<graphic xlink:href="fgene-16-1511521-g004.tif"/>
</fig>
<p>In the training and evaluation phases of the model, we perform multiple cross-validations. In each validation, the dataset is divided into a training set and a test set; the model is trained on the training set and its performance is evaluated on the test set. Specifically, 5-fold cross-validation (k_folds) is used to evaluate the model performance. In each cross-validation, the data are randomly shuffled and divided into five parts, one used as the test set and the rest as the training set. To ensure the stability and reliability of the results, we repeat the experiment 5&#xa0;times and report the average performance metrics.</p>
</sec>
<sec id="s3-2">
<title>3.2 Evaluation metrics</title>
<p>In order to evaluate the method proposed in this paper, we employ a series of evaluation metrics to comprehensively measure the performance of the model, including AUC, accuracy, and specificity. The formal definitions and calculations of each evaluation metric are given below:</p>
<p>Accuracy is the ratio of the number of correctly categorized samples to the total number of samples, which is defined as follows:<disp-formula id="e31">
<mml:math id="m219">
<mml:mrow>
<mml:mtext mathvariant="italic">Accuracy</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf189">
<mml:math id="m220">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes True Positives, <inline-formula id="inf190">
<mml:math id="m221">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes False Positives, <inline-formula id="inf191">
<mml:math id="m222">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes False Negatives, and <inline-formula id="inf192">
<mml:math id="m223">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes True Negatives. Accuracy reflects the overall ability of the model to correctly classify.</p>
<p>AUC (Area Under the ROC Curve) represents the area under the receiver operating characteristic (ROC) curve, which is used to measure the classification performance of the model. The ROC curve plots the True Positive Rate (TPR) against the False Positive Rate (FPR) at different thresholds. TPR and FPR are defined as follows:<disp-formula id="e32">
<mml:math id="m224">
<mml:mrow>
<mml:mtext mathvariant="italic">TPR</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mtext mathvariant="italic">Recall</mml:mtext>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="e33">
<mml:math id="m225">
<mml:mrow>
<mml:mtext mathvariant="italic">FPR</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>The value of AUC is between 0 and 1, with larger values indicating better model performance.</p>
<p>Specificity, also known as True Negative Rate (TNR), is formally defined as follows:<disp-formula id="e34">
<mml:math id="m226">
<mml:mrow>
<mml:mtext mathvariant="italic">Specificity</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>Specificity indicates the proportion of all samples that are actually negative that are correctly predicted to be negative.</p>
</sec>
<sec id="s3-3">
<title>3.3 Alternative methods for microbe-disease association prediction</title>
<p>In order to compare the method proposed in this paper with existing approaches, we introduced nine microbe-disease association prediction methods, as shown in <xref ref-type="table" rid="T3">Table 3</xref>.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Microbe-disease association prediction methods.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Method</th>
<th align="left">Approach</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">KATZHMDA (<xref ref-type="bibr" rid="B13">Chen et al., 2017</xref>)</td>
<td align="left">Use of KATZ measurements to infer possible microbe-disease associations</td>
</tr>
<tr>
<td align="left">LRLSHMDA (<xref ref-type="bibr" rid="B55">Wang et al., 2017</xref>)</td>
<td align="left">Microbe-disease association prediction using the Laplacian regularized least squares framework</td>
</tr>
<tr>
<td align="left">NTSHMDA (<xref ref-type="bibr" rid="B37">Luo and Long, 2018</xref>)</td>
<td align="left">Predicting potential microbe-disease associations using a model based on random walk with restart</td>
</tr>
<tr>
<td align="left">BiRWMP (<xref ref-type="bibr" rid="B51">Shen et al., 2018</xref>)</td>
<td align="left">Predicting microbe-disease associations using a bi-random walk approach</td>
</tr>
<tr>
<td align="left">NBLPIHMDA (<xref ref-type="bibr" rid="B57">Wang et al., 2019</xref>)</td>
<td align="left">Detecting potential microbe-disease associations using a bidirectional label propagation scheme</td>
</tr>
<tr>
<td align="left">HMDApred (<xref ref-type="bibr" rid="B17">Fan et al., 2020</xref>)</td>
<td align="left">Predicting microbe-disease associations using network-consistent projections and multiple data integration</td>
</tr>
<tr>
<td align="left">BPNNHMDA (<xref ref-type="bibr" rid="B32">Li et al., 2020</xref>)</td>
<td align="left">Based on a back propagation neural network design</td>
</tr>
<tr>
<td align="left">GATMDA (<xref ref-type="bibr" rid="B36">Long et al., 2021</xref>)</td>
<td align="left">Microbe-disease association prediction using graph attention networks combined with matrix completion</td>
</tr>
<tr>
<td align="left">GCNMA (<xref ref-type="bibr" rid="B58">Wang et al., 2023</xref>)</td>
<td align="left">Proposing a new computational model based on graph convolutional neural networks and multilayer attention mechanisms</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The above methods can be categorised into four groups: network-based, matrix decomposition-based, traditional machine learning-based, and graph neural network-based methods. Network-based methods construct complex network structures based on known microbe-disease associations. Then, the potential probability of associations between microorganisms and diseases is inferred by analyzing the structural features of these networks and the lengths and numbers of connecting paths between the nodes (<xref ref-type="bibr" rid="B56">Wang et al., 2011</xref>). For example, Chen et al. proposed the KATZHMDA computational model to infer possible microbe-disease associations using the KATZ measure, which captures global information in a network by counting all paths between nodes and then predicts potential associations (<xref ref-type="bibr" rid="B13">Chen et al., 2017</xref>). In contrast, the matrix decomposition-based approach focuses on decomposing the known microbe-disease association matrix into two feature matrices and approximating the original association matrix by the product of these two matrices (<xref ref-type="bibr" rid="B39">Ma and Liu, 2022</xref>). In this way, information such as similarity and strength of association between microorganisms and diseases can be obtained. For example, Shen et al. proposed the CMFHMDA computational model based on collaborative matrix factorization (<xref ref-type="bibr" rid="B52">Shen et al., 2017</xref>). In addition, traditional machine learning approaches use known associations as training samples to construct a model for predicting unknown associations between microorganisms and diseases (<xref ref-type="bibr" rid="B36">Long et al., 2021</xref>). For example, Wang et al. designed the LRLSHMDA model, which represents the network structure by constructing a Laplacian matrix and predicts the association by regularized least squares (<xref ref-type="bibr" rid="B55">Wang et al., 2017</xref>). 
Finally, the graph neural network-based approach utilizes neural networks to take microbial and disease-related data as inputs and to extract and explore features and patterns from graph-structured data (<xref ref-type="bibr" rid="B7">Bessadok et al., 2022</xref>). Such approaches can utilize the powerful learning ability of neural networks to discover potential associations between microbes and diseases and mine functional patterns and features from complex graph data. For example, Wang et al. designed GCNMA, which captures structural information in the network by introducing a graph convolutional neural network and incorporates a multilayer attention mechanism to enhance the ability to model complex relationships between nodes (<xref ref-type="bibr" rid="B58">Wang et al., 2023</xref>).</p>
<p>Although the above models can perform reliably in some aspects, they still have limitations. For example, the computation of Katz path correlation must consider all paths between different nodes (<xref ref-type="bibr" rid="B68">Zhang et al., 2017</xref>). This may lead to high computational complexity on large datasets, especially when the network size is large (<xref ref-type="bibr" rid="B31">Kumar et al., 2020</xref>). In addition, regularized least squares usually introduce a regularization term to avoid overfitting. However, choosing the appropriate regularization parameter is not easy. If it is not chosen correctly, it may lead to underfitting or overfitting problems. When there is noise in the input data, the regularized least squares method may be too sensitive to the noise, resulting in unstable or inaccurate prediction results (<xref ref-type="bibr" rid="B30">Jung and Park, 2017</xref>).</p>
</sec>
<sec id="s3-4">
<title>3.4 Experimental results and analysis</title>
<p>In this section, we provide a detailed analysis of the experimental results of our proposed DuGEL model on the HMDAD and Disbiome datasets and compare it with nine other state-of-the-art microbe-disease association prediction methods. First, <xref ref-type="table" rid="T4">Tables 4</xref>, <xref ref-type="table" rid="T5">5</xref> show the performance of our proposed DuGEL model and the nine compared methods on the HMDAD and Disbiome datasets, respectively, with AUC as the main assessment metric. On the HMDAD dataset, the DuGEL model performs well, with AUC values of 0.9698 and 0.9606 in 5-fold cross-validation and 2-fold cross-validation, respectively. A higher AUC value indicates a stronger overall predictive ability of the model, which indicates that DuGEL can effectively distinguish between positive and negative samples. For the Disbiome dataset, the DuGEL model still performs well. Under the 5-fold cross-validation and 2-fold cross-validation settings, DuGEL reaches AUC values of 0.9119 and 0.8932, respectively.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>HMDAD dataset in 5-fold cv and 2-fold cv.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Methods</th>
<th align="left">AUC(5-fold cv)</th>
<th align="left">AUC(2-fold cv)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">KATZHMDA (<xref ref-type="bibr" rid="B13">Chen et al., 2017</xref>)</td>
<td align="left">0.8291 &#xb1; 0.0041</td>
<td align="left">0.8164 &#xb1; 0.0047</td>
</tr>
<tr>
<td align="left">LRLSHMDA (<xref ref-type="bibr" rid="B55">Wang et al., 2017</xref>)</td>
<td align="left">0.8792 &#xb1; 0.0032</td>
<td align="left">0.8589 &#xb1; 0.0043</td>
</tr>
<tr>
<td align="left">NTSHMDA (<xref ref-type="bibr" rid="B37">Luo and Long, 2018</xref>)</td>
<td align="left">0.8893 &#xb1; 0.0043</td>
<td align="left">0.8631 &#xb1; 0.0052</td>
</tr>
<tr>
<td align="left">BiRWMP (<xref ref-type="bibr" rid="B51">Shen et al., 2018</xref>)</td>
<td align="left">0.8777 &#xb1; 0.0089</td>
<td align="left">0.8693 &#xb1; 0.0081</td>
</tr>
<tr>
<td align="left">NBLPIHMDA (<xref ref-type="bibr" rid="B57">Wang et al., 2019</xref>)</td>
<td align="left">0.8961 &#xb1; 0.0033</td>
<td align="left">0.8792 &#xb1; 0.0056</td>
</tr>
<tr>
<td align="left">HMDApred (<xref ref-type="bibr" rid="B17">Fan et al., 2020</xref>)</td>
<td align="left">0.9357 &#xb1; 0.0041</td>
<td align="left">0.9049 &#xb1; 0.0035</td>
</tr>
<tr>
<td align="left">BPNNHMDA (<xref ref-type="bibr" rid="B32">Li et al., 2020</xref>)</td>
<td align="left">0.9133 &#xb1; 0.0012</td>
<td align="left">0.8949 &#xb1; 0.0023</td>
</tr>
<tr>
<td align="left">GATMDA (<xref ref-type="bibr" rid="B36">Long et al., 2021</xref>)</td>
<td align="left">0.9561 &#xb1; 0.0142</td>
<td align="left">0.9541 &#xb1; 0.0053</td>
</tr>
<tr>
<td align="left">GCNMA (<xref ref-type="bibr" rid="B58">Wang et al., 2023</xref>)</td>
<td align="left">0.9610 &#xb1; 0.0223</td>
<td align="left">0.9512 &#xb1; 0.0076</td>
</tr>
<tr>
<td align="left">DuGEL (our model)</td>
<td align="left">0.9698 &#xb1; 0.0172</td>
<td align="left">0.9606 &#xb1; 0.0057</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Disbiome dataset in 5-fold cv and 2-fold cv.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Methods</th>
<th align="left">AUC(5-fold cv)</th>
<th align="left">AUC(2-fold cv)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">KATZHMDA (<xref ref-type="bibr" rid="B13">Chen et al., 2017</xref>)</td>
<td align="left">0.6781 &#xb1; 0.0132</td>
<td align="left">0.6692 &#xb1; 0.0063</td>
</tr>
<tr>
<td align="left">LRLSHMDA (<xref ref-type="bibr" rid="B55">Wang et al., 2017</xref>)</td>
<td align="left">0.7361 &#xb1; 0.0221</td>
<td align="left">0.7191 &#xb1; 0.0115</td>
</tr>
<tr>
<td align="left">NTSHMDA (<xref ref-type="bibr" rid="B37">Luo and Long, 2018</xref>)</td>
<td align="left">0.8301 &#xb1; 0.0059</td>
<td align="left">0.8079 &#xb1; 0.0065</td>
</tr>
<tr>
<td align="left">BiRWMP (<xref ref-type="bibr" rid="B51">Shen et al., 2018</xref>)</td>
<td align="left">0.8319 &#xb1; 0.0092</td>
<td align="left">0.8141 &#xb1; 0.0057</td>
</tr>
<tr>
<td align="left">NBLPIHMDA (<xref ref-type="bibr" rid="B57">Wang et al., 2019</xref>)</td>
<td align="left">0.8429 &#xb1; 0.0112</td>
<td align="left">0.8281 &#xb1; 0.0142</td>
</tr>
<tr>
<td align="left">HMDApred (<xref ref-type="bibr" rid="B17">Fan et al., 2020</xref>)</td>
<td align="left">0.8521 &#xb1; 0.0381</td>
<td align="left">0.8373 &#xb1; 0.0342</td>
</tr>
<tr>
<td align="left">BPNNHMDA (<xref ref-type="bibr" rid="B32">Li et al., 2020</xref>)</td>
<td align="left">0.8716 &#xb1; 0.0191</td>
<td align="left">0.8532 &#xb1; 0.0151</td>
</tr>
<tr>
<td align="left">GATMDA (<xref ref-type="bibr" rid="B36">Long et al., 2021</xref>)</td>
<td align="left">0.9229 &#xb1; 0.0081</td>
<td align="left">0.9201 &#xb1; 0.0141</td>
</tr>
<tr>
<td align="left">GCNMA (<xref ref-type="bibr" rid="B58">Wang et al., 2023</xref>)</td>
<td align="left">0.9001 &#xb1; 0.0161</td>
<td align="left">0.8803 &#xb1; 0.0178</td>
</tr>
<tr>
<td align="left">DuGEL (our model)</td>
<td align="left">0.9119 &#xb1; 0.0059</td>
<td align="left">0.8932 &#xb1; 0.0038</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In comparison with previous methods, the DuGEL model demonstrates excellent performance. We attribute the results to the intrinsic structure of DuGEL. DuGEL achieves efficient prediction of microbe-disease associations by combining a graph convolutional neural network (GCN) and a graph attention network (GAT), as well as introducing a Long Short-Term Memory (LSTM) network to process the fused features. This multilevel feature extraction and sequence modeling approach enables DuGEL to perform well on key metrics (e.g., AUPR and AUC), demonstrating its robustness in microbe-disease association prediction tasks.</p>
<p>In particular, the DuGEL model combines the strengths of GCN and GAT; GCN can efficiently capture both local and global spatial features in the graph and extract complex relationships between microbes and diseases by performing convolutional operations on the features of neighboring nodes. Moreover, through the attention mechanism, GAT can assign different importance weights to its neighbors when processing nodes, thus better capturing the information of critical nodes. After combining the two, the outputs of GCN and GAT are fused through the Dual Graph Enhanced Layer of the DuGEL model, which effectively integrates the structural features and node importance.</p>
<p>Furthermore, introducing the LSTM module further enhances the model&#x2019;s capabilities. LSTM is good at processing sequence data and can better identify potential associations between microbes and diseases by capturing temporal dependencies (<xref ref-type="bibr" rid="B5">Baranwal et al., 2022</xref>). The memory unit of LSTM can preserve information of long-time dependencies (<xref ref-type="bibr" rid="B60">Wu et al., 2021</xref>), which is especially important for analyzing potential microbe-disease relationships over long periods.</p>
</sec>
<sec id="s3-5">
<title>3.5 Ablation experiment</title>
<p>In addition, to assess the role and contribution of every sub-module in the DuGEL model proposed in this study, we performed ablation experiments on the HMDAD dataset to inspect the effect of the different parts of the model. We carried out three specific ablation experiments:<list list-type="simple">
<list-item>
<p>1. Removing the graph convolution sublayer (denoted as &#x201c;-GCN Layer&#x201d;)</p>
</list-item>
<list-item>
<p>2. Removing the graph attention sublayer (denoted as &#x201c;-GAT Layer&#x201d;)</p>
</list-item>
<list-item>
<p>3. Removing the BiLSTM module (denoted as &#x201c;-BiLSTM Module&#x201d;)</p>
</list-item>
</list>
</p>
<p>Overall, each sub-module of the DuGEL model improves the model&#x2019;s effectiveness in capturing the correlation between microorganisms and diseases. <xref ref-type="table" rid="T6">Table 6</xref> shows the results of the ablation experiments. In the first ablation experiment, after removing the graph convolution sublayer, the model&#x2019;s AUC drops to 0.9465 and 0.9392 in 5-fold cross-validation and 2-fold cross-validation, respectively, which suggests that GCN plays a crucial role in capturing local and global spatial features between microbes and diseases. By performing convolutional operations on the features of neighboring nodes, GCN can extract complex correlation information, and removing this component considerably decreases the model&#x2019;s predictive power.</p>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>Ablation results in 5-fold cv and 2-fold cv on the HMDAD dataset.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Methods</th>
<th align="left">AUC(5-fold cv)</th>
<th align="left">AUC(2-fold cv)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">DuGEL (Proposed)</td>
<td align="left">0.9698 &#xb1; 0.0172</td>
<td align="left">0.9606 &#xb1; 0.0057</td>
</tr>
<tr>
<td align="left">-GCN Layer</td>
<td align="left">0.9465 &#xb1; 0.0185</td>
<td align="left">0.9392 &#xb1; 0.0046</td>
</tr>
<tr>
<td align="left">-GAT Layer</td>
<td align="left">0.9520 &#xb1; 0.0163</td>
<td align="left">0.9408 &#xb1; 0.0041</td>
</tr>
<tr>
<td align="left">-BiLSTM Module</td>
<td align="left">0.9602 &#xb1; 0.0053</td>
<td align="left">0.9510 &#xb1; 0.0075</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In the second ablation experiment, removing the GAT sublayer reduces the model&#x2019;s AUC to 0.9520 and 0.9408 in 5-fold cross-validation and 2-fold cross-validation, respectively. GAT can better capture information about key nodes via an attention mechanism that assigns different importance weights to a node&#x2019;s neighbors when processing nodes. This mechanism is necessary for improving the model&#x2019;s prediction accuracy, and the model&#x2019;s performance likewise decreases after its removal.</p>
<p>Finally, we explored the function of the BiLSTM module. After removing the BiLSTM module, the AUC of the model decreased to 0.9602 and 0.9510 in the 5-fold cross-validation and 2-fold cross-validation, respectively. The LSTM module is well suited to processing sequence information and can identify potential associations between microbes and diseases by capturing temporal dependencies. The memory unit of the LSTM can retain long-term dependency information, which is especially important for analyzing potential microbe-disease relationships over long periods. The removal of this module resulted in a considerable reduction in the predictive power of the model, further demonstrating the important role of the BiLSTM module in the DuGEL model.</p>
<p>Overall, every sub-module had a positive impact on the predictive performance of the DuGEL model. The GCN and GAT successfully extracted complex associations between microbes and diseases by capturing spatial features and node importance information in the graph structure, while the BiLSTM module further improved the predictive capacity of the model by processing sequence features. The results of the ablation experiments validate the effectiveness and necessity of these sub-modules and demonstrate the rationality and superiority of the DuGEL model design in the microbe-disease association prediction task.</p>
</sec>
</sec>
<sec id="s4">
<title>4 Case study</title>
<p>In this section, we selected three diseases, kidney stones, eczema, and ileal Crohn&#x2019;s disease, as case studies for HMDAD to validate our model&#x2019;s performance further. Specifically, for each of these three diseases, we ranked the candidate microorganisms by prediction score and selected the top 20. Then, we evaluated the predictive performance of DuGEL by searching the literature for supporting evidence. Among the common diseases, kidney stones cause severe back or abdominal pain accompanied by nausea, vomiting, hematuria, and other symptoms (<xref ref-type="bibr" rid="B54">Stevens, 2018</xref>). In recent years, the incidence of kidney stones has been on the rise, and the condition is prevalent in young adult males (<xref ref-type="bibr" rid="B53">Stamatelou and Goldfarb, 2023</xref>). A diverse microbial community exists around renal stones; changes in intestinal and urinary microorganisms may cause the occurrence and development of renal stones. <italic>Clostridium difficile</italic>, <italic>Bifidobacterium</italic>, and others are more closely associated with kidney stone occurrence (<xref ref-type="bibr" rid="B44">Miller et al., 2022</xref>). As shown in <xref ref-type="table" rid="T7">Table 7</xref>, the relevance of 17 of the top 20 candidate kidney stone-associated microorganisms predicted by DuGEL has been confirmed by previous publications.</p>
<table-wrap id="T7" position="float">
<label>TABLE 7</label>
<caption>
<p>The top 20 Kidney stones microbes predicted by DuGEL.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Rank</th>
<th align="left">Microbe</th>
<th align="left">Evidence</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">1</td>
<td align="left" style="color:#auto">
<italic>Clostridium difficile</italic>
</td>
<td align="left">PMID: 30021693</td>
</tr>
<tr>
<td align="left">2</td>
<td align="left" style="color:#auto">
<italic>Helicobacter pylori</italic>
</td>
<td align="left">PMID: 25459132</td>
</tr>
<tr>
<td align="left">3</td>
<td align="left" style="color:#auto">
<italic>Staphylococcus aureus</italic>
</td>
<td align="left">PMID: 14241187</td>
</tr>
<tr>
<td align="left">4</td>
<td align="left">
<italic>Clostridium coccoides</italic></td>
<td align="left">PMID: 37609403</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left" style="color:#auto">
<italic>Staphylococcus</italic>
</td>
<td align="left">PMID: 14241187</td>
</tr>
<tr>
<td align="left">6</td>
<td align="left">Bifidobacterium</td>
<td align="left">PMID: 37145061</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">Comamonadaceae</td>
<td align="left">Unconfirmed</td>
</tr>
<tr>
<td align="left">8</td>
<td align="left">Oxalobacteraceae</td>
<td align="left">PMID: 32381601</td>
</tr>
<tr>
<td align="left">9</td>
<td align="left">Sphingomonadaceae</td>
<td align="left">PMID: 36970590</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">Dietzia maris</td>
<td align="left">Unconfirmed</td>
</tr>
<tr>
<td align="left">11</td>
<td align="left">
<italic>Staphylococcus epidermidis</italic></td>
<td align="left">PMID: 20466084</td>
</tr>
<tr>
<td align="left">12</td>
<td align="left" style="color:#auto">
<italic>Escherichia coli</italic>
</td>
<td align="left">PMID: 14241187</td>
</tr>
<tr>
<td align="left">13</td>
<td align="left" style="color:#auto">
<italic>Acinetobacter</italic>
</td>
<td align="left">PMID: 32111156</td>
</tr>
<tr>
<td align="left">14</td>
<td align="left">Corynebacterium</td>
<td align="left">PMID: 24563271</td>
</tr>
<tr>
<td align="left">15</td>
<td align="left">Prevotella copri</td>
<td align="left">PMID: 27708409</td>
</tr>
<tr>
<td align="left">16</td>
<td align="left">Propionibacterium</td>
<td align="left">PMID: 33153435</td>
</tr>
<tr>
<td align="left">17</td>
<td align="left">Propionibacterium acnes</td>
<td align="left">Unconfirmed</td>
</tr>
<tr>
<td align="left">18</td>
<td align="left">Desulfovibrio</td>
<td align="left">PMID: 38007438</td>
</tr>
<tr>
<td align="left">19</td>
<td align="left">Oxalobacter formigenes</td>
<td align="left">PMID: 32880090</td>
</tr>
<tr>
<td align="left">20</td>
<td align="left">
<italic>Fusobacterium nucleatum</italic></td>
<td align="left">PMID: 37458823</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In addition, eczema is an inflammatory skin disease. According to many studies, microorganisms are strongly associated with eczema (<xref ref-type="bibr" rid="B71">Zimmermann et al., 2019</xref>). People with eczema have a less diverse and less stable skin microbiome than those without. This means the balance of beneficial and harmful bacteria on the skin is disrupted, making it more susceptible to infection and inflammation (<xref ref-type="bibr" rid="B19">Flowers and Grice, 2020</xref>). In addition, the presence of <italic>Staphylococcus aureus</italic> is associated with more severe eczema symptoms (<xref ref-type="bibr" rid="B10">Chapsa et al., 2023</xref>), suggesting a direct link between this bacterium and the condition. As shown in <xref ref-type="table" rid="T8">Table 8</xref>, existing publications have confirmed 17 of the 20 potential eczema-associated microorganisms predicted by DuGEL.</p>
<table-wrap id="T8" position="float">
<label>TABLE 8</label>
<caption>
<p>The top 20 Eczema microbes predicted by DuGEL.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Rank</th>
<th align="left">Microbe</th>
<th align="left">Evidence</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">1</td>
<td align="left" style="color:#auto">
<italic>Clostridium difficile</italic>
</td>
<td align="left">PMID: 27667310</td>
</tr>
<tr>
<td align="left">2</td>
<td align="left" style="color:#auto">
<italic>Helicobacter pylori</italic>
</td>
<td align="left">PMID: 17568058</td>
</tr>
<tr>
<td align="left">3</td>
<td align="left" style="color:#auto">
<italic>Staphylococcus aureus</italic>
</td>
<td align="left">PMID: 16965415</td>
</tr>
<tr>
<td align="left">4</td>
<td align="left" style="color:#auto">
<italic>Escherichia coli</italic>
</td>
<td align="left">PMID: 27667310</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">Dietzia maris</td>
<td align="left">PMID: 26821151</td>
</tr>
<tr>
<td align="left">6</td>
<td align="left">
<italic>Staphylococcus epidermidis</italic></td>
<td align="left">PMID: 27416972</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">Stenotrophomonas maltophilia</td>
<td align="left">PMID: 26821151</td>
</tr>
<tr>
<td align="left">8</td>
<td align="left">Comamonadaceae</td>
<td align="left">Unconfirmed</td>
</tr>
<tr>
<td align="left">9</td>
<td align="left">Oxalobacteraceae</td>
<td align="left">PMID: 32971520</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">Sphingomonadaceae</td>
<td align="left">PMID: 33735474</td>
</tr>
<tr>
<td align="left">11</td>
<td align="left" style="color:#auto">
<italic>Acinetobacter</italic>
</td>
<td align="left">PMID: 28207943</td>
</tr>
<tr>
<td align="left">12</td>
<td align="left">Corynebacterium</td>
<td align="left">PMID: 27562264</td>
</tr>
<tr>
<td align="left">13</td>
<td align="left">Prevotella copri</td>
<td align="left">Unconfirmed</td>
</tr>
<tr>
<td align="left">14</td>
<td align="left">Oxalobacter formigenes</td>
<td align="left">Unconfirmed</td>
</tr>
<tr>
<td align="left">15</td>
<td align="left">Desulfovibrio</td>
<td align="left">PMID: 27812181</td>
</tr>
<tr>
<td align="left">16</td>
<td align="left">Tropheryma whipplei</td>
<td align="left">PMID: 2456205</td>
</tr>
<tr>
<td align="left">17</td>
<td align="left" style="color:#auto">
<italic>Enterococcus</italic>
</td>
<td align="left">PMID: 16601353</td>
</tr>
<tr>
<td align="left">18</td>
<td align="left" style="color:#auto">
<italic>Pseudomonas</italic>
</td>
<td align="left">PMID: 33492004</td>
</tr>
<tr>
<td align="left">19</td>
<td align="left" style="color:#auto">
<italic>Staphylococcus</italic>
</td>
<td align="left">PMID: 20222931</td>
</tr>
<tr>
<td align="left">20</td>
<td align="left">
<italic>Clostridium coccoides</italic></td>
<td align="left">PMID: 24650346</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Ileal Crohn&#x2019;s disease is an inflammatory bowel disease. It causes swelling of the tissues of the digestive tract, which may lead to abdominal pain, severe diarrhea, fatigue, weight loss, and malnutrition (<xref ref-type="bibr" rid="B16">Fakhoury et al., 2014</xref>). The degree of symptoms ranges from mild to severe and usually comes on gradually, but sometimes, it can come on suddenly without warning. The cause of ileal Crohn&#x2019;s disease is still unknown, but it is often assumed that a virus or bacteria may trigger Crohn&#x2019;s disease. This paper presents a case study of ileal Crohn&#x2019;s disease. As shown in <xref ref-type="table" rid="T9">Table 9</xref>, it is clear that 18 of the top 20 microorganisms associated with ileal Crohn&#x2019;s disease have been confirmed in the published literature.</p>
<table-wrap id="T9" position="float">
<label>TABLE 9</label>
<caption>
<p>The top 20 Ileal Crohn&#x2019;s disease microbes predicted by DuGEL.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Rank</th>
<th align="left">Microbe</th>
<th align="left">Evidence</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">1</td>
<td align="left">Actinobacteria</td>
<td align="left">PMID: 33975420</td>
</tr>
<tr>
<td align="left">2</td>
<td align="left">Bacteroidetes</td>
<td align="left">PMID: 32448900</td>
</tr>
<tr>
<td align="left">3</td>
<td align="left">
<italic>Clostridium</italic> coccoides</td>
<td align="left">PMID: 22719818</td>
</tr>
<tr>
<td align="left">4</td>
<td align="left">Firmicutes</td>
<td align="left">PMID: 33975420</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">Prevotella</td>
<td align="left">PMID: 35967326</td>
</tr>
<tr>
<td align="left">6</td>
<td align="left">Proteobacteria</td>
<td align="left">PMID: 31530835</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">Enterobacteriaceae</td>
<td align="left">PMID: 36268225</td>
</tr>
<tr>
<td align="left">8</td>
<td align="left">Lachnospiraceae</td>
<td align="left">Unconfirmed</td>
</tr>
<tr>
<td align="left">9</td>
<td align="left">
<italic>Bacteroides</italic> ovatus</td>
<td align="left">PMID: 26275394</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">
<italic>Bacteroides</italic> uniformis</td>
<td align="left">PMID: 33102745</td>
</tr>
<tr>
<td align="left">11</td>
<td align="left">
<italic>Bacteroides</italic> vulgatus</td>
<td align="left">PMID: 26275394</td>
</tr>
<tr>
<td align="left">12</td>
<td align="left">Clostridia</td>
<td align="left">PMID: 30818349</td>
</tr>
<tr>
<td align="left">13</td>
<td align="left">Faecalibacterium prausnitzii</td>
<td align="left">PMID: 18936492</td>
</tr>
<tr>
<td align="left">14</td>
<td align="left">
<italic>Clostridium</italic> leptum</td>
<td align="left">PMID: 16188921</td>
</tr>
<tr>
<td align="left">15</td>
<td align="left">
<italic>Lactobacillus</italic>
</td>
<td align="left">PMID: 18839424</td>
</tr>
<tr>
<td align="left">16</td>
<td align="left">
<italic>Klebsiella</italic>
</td>
<td align="left">PMID: 24223596</td>
</tr>
<tr>
<td align="left">17</td>
<td align="left">
<italic>Staphylococcus</italic>
</td>
<td align="left">PMID: 23885156</td>
</tr>
<tr>
<td align="left">18</td>
<td align="left">Veillonella</td>
<td align="left">Unconfirmed</td>
</tr>
<tr>
<td align="left">19</td>
<td align="left">
<italic>Bacteroides</italic>
</td>
<td align="left">PMID: 21484962</td>
</tr>
<tr>
<td align="left">20</td>
<td align="left">
<italic>Escherichia coli</italic>
</td>
<td align="left">PMID: 15300573</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s5">
<title>5 Discussion and conclusion</title>
<p>Microorganisms play an important role in our lives and exist in countless numbers and diversity. Investigating the potential link between microorganisms and diseases can not only contribute to the discovery of new therapeutic approaches and preventive strategies but also help advance the fields of microbiology and medicine.</p>
<p>In this study, we propose a deep learning model called DuGEL to predict potential microbe-disease associations. The DuGEL model combines a graph convolutional network (GCN), a graph attention network (GAT), and a long short-term memory network (LSTM) to efficiently capture and fuse the complex relationships between microbes and diseases. With the dual-channel structure, DuGEL can extract local and global features in the graph structure and enhance the model&#x2019;s ability to capture critical nodes by assigning different importance weights to neighboring nodes through the attention mechanism. Our comprehensive experiments and case studies consistently show that DuGEL performs very satisfactorily in terms of prediction accuracy.</p>
<p>Although the DuGEL model has been very effective in studying the relationship between microbes and disease, it still has some limitations. Currently, the model relies heavily on the HMDAD and Disbiome datasets. Therefore, future work could focus on expanding the datasets to capture microbe-disease associations more comprehensively and accurately. In addition, the DuGEL model can be applied to drug-target interaction prediction and gene-disease association prediction to validate its broad applicability and effectiveness.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material; further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>JW: Writing&#x2013;original draft, Writing&#x2013;review and editing, Conceptualization, Investigation. LX: Data curation, Formal Analysis, Writing&#x2013;review and editing. LF: Investigation, Software, Writing&#x2013;review and editing. LW: Funding acquisition, Methodology, Resources, Writing&#x2013;review and editing. XZ: Funding acquisition, Supervision, Validation, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s8">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. This work was partly sponsored by the National Natural Science Foundation of China (No. 62272064), the Scientific Research Program of Education Department of Hunan Province (No. 23A0514), the Natural Science Foundation of Hunan Province (No. 2023JJ60185), the Natural Science Foundation of Hunan Province Program (No. 2022JJ50138), the Application-oriented Special Disciplines, Double First-Class University Project of Hunan Province [Xiangjiaotong (2018) 469] and the Hunan Provincial Education Department Scientific Research Project (No. 20B080).</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abuin-Denis</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Piloto-Sardi&#xf1;as</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Ma&#xee;tre</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Wu-Chuang</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mateos-Hern&#xe1;ndez</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Obregon</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Exploring the impact of Anaplasma phagocytophilum on colonization resistance of Ixodes scapularis microbiota using network node manipulation</article-title>. <source>Curr. Res. Parasitol. and Vector-Borne Dis.</source> <volume>5</volume>, <fpage>100177</fpage>. <pub-id pub-id-type="doi">10.1016/j.crpvbd.2024.100177</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Afshari</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Enayatollahi</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Machine learning-based methods in structural reliability analysis: a review</article-title>. <source>Reliab. Eng. and Syst. Saf.</source> <volume>219</volume>, <fpage>108223</fpage>. <pub-id pub-id-type="doi">10.1016/j.ress.2021.108223</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Afzal</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ziapour</surname>
<given-names>B. M.</given-names>
</name>
<name>
<surname>Shokri</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Shakibi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Sobhani</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Building energy consumption prediction using multilayer perceptron neural network-assisted models; comparison of different optimization algorithms</article-title>. <source>Energy</source> <volume>282</volume>, <fpage>128446</fpage>. <pub-id pub-id-type="doi">10.1016/j.energy.2023.128446</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Anisman</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Hayley</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kusnecov</surname>
<given-names>A. W.</given-names>
</name>
</person-group> (<year>2018</year>). <source>The immune system and mental health</source>. <publisher-name>Academic Press</publisher-name>. <pub-id pub-id-type="doi">10.1111/cei.13334</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baranwal</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Clark</surname>
<given-names>R. L.</given-names>
</name>
<name>
<surname>Thompson</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Hero</surname>
<given-names>A. O.</given-names>
</name>
<name>
<surname>Venturelli</surname>
<given-names>O. S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Recurrent neural networks enable design of multifunctional synthetic human gut microbiome dynamics</article-title>. <source>Elife</source> <volume>11</volume>, <fpage>e73870</fpage>. <pub-id pub-id-type="doi">10.7554/eLife.73870</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barr</surname>
<given-names>J. J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>A bacteriophages journey through the human body</article-title>. <source>Immunol. Rev.</source> <volume>279</volume> (<issue>1</issue>), <fpage>106</fpage>&#x2013;<lpage>122</lpage>. <pub-id pub-id-type="doi">10.1111/imr.12565</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bessadok</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mahjoub</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Rekik</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Graph neural networks in network neuroscience</article-title>. <source>IEEE Trans. Pattern Analysis Mach. Intell.</source> <volume>45</volume> (<issue>5</issue>), <fpage>5833</fpage>&#x2013;<lpage>5848</lpage>. <pub-id pub-id-type="doi">10.1109/tpami.2022.3209686</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bocci</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>1992</year>). <article-title>The neglected organ: bacterial flora has a crucial immunostimulatory role</article-title>. <source>Perspect. Biol. Med.</source> <volume>35</volume> (<issue>2</issue>), <fpage>251</fpage>&#x2013;<lpage>260</lpage>. <pub-id pub-id-type="doi">10.1353/pbm.1992.0004</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Burr</surname>
<given-names>A. H.</given-names>
</name>
<name>
<surname>Bhattacharjee</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hand</surname>
<given-names>T. W.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Nutritional modulation of the microbiome and immune response</article-title>. <source>J. Immunol.</source> <volume>205</volume> (<issue>6</issue>), <fpage>1479</fpage>&#x2013;<lpage>1487</lpage>. <pub-id pub-id-type="doi">10.4049/jimmunol.2000419</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chapsa</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>R&#xf6;nsch</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>L&#xf6;we</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Gunzer</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Beissert</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bauer</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>The role of bacterial colonisation in severity, symptoms and aetiology of hand eczema: the importance of <italic>Staphylococcus aureus</italic> and presence of commensal skin flora</article-title>. <source>Contact Dermat.</source> <volume>89</volume> (<issue>4</issue>), <fpage>270</fpage>&#x2013;<lpage>276</lpage>. <pub-id pub-id-type="doi">10.1111/cod.14384</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Chatzianastasis</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lutzeyer</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Dasoulas</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Vazirgiannis</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Graph ordering attention networks</article-title>,&#x201d; in <conf-name>Proceedings of the AAAI Conference on Artificial Intelligence</conf-name>, <conf-loc>USA</conf-loc>, <conf-date>February 2023</conf-date>, <fpage>7006</fpage>&#x2013;<lpage>7014</lpage>. <pub-id pub-id-type="doi">10.1609/aaai.v37i6.25856</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ling</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Catgcn: graph convolutional networks with categorical node features</article-title>. <source>IEEE Trans. Knowl. Data Eng.</source> <volume>35</volume> (<issue>4</issue>), <fpage>3500</fpage>&#x2013;<lpage>3511</lpage>. <pub-id pub-id-type="doi">10.1109/TKDE.2021.3133013</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.-A.</given-names>
</name>
<name>
<surname>You</surname>
<given-names>Z.-H.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>G.-Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.-S.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>A novel approach based on KATZ measure to predict associations of human microbiota with non-infectious diseases</article-title>. <source>Bioinformatics</source> <volume>33</volume> (<issue>5</issue>), <fpage>733</fpage>&#x2013;<lpage>739</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btw715</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Cui</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Jia</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>T.-Y.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Belongie</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Class-balanced loss based on effective number of samples</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>, <conf-loc>USA</conf-loc>, <conf-date>15-20 June 2019</conf-date>, <fpage>9268</fpage>&#x2013;<lpage>9277</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR.2019.00949</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Du</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Bai</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Learning structure perception MLPs on graphs: a layer-wise graph knowledge distillation framework</article-title>. <source>Int. J. Mach. Learn. Cybern.</source> <volume>15</volume>, <fpage>4357</fpage>&#x2013;<lpage>4372</lpage>. <pub-id pub-id-type="doi">10.1007/s13042-024-02150-2</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fakhoury</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Negrulj</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Mooranian</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Al-Salami</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Inflammatory bowel disease: clinical aspects and treatments</article-title>. <source>J. Inflamm. Res.</source> <volume>7</volume>, <fpage>113</fpage>&#x2013;<lpage>120</lpage>. <pub-id pub-id-type="doi">10.2147/JIR.S65979</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Inferring disease-associated microbes based on multi-data integration and network consistency projection</article-title>. <source>Front. Bioeng. Biotechnol.</source> <volume>8</volume>, <fpage>831</fpage>. <pub-id pub-id-type="doi">10.3389/fbioe.2020.00831</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Feng</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>One-dimensional VGGNet for high-dimensional data</article-title>. <source>Appl. Soft Comput.</source> <volume>135</volume>, <fpage>110035</fpage>. <pub-id pub-id-type="doi">10.1016/j.asoc.2023.110035</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Flowers</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Grice</surname>
<given-names>E. A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>The skin microbiota: balancing risk and reward</article-title>. <source>Cell Host and Microbe</source> <volume>28</volume> (<issue>2</issue>), <fpage>190</fpage>&#x2013;<lpage>200</lpage>. <pub-id pub-id-type="doi">10.1016/j.chom.2020.06.017</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Cost-sensitive learning with modified Stein loss function</article-title>. <source>Neurocomputing</source> <volume>525</volume>, <fpage>57</fpage>&#x2013;<lpage>75</lpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2023.01.052</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gilbert</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Quinn</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Debelius</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Z. Z.</given-names>
</name>
<name>
<surname>Morton</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Garg</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Microbiome-wide association studies link dynamic microbial consortia to disease</article-title>. <source>Nature</source> <volume>535</volume> (<issue>7610</issue>), <fpage>94</fpage>&#x2013;<lpage>103</lpage>. <pub-id pub-id-type="doi">10.1038/nature18850</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Grasso</surname>
<given-names>L. L.</given-names>
</name>
<name>
<surname>Martino</surname>
<given-names>D. C.</given-names>
</name>
<name>
<surname>Alduina</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Production of antibacterial compounds from Actinomycetes</article-title>. <source>Actinobacteria-basics Biotechnol. Appl.</source> <volume>7</volume>, <fpage>177</fpage>&#x2013;<lpage>198</lpage>. <pub-id pub-id-type="doi">10.5772/61525</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Heintz-Buschart</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Wilmes</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Human gut microbiome: function matters</article-title>. <source>Trends Microbiol.</source> <volume>26</volume> (<issue>7</issue>), <fpage>563</fpage>&#x2013;<lpage>574</lpage>. <pub-id pub-id-type="doi">10.1016/j.tim.2017.11.002</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hoffmann</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>Proctor</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Surette</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Suchodolski</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>The microbiome: the trillions of microorganisms that maintain health and cause disease in humans and companion animals</article-title>. <source>Veterinary Pathol.</source> <volume>53</volume> (<issue>1</issue>), <fpage>10</fpage>&#x2013;<lpage>21</lpage>. <pub-id pub-id-type="doi">10.1177/0300985815595517</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Islam</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Haque</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Rahman</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Hossen</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Reza</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Barua</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>A review on measures to rejuvenate immune system: Natural mode of protection against coronavirus infection</article-title>. <source>Front. Immunol.</source> <volume>13</volume>, <fpage>837290</fpage>. <pub-id pub-id-type="doi">10.3389/fimmu.2022.837290</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jagannathan</surname>
<given-names>S. V.</given-names>
</name>
<name>
<surname>Manemann</surname>
<given-names>E. M.</given-names>
</name>
<name>
<surname>Rowe</surname>
<given-names>S. E.</given-names>
</name>
<name>
<surname>Callender</surname>
<given-names>M. C.</given-names>
</name>
<name>
<surname>Soto</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Marine actinomycetes, new sources of biotechnological products</article-title>. <source>Mar. Drugs</source> <volume>19</volume> (<issue>7</issue>), <fpage>365</fpage>. <pub-id pub-id-type="doi">10.3390/md19070365</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Janssens</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Nielandt</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bronselaer</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Debunne</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Verbeke</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wynendaele</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Disbiome database: linking the microbiome to disease</article-title>. <source>BMC Microbiol.</source> <volume>18</volume>, <fpage>50</fpage>&#x2013;<lpage>56</lpage>. <pub-id pub-id-type="doi">10.1186/s12866-018-1197-5</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Jin</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). &#x201c;<article-title>Bite-gcn: a new GCN architecture via bidirectional convolution of topology and features on text-rich networks</article-title>,&#x201d; in <conf-name>Proceedings of the 14th ACM International Conference on Web Search and Data Mining</conf-name>, <conf-loc>China</conf-loc>, <conf-date>12 October 2021</conf-date>, <fpage>157</fpage>&#x2013;<lpage>165</lpage>. <pub-id pub-id-type="doi">10.1145/3437963.3441774</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jin</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Duan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>mBodyMap: a curated database for microbes across human body and their associations with health and diseases</article-title>. <source>Nucleic Acids Res.</source> <volume>50</volume> (<issue>D1</issue>), <fpage>D808</fpage>&#x2013;<lpage>D816</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkab973</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jung</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Stabilization of a bias-compensated normalized least-mean-square algorithm for noisy inputs</article-title>. <source>IEEE Trans. Signal Process.</source> <volume>65</volume> (<issue>11</issue>), <fpage>2949</fpage>&#x2013;<lpage>2961</lpage>. <pub-id pub-id-type="doi">10.1109/TSP.2017.2675865</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kumar</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mishra</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Biswas</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Link prediction in complex networks based on significance of higher-order path index (SHOPI)</article-title>. <source>Phys. A Stat. Mech. its Appl.</source> <volume>545</volume>, <fpage>123790</fpage>. <pub-id pub-id-type="doi">10.1016/j.physa.2019.123790</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhen</surname>
<given-names>Z. Z.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Identifying microbe-disease association based on a novel back-propagation neural network model</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinforma.</source> <volume>PP</volume> (<issue>99</issue>), <fpage>1</fpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2020.2986459</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ling</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A survey of graph neural network based recommendation in social networks</article-title>. <source>Neurocomputing</source> <volume>549</volume>, <fpage>126441</fpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2023.126441</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Leung</surname>
<given-names>R. K.-K.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Au</surname>
<given-names>W. W.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Involvement of gut microbiome in human health and disease: brief overview, knowledge gaps and research opportunities</article-title>. <source>Gut Pathog.</source> <volume>10</volume>, <fpage>3</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1186/s13099-018-0230-4</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Qi</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Qin</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jia</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Path aggregation network for instance segmentation</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>, <conf-loc>USA</conf-loc>, <conf-date>18-23 June 2018</conf-date>, <fpage>8759</fpage>&#x2013;<lpage>8768</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR.2018.00913</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Long</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xia</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Predicting human microbe&#x2013;disease associations via graph attention networks with inductive matrix completion</article-title>. <source>Briefings Bioinforma.</source> <volume>22</volume> (<issue>3</issue>), <fpage>bbaa146</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbaa146</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Long</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>NTSHMDA: prediction of human microbe-disease association based on random walk by integrating network topological similarity</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinforma.</source> <volume>17</volume> (<issue>4</issue>), <fpage>1341</fpage>&#x2013;<lpage>1351</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2018.2883041</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zeng</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Geng</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>An analysis of human microbe&#x2013;disease associations</article-title>. <source>Briefings Bioinforma.</source> <volume>18</volume> (<issue>1</issue>), <fpage>85</fpage>&#x2013;<lpage>97</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbw005</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Generalized matrix factorization based on weighted hypergraph learning for microbe-drug association prediction</article-title>. <source>Comput. Biol. Med.</source> <volume>145</volume>, <fpage>105503</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2022.105503</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Malla</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Dubey</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kumar</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Yadav</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hashem</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Abd_Allah</surname>
<given-names>E. F.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Exploring the human microbiome: the potential future role of next-generation sequencing in disease diagnosis and treatment</article-title>. <source>Front. Immunol.</source> <volume>9</volume>, <fpage>2868</fpage>. <pub-id pub-id-type="doi">10.3389/fimmu.2018.02868</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Mao</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mohri</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>Cross-entropy loss functions: theoretical analysis and applications</article-title>,&#x201d; in <conf-name>International Conference on Machine Learning</conf-name>, <conf-loc>Honolulu, HI, USA</conf-loc>, <conf-date>23-29 July 2023</conf-date> (<publisher-name>PMLR</publisher-name>), <fpage>23803</fpage>&#x2013;<lpage>23828</lpage>. <pub-id pub-id-type="doi">10.48550/ARXIV.2304.07288</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marcos-Zambrano</surname>
<given-names>L. J.</given-names>
</name>
<name>
<surname>Karaduzovic-Hadziabdic</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Loncar Turukalo</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Przymus</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Trajkovik</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Aasmets</surname>
<given-names>O.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Applications of machine learning in human microbiome studies: a review on feature selection, biomarker identification, disease prediction and treatment</article-title>. <source>Front. Microbiol.</source> <volume>12</volume>, <fpage>634511</fpage>. <pub-id pub-id-type="doi">10.3389/fmicb.2021.634511</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marsh</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zaura</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Dental biofilm: ecological interactions in health and disease</article-title>. <source>J. Clin. Periodontol.</source> <volume>44</volume>, <fpage>S12</fpage>&#x2013;<lpage>S22</lpage>. <pub-id pub-id-type="doi">10.1111/jcpe.12679</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Miller</surname>
<given-names>A. W.</given-names>
</name>
<name>
<surname>Penniston</surname>
<given-names>K. L.</given-names>
</name>
<name>
<surname>Fitzpatrick</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Agudelo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Tasian</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Lange</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Mechanisms of the intestinal and urinary microbiome in kidney stone disease</article-title>. <source>Nat. Rev. Urol.</source> <volume>19</volume> (<issue>12</issue>), <fpage>695</fpage>&#x2013;<lpage>707</lpage>. <pub-id pub-id-type="doi">10.1038/s41585-022-00647-5</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Montaner</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ramiro</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Simats</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Tiedt</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Makris</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Jickling</surname>
<given-names>G. C.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Multilevel omics for the discovery of biomarkers and therapeutic targets for stroke</article-title>. <source>Nat. Rev. Neurol.</source> <volume>16</volume> (<issue>5</issue>), <fpage>247</fpage>&#x2013;<lpage>264</lpage>. <pub-id pub-id-type="doi">10.1038/s41582-020-0350-6</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Munikoti</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Agarwal</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Das</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Halappanavar</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Natarajan</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Challenges and opportunities in deep reinforcement learning with graph neural networks: a comprehensive review of algorithms and applications</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst.</source> <volume>35</volume>, <fpage>15051</fpage>&#x2013;<lpage>15071</lpage>. <pub-id pub-id-type="doi">10.1109/tnnls.2023.3283523</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Najafabadi</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Villanustre</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Khoshgoftaar</surname>
<given-names>T. M.</given-names>
</name>
<name>
<surname>Seliya</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Wald</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Muharemagic</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Deep learning applications and challenges in big data analytics</article-title>. <source>J. Big Data</source> <volume>2</volume>, <fpage>1</fpage>&#x2013;<lpage>21</lpage>. <pub-id pub-id-type="doi">10.1186/s40537-014-0007-7</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pickard</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Zeng</surname>
<given-names>M. Y.</given-names>
</name>
<name>
<surname>Caruso</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>N&#xfa;&#xf1;ez</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Gut microbiota: role in pathogen colonization, immune responses, and inflammatory disease</article-title>. <source>Immunol. Rev.</source> <volume>279</volume> (<issue>1</issue>), <fpage>70</fpage>&#x2013;<lpage>89</lpage>. <pub-id pub-id-type="doi">10.1111/imr.12567</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sanz</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Olivares</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Moya-P&#xe9;rez</surname>
<given-names>&#xc1;.</given-names>
</name>
<name>
<surname>Agostoni</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Understanding the role of gut microbiome in metabolic disease risk</article-title>. <source>Pediatr. Res.</source> <volume>77</volume> (<issue>1</issue>), <fpage>236</fpage>&#x2013;<lpage>244</lpage>. <pub-id pub-id-type="doi">10.1038/pr.2014.170</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shen</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>ADMM-HFNet: a matrix decomposition-based deep approach for hyperspectral image fusion</article-title>. <source>IEEE Trans. Geoscience Remote Sens.</source> <volume>60</volume>, <fpage>1</fpage>&#x2013;<lpage>17</lpage>. <pub-id pub-id-type="doi">10.1109/TGRS.2021.3112181</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Shen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>A novel approach based on bi-random walk to predict microbe-disease associations</article-title>,&#x201d; in <conf-name>Intelligent Computing Methodologies: 14th International Conference, ICIC 2018</conf-name>, <conf-loc>Wuhan, China</conf-loc>, <conf-date>August 15-18, 2018</conf-date> (<publisher-name>Springer</publisher-name>), <fpage>746</fpage>&#x2013;<lpage>752</lpage>. <pub-id pub-id-type="doi">10.1039/C8EE02656D</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Shen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Bao</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>CMFHMDA: collaborative matrix factorization for human microbe-disease association prediction</article-title>,&#x201d; in <conf-name>Intelligent Computing Theories and Application: 13th International Conference, ICIC 2017</conf-name>, <conf-loc>Liverpool, UK</conf-loc>, <conf-date>August 7-10, 2017</conf-date> (<publisher-name>Springer</publisher-name>), <fpage>261</fpage>&#x2013;<lpage>269</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-63312-1_24</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stamatelou</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Goldfarb</surname>
<given-names>D. S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Epidemiology of kidney stones</article-title>. <source>Healthcare</source> <volume>11</volume> (<issue>3</issue>), <fpage>424</fpage>. <pub-id pub-id-type="doi">10.3390/healthcare11030424</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stevens</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Obstructive kidney disease</article-title>. <source>Nurs. Clin. North Am.</source> <volume>53</volume> (<issue>4</issue>), <fpage>569</fpage>&#x2013;<lpage>578</lpage>. <pub-id pub-id-type="doi">10.1016/j.cnur.2018.07.007</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Z.-A.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>LRLSHMDA: Laplacian regularized least squares for human microbe&#x2013;disease association prediction</article-title>. <source>Sci. Rep.</source> <volume>7</volume> (<issue>1</issue>), <fpage>7601</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-017-08127-2</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Mo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Exploring the network structure and nodal centrality of China&#x2019;s air transport network: a complex network approach</article-title>. <source>J. Transp. Geogr.</source> <volume>19</volume> (<issue>4</issue>), <fpage>712</fpage>&#x2013;<lpage>721</lpage>. <pub-id pub-id-type="doi">10.1016/j.jtrangeo.2010.08.012</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A bidirectional label propagation based computational model for potential microbe-disease association prediction</article-title>. <source>Front. Microbiol.</source> <volume>10</volume>, <fpage>684</fpage>. <pub-id pub-id-type="doi">10.3389/fmicb.2019.00684</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Kuang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zeng</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Graph convolutional neural network with multi-layer attention mechanism for predicting potential microbe-disease associations</article-title>. <source>Curr. Bioinforma.</source> <volume>18</volume> (<issue>6</issue>), <fpage>497</fpage>&#x2013;<lpage>508</lpage>. <pub-id pub-id-type="doi">10.2174/1574893618666230316113621</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Meta-learning based spatial-temporal graph attention network for traffic signal control</article-title>. <source>Knowledge-Based Syst.</source> <volume>250</volume>, <fpage>109166</fpage>. <pub-id pub-id-type="doi">10.1016/j.knosys.2022.109166</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>An attention&#x2010;based CNN&#x2010;LSTM&#x2010;BiLSTM model for short&#x2010;term electric load forecasting in integrated energy system</article-title>. <source>Int. Trans. Electr. Energy Syst.</source> <volume>31</volume> (<issue>1</issue>), <fpage>e12637</fpage>. <pub-id pub-id-type="doi">10.1002/2050-7038.12637</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Network-based methods for prediction of drug-target interactions</article-title>. <source>Front. Pharmacol.</source> <volume>9</volume>, <fpage>1134</fpage>. <pub-id pub-id-type="doi">10.3389/fphar.2018.01134</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yoo</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Hong</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Prediction of cognitive load from electroencephalography signals using long short-term memory network</article-title>. <source>Bioengineering</source> <volume>10</volume> (<issue>3</issue>), <fpage>361</fpage>. <pub-id pub-id-type="doi">10.3390/bioengineering10030361</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Hua</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Sparse graph cascade multi-kernel fusion contrastive learning for microbe&#x2013;disease association prediction</article-title>. <source>Expert Syst. Appl.</source> <volume>252</volume>, <fpage>124092</fpage>. <pub-id pub-id-type="doi">10.1016/j.eswa.2024.124092</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yuan</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Multiscale dynamic feature learning for quality prediction based on hierarchical sequential generative network</article-title>. <source>IEEE Sensors J.</source> <volume>23</volume>, <fpage>19561</fpage>&#x2013;<lpage>19570</lpage>. <pub-id pub-id-type="doi">10.1109/jsen.2023.3290163</pub-id>
</citation>
</ref>
<ref id="B66">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Multimodal intelligence: representation learning, information fusion, and applications</article-title>. <source>IEEE J. Sel. Top. Signal Process.</source> <volume>14</volume> (<issue>3</issue>), <fpage>478</fpage>&#x2013;<lpage>493</lpage>. <pub-id pub-id-type="doi">10.1109/JSTSP.2020.2987728</pub-id>
</citation>
</ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>The bi-direction similarity integration method for predicting microbe-disease associations</article-title>. <source>IEEE Access</source> <volume>6</volume>, <fpage>38052</fpage>&#x2013;<lpage>38061</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2018.2851751</pub-id>
</citation>
</ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>KATZLGO: large-scale prediction of LncRNA functions by using the KATZ measure based on multiple networks</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinforma.</source> <volume>16</volume> (<issue>2</issue>), <fpage>407</fpage>&#x2013;<lpage>416</lpage>. <pub-id pub-id-type="doi">10.1109/tcbb.2017.2704587</pub-id>
</citation>
</ref>
<ref id="B69">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Dynamic multichannel fusion mechanism based on a graph attention network and BERT for aspect-based sentiment classification</article-title>. <source>Appl. Intell.</source> <volume>53</volume> (<issue>6</issue>), <fpage>6800</fpage>&#x2013;<lpage>6813</lpage>. <pub-id pub-id-type="doi">10.1007/s10489-022-03851-3</pub-id>
</citation>
</ref>
<ref id="B70">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Human gut microbiome: the second genome of human body</article-title>. <source>Protein Cell</source> <volume>1</volume> (<issue>8</issue>), <fpage>718</fpage>&#x2013;<lpage>725</lpage>. <pub-id pub-id-type="doi">10.1007/s13238-010-0093-z</pub-id>
</citation>
</ref>
<ref id="B71">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zimmermann</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Messina</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Mohn</surname>
<given-names>W. W.</given-names>
</name>
<name>
<surname>Finlay</surname>
<given-names>B. B.</given-names>
</name>
<name>
<surname>Curtis</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Association between the intestinal microbiota and allergic sensitization, eczema, and asthma: a systematic review</article-title>. <source>J. Allergy Clin. Immunol.</source> <volume>143</volume> (<issue>2</issue>), <fpage>467</fpage>&#x2013;<lpage>485</lpage>. <pub-id pub-id-type="doi">10.1016/j.jaci.2018.09.025</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>