<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Microbiol.</journal-id>
<journal-title>Frontiers in Microbiology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Microbiol.</abbrev-journal-title>
<issn pub-type="epub">1664-302X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmicb.2024.1483983</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Microbiology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Predicting microbe-disease associations via graph neural network and contrastive learning</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Jiang</surname> <given-names>Cong</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x02020;</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Feng</surname> <given-names>Junxuan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2864002/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Shan</surname> <given-names>Bingshen</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2866920/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Chen</surname> <given-names>Qiyue</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Yang</surname> <given-names>Jian</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1076463/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname> <given-names>Gang</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2558488/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Peng</surname> <given-names>Xiaogang</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Li</surname> <given-names>Xiaozheng</given-names></name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<xref ref-type="aff" rid="aff7"><sup>7</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2233538/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>College of Computer Science and Software Engineering, Shenzhen University</institution>, <addr-line>Shenzhen</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>National Engineering Laboratory for Big Data System Computing Technology, Shenzhen University</institution>, <addr-line>Shenzhen</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>College of Management, Shenzhen University</institution>, <addr-line>Shenzhen</addr-line>, <country>China</country></aff>
<aff id="aff4"><sup>4</sup><institution>Beijing Key Laboratory of Mental Disorders, National Clinical Research Center for Mental Disorders and National Center for Mental Disorders, Beijing Anding Hospital, Capital Medical University</institution>, <addr-line>Beijing</addr-line>, <country>China</country></aff>
<aff id="aff5"><sup>5</sup><institution>Advanced Innovation Center for Human Brain Protection, Capital Medical University</institution>, <addr-line>Beijing</addr-line>, <country>China</country></aff>
<aff id="aff6"><sup>6</sup><institution>College of Life Sciences and Oceanography, Shenzhen University</institution>, <addr-line>Shenzhen</addr-line>, <country>China</country></aff>
<aff id="aff7"><sup>7</sup><institution>JCY Biotech Ltd., Pingshan Translational Medicine Center, Shenzhen Bay Laboratory</institution>, <addr-line>Shenzhen</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Liang Wang, Guangdong Provincial People&#x00027;s Hospital, China</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Guanghui Li, East China Jiaotong University, China</p>
<p>Lihong Peng, Hunan University of Technology, China</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Xiaogang Peng <email>pengxg&#x00040;szu.edu.cn</email></corresp>
<corresp id="c002">Xiaozheng Li <email>li.xiaozheng&#x00040;szu.edu.cn</email></corresp>
<fn fn-type="equal" id="fn001"><p>&#x02020;These authors have contributed equally to this work</p></fn></author-notes>
<pub-date pub-type="epub">
<day>13</day>
<month>12</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1483983</elocation-id>
<history>
<date date-type="received">
<day>22</day>
<month>08</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>14</day>
<month>10</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2024 Jiang, Feng, Shan, Chen, Yang, Wang, Peng and Li.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Jiang, Feng, Shan, Chen, Yang, Wang, Peng and Li</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>In the contemporary field of life sciences, researchers have gradually recognized the critical role of microbes in maintaining human health. However, traditional biological experimental methods for validating the association between microbes and diseases are both time-consuming and costly. Therefore, developing effective computational methods to predict potential associations between microbes and diseases is an important and urgent task. In this study, we propose a novel computational framework, called GCATCMDA, for forecasting potential associations between microbes and diseases. Firstly, we construct Gaussian kernel similarity networks for microbes and diseases using known microbe-disease association data. Then, we design a feature encoder that combines graph convolutional network and graph attention mechanism to learn the node features of networks, and propose a feature dual-fusion module to effectively integrate node features from each layer&#x00027;s output. Next, we apply the feature encoder separately to the microbe similarity network, disease similarity network, and microbe-disease association network, and enhance the consistency of features for the same nodes across different association networks through contrastive learning. Finally, we pass the microbe and disease features into an inner product decoder to obtain the association scores between them. Experimental results demonstrate that the GCATCMDA model achieves superior predictive performance compared to previous methods. Furthermore, case studies confirm that GCATCMDA is an effective tool for predicting microbe-disease associations in real situations.</p></abstract>
<kwd-group>
<kwd>microbe-disease associations</kwd>
<kwd>graph convolutional network</kwd>
<kwd>graph attention mechanism</kwd>
<kwd>contrastive learning</kwd>
<kwd>gut microbial metagenomics</kwd>
</kwd-group>
<contract-sponsor id="cn001">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content></contract-sponsor>
<contract-sponsor id="cn002">Fundamental Research Funds for the Central Universities<named-content content-type="fundref-id">10.13039/501100012226</named-content></contract-sponsor>
<counts>
<fig-count count="7"/>
<table-count count="4"/>
<equation-count count="19"/>
<ref-count count="38"/>
<page-count count="13"/>
<word-count count="7812"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Microbial Symbioses</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>Microbes, which primarily comprise bacteria, fungi, archaea, and viruses, predominantly inhabit the gut within the human body (Sommer and B&#x000E4;ckhed, <xref ref-type="bibr" rid="B27">2013</xref>; Blum, <xref ref-type="bibr" rid="B3">2017</xref>). The gut microbiota is closely associated with human health, playing a crucial role in regulating host physiological processes, such as immunity and metabolism (Lynch and Pedersen, <xref ref-type="bibr" rid="B18">2016</xref>; Tooley, <xref ref-type="bibr" rid="B29">2020</xref>). In recent years, biological experiments have demonstrated that dysbiosis or imbalance in the human microbiota could cause human diseases (Marchesi et al., <xref ref-type="bibr" rid="B21">2016</xref>), such as liver diseases (Henao-Mejia et al., <xref ref-type="bibr" rid="B6">2013</xref>), diabetes (Paun et al., <xref ref-type="bibr" rid="B22">2017</xref>), obesity (Tseng and Wu, <xref ref-type="bibr" rid="B30">2019</xref>), and even cancer (Schwabe and Jobin, <xref ref-type="bibr" rid="B25">2013</xref>). However, traditional biological experiments suffer from drawbacks such as long experimental cycles and high costs. Therefore, if we can utilize effective computational methods to predict potential sets of associations between microbes and diseases in advance, it would be possible to reduce unnecessary experimental trials and costs in traditional biological experiments, thereby accelerating the development of research in the field of microbe-disease associations.</p>
<p>Current computational methods for predicting microbe-disease associations can primarily be divided into three categories, namely network-based methods, random walk-based methods, and deep learning-based methods. The network-based methods infer the potential association between microbes and diseases by utilizing the topological information within the network. For example, Chen et al. (<xref ref-type="bibr" rid="B5">2017</xref>) proposed a KATZHMDA model based on the KATZ measure, which scores potential disease-related microbes by calculating all paths of different lengths between microbes and diseases. Bao et al. (<xref ref-type="bibr" rid="B1">2017</xref>) proposed the Network Consistency Projection for Human Microbe-Disease Association Prediction (NCPHMDA) model, evaluating the association scores between microbes and diseases by computing disease space projection scores and microbe space projection scores. Long and Luo (<xref ref-type="bibr" rid="B15">2019</xref>) designed a meta-graph-based method named WMGHMDA, which calculates the probability scores of microbe-disease pairs by utilizing a weighted meta-graph search algorithm on a heterogeneous network. Wang et al. (<xref ref-type="bibr" rid="B33">2023</xref>) proposed a SAELGMDA model by combining sparse autoencoder and Light Gradient boosting machine.</p>
<p>The success of random walk algorithms in graph data processing has prompted researchers to propose various microbe-disease association prediction algorithms based on this approach. For instance, Zou et al. (<xref ref-type="bibr" rid="B38">2017</xref>) developed a novel computational model of BiRWHMDA, which predicts potential microbe-disease associations by bi-random walks on a heterogeneous network. Luo and Long (<xref ref-type="bibr" rid="B17">2018</xref>) proposed a novel computational model of NTSHMDA, which integrates network topology similarity into the restarted random walk algorithm to distinguish the walking probabilities of disease-microbe node pairs. Yan et al. (<xref ref-type="bibr" rid="B36">2019</xref>) introduced a BRWMDA method, predicting potential microbe-disease associations by executing bi-random walks with different steps on microbe and disease networks.</p>
<p>With the significant achievements of deep learning algorithms in various research fields, researchers have gradually begun to explore the application of these algorithms in the task of predicting the associations between microbes and diseases. For example, Ma and Jiang (<xref ref-type="bibr" rid="B20">2020</xref>) developed an end-to-end graph convolutional neural network-based mining model NinimHMDA to predict different types of microbe-disease associations. Long et al. (<xref ref-type="bibr" rid="B16">2021</xref>) proposed a novel deep learning framework of GATMDA, which utilizes graph attention networks along with inductive matrix completion for predicting human microbe-disease associations. Hua et al. (<xref ref-type="bibr" rid="B7">2022</xref>) developed a multi-view graph augmentation convolutional network (MVGCNMDA) to predict potential disease-associated microbes. Jiang et al. (<xref ref-type="bibr" rid="B8">2022</xref>) proposed the KGNMDA method, using a knowledge graph neural network method for predicting microbe-disease associations. Peng et al. (<xref ref-type="bibr" rid="B23">2023</xref>) developed a computational method for predicting microbe-disease associations, named GPUDMDA, which integrates graph attention autoencoder, positive-unlabeled learning, and deep neural network.</p>
<p>In addition to the three mainstream methods mentioned, some computational approaches for microbe-disease prediction have been developed based on regularization and matrix factorization/completion techniques. For instance, Wang et al. (<xref ref-type="bibr" rid="B32">2017</xref>) proposed a semi-supervised computational model of Laplacian Regularized Least Squares for Human Microbe&#x02014;Disease Association (LRLSHMDA) to predict microbe-disease associations. Shen et al. (<xref ref-type="bibr" rid="B26">2017</xref>) developed a computational method of CMFHMDA, which utilizes collaborative matrix factorization to reconstruct correlation matrices between diseases and microbes. Liu et al. (<xref ref-type="bibr" rid="B14">2023</xref>) proposed a novel method called MNNMDA to predict microbe-disease associations by applying a Matrix Nuclear Norm method.</p>
<p>Among the methods mentioned above, network-based and random walk-based methods may encounter constraints in learning features of nodes representing microbes and diseases with few known associations, due to the limited information propagation caused by the sparsity of the microbe-disease association network. Meanwhile, matrix factorization/completion methods can only capture linear associations, thus failing to accurately capture the nonlinear interactions between microbes and diseases. Recent studies have suggested that graph neural network algorithms in deep learning could offer a more effective approach for learning node features in microbe-disease association networks. Therefore, this study further attempts to design node feature learning algorithms based on graph neural networks, aiming to obtain more effective node features from the microbe-disease association network, thereby predicting more accurate candidate sets of microbe-disease associations.</p>
<p>In this work, we propose a deep learning framework named GCATCMDA, which explores the application of graph neural networks for the microbe-disease association prediction task. First, Gaussian kernel similarity is calculated based on known microbe-disease association data to construct microbe similarity networks and disease similarity networks. We then combine graph convolutional networks and graph attention mechanisms to learn feature representations of microbes and diseases in different networks, and propose a feature dual-fusion module to effectively integrate node features generated by each graph attention layer. Next, we utilize contrastive learning to enhance the feature consistency of the same microbe (or disease) across different association networks. Finally, the obtained microbe and disease features are fed into an inner product decoder to compute their corresponding association scores. The model can obtain better node features through GCAT aggregation. In addition, contrastive learning increases the distance between nodes, allowing the model to better distinguish nodes and make subsequent predictions better. Experimental results demonstrate that the GCATCMDA model achieves better predictive performance compared to previous methods, and case studies of obesity and IBD (inflammatory bowel disease) confirm the high accuracy of the microbe-disease association candidate set produced by our method.</p>
</sec>
<sec sec-type="materials and methods" id="s2">
<title>2 Materials and methods</title>
<sec>
<title>2.1 Datasets</title>
<p>The dataset in this study was sourced from the HMDAD database (<ext-link ext-link-type="uri" xlink:href="http://www.cuilab.cn/hmdad">http://www.cuilab.cn/hmdad</ext-link>), which collects known associations between microbes and diseases by searching past research literature (Ma et al., <xref ref-type="bibr" rid="B19">2017</xref>). HMDAD adopted a systematic approach by only including associations that have been experimentally validated and published in reputable journals. This ensures a high level of reliability in the dataset. Researchers have commonly employed metagenomic sequencing techniques to analyze fluctuations in microbial community abundance within specific diseases, contrasting them with the microbial compositions of healthy individuals, thus exploring the associations between microbes and diseases. In the HMDAD dataset, a microbe-disease association pair may contain multiple entries from different research literature sources. Therefore, here, we regard the same microbe-disease association supported by different pieces of evidence as a single pair, thereby removing the redundant information present in the HMDAD dataset. Finally, for this study, we employed a dataset consisting of 450 microbe-disease associations, encompassing 39 human diseases and 292 microbes.</p>
</sec>
<sec>
<title>2.2 Problem definition</title>
<p>For clarity in describing the subsequent research methods, we provide a simple problem definition for the task of predicting associations between microbes and diseases here. We denote <italic>M</italic> &#x0003D; {<italic>m</italic><sub>1</sub>, <italic>m</italic><sub>2</sub>, &#x02026;, <italic>m</italic><sub><italic>n</italic><sub><italic>m</italic></sub></sub>} and <italic>D</italic> &#x0003D; {<italic>d</italic><sub>1</sub>, <italic>d</italic><sub>2</sub>, &#x02026;, <italic>d</italic><sub><italic>n</italic><sub><italic>d</italic></sub></sub>} as the sets representing <italic>n</italic><sub><italic>m</italic></sub> microbes and <italic>n</italic><sub><italic>d</italic></sub> diseases, respectively. The matrix <inline-formula><mml:math id="M1"><mml:mrow><mml:mi>A</mml:mi><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> represents the known associations between microbes and diseases, where <italic>A</italic><sub><italic>ij</italic></sub> &#x0003D; 1 if microbe <italic>m</italic><sub><italic>i</italic></sub> is associated with disease <italic>d</italic><sub><italic>j</italic></sub>, otherwise <italic>A</italic><sub><italic>ij</italic></sub> &#x0003D; 0. However, <italic>A</italic><sub><italic>ij</italic></sub> &#x0003D; 0 does not mean that microbe <italic>m</italic><sub><italic>i</italic></sub> has no relation with disease <italic>d</italic><sub><italic>j</italic></sub>. It may simply be that their association has not yet been discovered. 
Therefore, the task of predicting associations between microbes and diseases aims to find microbe <italic>m</italic><sub><italic>i</italic></sub> for each disease <italic>d</italic><sub><italic>j</italic></sub> where <italic>A</italic><sub><italic>ij</italic></sub> &#x0003D; 0 in the known association matrix, but microbe <italic>m</italic><sub><italic>i</italic></sub> is actually related to disease <italic>d</italic><sub><italic>j</italic></sub>.</p>
</sec>
<sec>
<title>2.3 GCATCMDA</title>
<p><xref ref-type="fig" rid="F1">Figure 1</xref> illustrates the workflow of GCATCMDA, a model based on graph neural networks and contrastive learning for predicting effective candidate sets of microbe-disease associations. First, microbe-microbe and disease-disease Gaussian kernel similarity networks are constructed using known associations. The model then integrates graph neural networks and contrastive learning principles to extract meaningful feature representations of microbes and diseases from the association networks. Finally, the obtained microbe and disease features are fed into an inner product decoder to compute their corresponding association scores. A detailed description of the key components of this model is provided below.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>The workflow of GCATCMDA for microbe&#x02013;disease prediction.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-15-1483983-g0001.tif"/>
</fig>
<sec>
<title>2.3.1 Microbe and disease similarity network construction</title>
<p>A previous study (Chen et al., <xref ref-type="bibr" rid="B5">2017</xref>) hypothesized that functionally similar microbes (or diseases) tend to exhibit similar interaction or non-interaction patterns with similar diseases (or microbes). That study utilized Gaussian kernel functions to measure the similarity between two microbes (or diseases) in the same space. Therefore, in this study, we consider constructing microbe and disease similarity networks based on Gaussian kernel similarity scores for microbes and diseases.</p>
<p>We have recorded the known associations between microbes and diseases using the association matrix <inline-formula><mml:math id="M2"><mml:mrow><mml:mi>A</mml:mi><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>. The calculation formulas for the Gaussian kernel similarity score between microbe <italic>m</italic><sub><italic>i</italic></sub> and <italic>m</italic><sub><italic>j</italic></sub>, and between disease <italic>d</italic><sub><italic>i</italic></sub> and <italic>d</italic><sub><italic>j</italic></sub>, are as follows:</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M3"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">KM</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mo class="qopname">exp</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:mtext class="textrm" mathvariant="normal">IP</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mtext class="textrm" mathvariant="normal">IP</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo><mml:msup><mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E2"><label>(2)</label><mml:math id="M4"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">KD</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mo class="qopname">exp</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:mtext class="textrm" mathvariant="normal">IP</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mtext class="textrm" mathvariant="normal">IP</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo><mml:msup><mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>KM</italic>(<italic>m</italic><sub><italic>i</italic></sub>, <italic>m</italic><sub><italic>j</italic></sub>) represents the Gaussian kernel similarity score between microbes <italic>m</italic><sub><italic>i</italic></sub> and <italic>m</italic><sub><italic>j</italic></sub>, and <italic>KD</italic>(<italic>d</italic><sub><italic>i</italic></sub>, <italic>d</italic><sub><italic>j</italic></sub>) represents the Gaussian kernel similarity score between diseases <italic>d</italic><sub><italic>i</italic></sub> and <italic>d</italic><sub><italic>j</italic></sub>. The term IP(<italic>m</italic><sub><italic>i</italic></sub>) represents the <italic>i</italic>-th row of the association matrix <italic>A</italic>, recording the associations between microbe <italic>m</italic><sub><italic>i</italic></sub> and all diseases, and IP(<italic>d</italic><sub><italic>i</italic></sub>) represents the <italic>i</italic>-th column of the association matrix <italic>A</italic>, recording the associations between disease <italic>d</italic><sub><italic>i</italic></sub> and all microbes. The parameters &#x003BB;<sub><italic>m</italic></sub> and &#x003BB;<sub><italic>d</italic></sub> represent the normalized kernel bandwidths and are defined as follows:</p>
<disp-formula id="E3"><label>(3)</label><mml:math id="M5"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msubsup><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mrow><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup></mml:mstyle><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:mtext class="textrm" mathvariant="normal">IP</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo><mml:msup><mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E4"><label>(4)</label><mml:math id="M6"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msubsup><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mrow><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup></mml:mstyle><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:mtext class="textrm" mathvariant="normal">IP</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo><mml:msup><mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>n</italic><sub><italic>m</italic></sub> and <italic>n</italic><sub><italic>d</italic></sub> represent the numbers of microbes and diseases, respectively. The parameters <inline-formula><mml:math id="M7"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M8"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula> are the original bandwidths, both of which are generally set to 1.</p>
<p>We consider microbes (or diseases) to be strongly associated with each other when the Gaussian kernel similarity score between microbes (or diseases) exceeds a threshold of <italic>t</italic>. Therefore, the association matrices <italic>MA</italic> for microbes and <italic>DA</italic> for diseases can be expressed as follows:</p>
<disp-formula id="E5"><label>(5)</label><mml:math id="M9"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">MA</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mn>1</mml:mn><mml:mo>,</mml:mo></mml:mtd><mml:mtd><mml:mtext class="textrm" mathvariant="normal">&#x000A0;if&#x000A0;KM&#x000A0;</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02265;</mml:mo><mml:mi>t</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn><mml:mo>,</mml:mo></mml:mtd><mml:mtd><mml:mtext class="textrm" mathvariant="normal">otherwise</mml:mtext></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E6"><label>(6)</label><mml:math id="M10"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">DA</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mn>1</mml:mn><mml:mo>,</mml:mo></mml:mtd><mml:mtd><mml:mtext class="textrm" mathvariant="normal">&#x000A0;if&#x000A0;KD&#x000A0;</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02265;</mml:mo><mml:mi>t</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn><mml:mo>,</mml:mo></mml:mtd><mml:mtd><mml:mtext class="textrm" mathvariant="normal">otherwise</mml:mtext></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
</sec>
<sec>
<title>2.3.2 GCAT</title>
<p>Inspired by the work of Sun et al. (<xref ref-type="bibr" rid="B28">2022</xref>) on predicting metabolite-disease associations, this study adopted the GCAT feature encoder. The encoder initially combines graph convolution algorithms and graph attention mechanisms to learn the nodal features of the network, followed by the design of a feature dual-fusion module to effectively integrate the node features outputted by each graph attention layer. Since the GCAT feature encoder learns embedding representations on different association networks in a similar process, we take microbe-disease association network as an example to introduce the process of learning node features, as illustrated in <xref ref-type="fig" rid="F2">Figure 2</xref>.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>The flowchart of GCAT node feature encoding in microbe-disease association network.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-15-1483983-g0002.tif"/>
</fig>
<p>We represent the microbe-disease association network using a symmetric adjacency matrix <inline-formula><mml:math id="M11"><mml:mrow><mml:mi>G</mml:mi><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x000D7;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, where <italic>n</italic><sub><italic>m</italic></sub> and <italic>n</italic><sub><italic>d</italic></sub> denote the numbers of microbes and diseases, respectively. The initial features of nodes in the network are represented by the matrix <italic>H</italic><sup>(0)</sup>.</p>
<disp-formula id="E7"><label>(7)</label><mml:math id="M12"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>G</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd><mml:mtd><mml:mi>A</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:mtd><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E8"><label>(8)</label><mml:math id="M13"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mi>H</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mi>K</mml:mi><mml:mi>M</mml:mi></mml:mtd><mml:mtd><mml:mn>0</mml:mn></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn></mml:mtd><mml:mtd><mml:mi>K</mml:mi><mml:mi>D</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Considering the ability of graph convolutional networks in capturing the global graph structural information, and the ability of graph attention mechanisms to assign attention weights to different nodes based on the local graph structure, the GCAT feature encoder integrates these two algorithms to effectively learn the node features of the microbe-disease association network. Firstly, the GCAT feature encoder linearly projects the node features of <italic>H</italic><sup>(0)</sup> onto a feature space of dimensional size <italic>F</italic>, denoted as <italic>H</italic><sup>(0)</sup> &#x0003D; <italic>H</italic><sup>(0)</sup><italic>W</italic>, where <inline-formula><mml:math id="M14"><mml:mrow><mml:mi>W</mml:mi><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x000D7;</mml:mo><mml:mi>F</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> is the weight matrix. Next, this module employs the graph convolutional networks (GCN) proposed by Kipf and Welling (<xref ref-type="bibr" rid="B11">2016</xref>) to learn node features in the network. GCN learns low-dimensional representations of nodes by aggregating neighbor node information through graph convolution operations while preserving the graph&#x00027;s structural information. The first-layer graph convolutional propagation formula for graph <italic>G</italic> can be expressed as:</p>
<disp-formula id="E9"><label>(9)</label><mml:math id="M15"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>H</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mo>&#x0007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mo>-</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac></mml:mrow></mml:msup><mml:mover accent="true"><mml:mrow><mml:mi>G</mml:mi></mml:mrow><mml:mo>&#x0007E;</mml:mo></mml:mover><mml:msup><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mo>&#x0007E;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mo>-</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac></mml:mrow></mml:msup><mml:msup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>H</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:msup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>W</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>0</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Here, &#x003C3;(.) denotes the activation function, <inline-formula><mml:math id="M16"><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>G</mml:mi></mml:mrow><mml:mo>&#x0007E;</mml:mo></mml:mover><mml:mo>=</mml:mo><mml:mi>G</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>I</mml:mi></mml:mrow></mml:math></inline-formula> represents the adjacency matrix with self-loops added, <inline-formula><mml:math id="M17"><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>D</mml:mi></mml:mrow><mml:mo>&#x0007E;</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> is the degree matrix of <inline-formula><mml:math id="M18"><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>G</mml:mi></mml:mrow><mml:mo>&#x0007E;</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>, <bold>W</bold><sup>(0)</sup> denotes the trainable weight matrix of the first-layer graph convolution, and <bold>H</bold><sup>(1)</sup> represents the feature matrix outputted by the first-layer graph convolution.</p>
<p>Subsequently, the GCAT feature encoder enhances the learned node feature representations from the graph convolutional layers by incorporating a graph attention mechanism to aggregate weighted sums of neighbor information. In this study, we adopt the graph attention network (GAT) proposed by Veli&#x0010D;kovi&#x00107; et al. (<xref ref-type="bibr" rid="B31">2017</xref>), which introduces an attention mechanism to assign different attention weights to the features of different neighbor nodes, enabling the model to focus on important neighbor features during aggregation for the target node. Thus, following the computation of the first-layer graph convolution, the attention score <inline-formula><mml:math id="M19"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula> for node <italic>j</italic> with respect to its neighbor node <italic>i</italic> in graph <italic>G</italic> can be calculated as:</p>
<disp-formula id="E10"><label>(10)</label><mml:math id="M20"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mo class="qopname">exp</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">att</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:msubsup><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:msubsup><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">att</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:msubsup><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mstyle 
displaystyle="true"><mml:msub><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>&#x02208;</mml:mo><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mstyle><mml:mo class="qopname">exp</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">att</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:msubsup><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:msubsup><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">att</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:msubsup><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where || denotes the concatenation operation, <inline-formula><mml:math id="M21"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula> represents the node features obtained by the graph <italic>G</italic> through the first-layer graph convolution, <inline-formula><mml:math id="M22"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">att</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula> represents the weight matrix for the linear transformation of node features, and <italic>N</italic><sub><italic>i</italic></sub> denotes the first-order neighboring nodes of node <italic>i</italic>. The attention mechanism <italic>f</italic>(&#x000B7;) is a single-layer feedforward neural network, parametrized by a weight vector <inline-formula><mml:math id="M23"><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>a</mml:mi></mml:mrow><mml:mo>&#x02192;</mml:mo></mml:mover><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mi>F</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, and applying the LeakyReLU nonlinearity. We further employ a multi-head attention mechanism to stabilize the process of learning node representations in attention networks. It aggregates the features obtained from all attention heads by taking their average. 
Thus, the updated feature <inline-formula><mml:math id="M24"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula> for node <italic>i</italic> via graph attention mechanism can be expressed as follows:</p>
<disp-formula id="E11"><label>(11)</label><mml:math id="M25"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>&#x02208;</mml:mo><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:munder></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x000B7;</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">att</mml:mtext></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:msubsup><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Here, &#x003C3; denotes the activation function, <italic>K</italic> represents the number of attention heads, <italic>N</italic><sub><italic>i</italic></sub> signifies the neighborhood of node <italic>i</italic>, <inline-formula><mml:math id="M26"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula> represents the attention coefficient for node <italic>j</italic> with respect to node <italic>i</italic> in the <italic>k</italic>-th attention head, <inline-formula><mml:math id="M27"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">att</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula> is the weight matrix for attention in the <italic>k</italic>-th head, and <inline-formula><mml:math id="M28"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula> denotes the feature vector of node <italic>j</italic> after the first graph convolutional layer.</p>
<p>Finally, inspired by the work of Wang et al. (<xref ref-type="bibr" rid="B34">2019</xref>) on node feature fusion, this study further designs a feature dual-fusion module, which considers both concatenation and element-wise product operations to integrate the node features outputted by each graph attention layer. We posit that the concatenation operation helps preserve more node feature information, while the element-wise product operation emphasizes the correlation between node features. We demonstrated the effectiveness of this fusion module in ablation experiments. The node features outputted by each graph attention layer in the GCAT feature encoder can be represented as {<italic>Z</italic><sup>(1)</sup>, <italic>Z</italic><sup>(2)</sup>, &#x022EF;&#x02009;, <italic>Z</italic><sup>(<italic>N</italic>)</sup>}. Then, the feature dual-fusion module can be represented by the following equation:</p>
<disp-formula id="E12"><label>(12)</label><mml:math id="M29"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtable style="text-align:axis;" equalrows="false" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mi>Z</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:msup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:mo>&#x022EF;</mml:mo><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:msup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo>&#x0002B;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>&#x02299;</mml:mo><mml:msup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>2</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>&#x02299;</mml:mo><mml:mo>&#x022EF;</mml:mo><mml:mo>&#x02299;</mml:mo><mml:msup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Here, || represents concatenation, &#x02299; represents the element-wise (Hadamard) product, <inline-formula><mml:math id="M30"><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>N</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>F</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x000D7;</mml:mo><mml:mi>F</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M31"><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mi>F</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>F</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> denote the trainable weight matrices, and <italic>Z</italic> represents the final node features.</p>
<p>In summary, this study represents the final microbe and disease features obtained from the microbe-disease association network as <inline-formula><mml:math id="M32"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:mi>F</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M33"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:mi>F</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, respectively. 
Similarly, the microbe features obtained from the microbe similarity network are represented as <inline-formula><mml:math id="M34"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:mi>F</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, and the disease features obtained from the disease similarity network are represented as <inline-formula><mml:math id="M35"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msubsup><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:mi>F</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>.</p>
</sec>
<sec>
<title>2.3.3 Contrastive learning</title>
<p>Inspired by the work of Jin et al. (<xref ref-type="bibr" rid="B9">2024</xref>) on miRNA-disease association prediction, this study introduces contrastive learning to enhance the consistency of features of the same nodes across different association networks and the distinctiveness of features between different pairs of nodes. This approach leverages the complementary information among various association networks to obtain more effective representations of microbe and disease features. This module employs the contrastive loss function proposed by Zhu et al. (<xref ref-type="bibr" rid="B37">2020</xref>) for graph nodes. It considers the node features of the same disease <italic>d</italic><sub><italic>i</italic></sub> obtained from different association networks <inline-formula><mml:math id="M36"><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> as positive samples, while all other pairs of different nodes form negative sample pairs. Therefore, the contrastive learning loss function <italic>Loss</italic><sub><italic>d</italic></sub> for disease node features across different association networks can be expressed as:</p>
<disp-formula id="E13"><label>(13)</label><mml:math id="M37"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mi>l</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo class="qopname">log</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003B8;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>/</mml:mo><mml:mi>&#x003C4;</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003B8;</mml:mi><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>/</mml:mo><mml:mi>&#x003C4;</mml:mi></mml:mrow></mml:msup><mml:mo>&#x0002B;</mml:mo><mml:mstyle displaystyle="true"><mml:msub><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>&#x02260;</mml:mo><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mstyle><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003B8;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>/</mml:mo><mml:mi>&#x003C4;</mml:mi></mml:mrow></mml:msup><mml:mo>&#x0002B;</mml:mo><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x003B8;</mml:mi><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>/</mml:mo><mml:mi>&#x003C4;</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<disp-formula id="E14"><label>(14)</label><mml:math id="M38"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mtext class="textrm" mathvariant="normal">Loss</mml:mtext></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:munderover></mml:mstyle><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>l</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mi>l</mml:mi><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where &#x003B8;(&#x000B7;) is the cosine similarity, &#x003C4; is a temperature parameter, and <italic>n</italic><sub><italic>d</italic></sub> denotes the number of diseases. Similarly, the contrastive learning loss function Loss<sub><italic>m</italic></sub> for microbe node features across different association networks can be formulated as follows:</p>
<disp-formula id="E15"><label>(15)</label><mml:math id="M39"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mtext class="textrm" mathvariant="normal">Loss</mml:mtext></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:munderover></mml:mstyle><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>l</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mi>l</mml:mi><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>n</italic><sub><italic>m</italic></sub> denotes the number of microbes. Therefore, the overall loss function of the GCATCMDA model in the contrastive learning module is formulated as follows:</p>
<disp-formula id="E16"><label>(16)</label><mml:math id="M40"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>L</mml:mi><mml:mi>o</mml:mi><mml:mi>s</mml:mi><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">contrast</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>L</mml:mi><mml:mi>o</mml:mi><mml:mi>s</mml:mi><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>L</mml:mi><mml:mi>o</mml:mi><mml:mi>s</mml:mi><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
</sec>
<sec>
<title>2.3.4 Microbe-disease association prediction</title>
<p>This study aggregates the node features of microbes and diseases obtained from different association networks through vector concatenation, resulting in the final microbial feature representation <inline-formula><mml:math id="M41"><mml:mrow><mml:msub><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msubsup><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:mn>2</mml:mn><mml:mi>F</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> and disease feature representation <inline-formula><mml:math id="M42"><mml:mrow><mml:msub><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msubsup><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:mn>2</mml:mn><mml:mi>F</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>. 
Subsequently, these aggregated feature representations are passed into an inner product decoder to compute the association scores between microbes and diseases. The calculation process is as follows:</p>
<disp-formula id="E17"><label>(17)</label><mml:math id="M43"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mtext class="textrm" mathvariant="normal">sigmoid</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:msubsup><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Where sigmoid is the activation function defined as 1/(1&#x0002B;<italic>e</italic><sup>&#x02212;<italic>x</italic></sup>), which maps output values to the interval (0, 1), <inline-formula><mml:math id="M44"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula> represents the association prediction score between microbe <italic>m</italic><sub><italic>i</italic></sub> and disease <italic>d</italic><sub><italic>j</italic></sub>.</p>
<p>Finally, the training of the GCATCMDA model employs Binary Cross-Entropy as the loss function for microbe-disease association prediction. The formula for this function is as follows:</p>
<disp-formula id="E18"><label>(18)</label><mml:math id="M45"><mml:mtable columnalign='left'><mml:mtr><mml:mtd><mml:msub><mml:mtext>Loss</mml:mtext><mml:mrow><mml:mtext>classify</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo>&#x02212;</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac><mml:mstyle displaystyle='true'><mml:munder><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mo stretchy='false'>(</mml:mo><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo>&#x02208;</mml:mo><mml:mo>&#x0007B;</mml:mo><mml:msup><mml:mi>N</mml:mi><mml:mo>+</mml:mo></mml:msup><mml:mo>&#x0222A;</mml:mo><mml:msup><mml:mi>N</mml:mi><mml:mo>&#x02212;</mml:mo></mml:msup><mml:mo>&#x0007D;</mml:mo></mml:mrow></mml:munder><mml:mrow><mml:mo stretchy='false'>[</mml:mo><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mo stretchy='false'>(</mml:mo><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:msub><mml:mi>log</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:msubsup><mml:mi>A</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mo>&#x02032;</mml:mo></mml:msubsup><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:mstyle></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo>+</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x02212;</mml:mo><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mo stretchy='false'>(</mml:mo><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:msub><mml:mo stretchy='false'>)</mml:mo><mml:mi>log</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x02212;</mml:mo><mml:msubsup><mml:mi>A</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mo>&#x02032;</mml:mo></mml:msubsup><mml:mo stretchy='false'>)</mml:mo><mml:mo stretchy='false'>]</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Where <italic>N</italic> denotes the total number of associations between microbes and diseases, <italic>N</italic><sup>&#x0002B;</sup> represents the confirmed associations between microbes and diseases, and <italic>N</italic><sup>&#x02212;</sup> represents the associations yet to be confirmed. The tuple (<italic>i, j</italic>) represents the association between microbe <italic>m</italic><sub><italic>i</italic></sub> and disease <italic>d</italic><sub><italic>j</italic></sub>. If (<italic>m</italic><sub><italic>i</italic></sub>, <italic>d</italic><sub><italic>j</italic></sub>) belongs to <italic>N</italic><sup>&#x0002B;</sup>, then <italic>A</italic><sub>(<italic>i, j</italic>)</sub> &#x0003D; 1; otherwise, <italic>A</italic><sub>(<italic>i, j</italic>)</sub> &#x0003D; 0. <inline-formula><mml:math id="M46"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula> denotes the predicted association score by the model for this association pair. Therefore, the overall loss function of the GCATCMDA model can be expressed as:</p>
<disp-formula id="E19"><label>(19)</label><mml:math id="M47"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>L</mml:mi><mml:mi>o</mml:mi><mml:mi>s</mml:mi><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">total</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>L</mml:mi><mml:mi>o</mml:mi><mml:mi>s</mml:mi><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">classify</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003BB;</mml:mi><mml:mi>L</mml:mi><mml:mi>o</mml:mi><mml:mi>s</mml:mi><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">contrast</mml:mtext></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Where &#x003BB; represents the weighting parameter for the contrastive learning loss. The detailed steps of GCAT to predict novel microbe-disease associations are described in <xref ref-type="table" rid="T5">Algorithm 1</xref>.</p>
<table-wrap position="float" id="T5">
<label>Algorithm 1</label>
<caption><p>GCAT framework for microbe-disease association.</p></caption>
<table frame="hsides" rules="groups">
<tbody>
<tr>
<td align="left" valign="top"><monospace> 1: &#x000A0;Input: Microbe-disease associations <bold>x</bold>, real associations <bold>y</bold></monospace></td>
</tr>
<tr>
<td align="left" valign="top"><monospace> 2: &#x000A0;Output: Loss value</monospace></td>
</tr>
<tr>
<td align="left" valign="top"><monospace> 3: &#x000A0;<italic>x</italic>_<italic>micro</italic>&#x02190;Gaussian_kernel1(<bold>x</bold>)</monospace></td>
</tr>
<tr>
<td align="left" valign="top"><monospace> 4: &#x000A0;<italic>x</italic>_<italic>disease</italic>&#x02190;Gaussian_kernel2(<bold>x</bold>)</monospace></td>
</tr>
<tr>
<td align="left" valign="top"><monospace> 5: &#x000A0;<italic>micro</italic>_<italic>f</italic>_<italic>association, disease</italic>_<italic>f</italic>_<italic>association</italic>&#x02190;GCAT_association(<bold>x</bold>)</monospace></td>
</tr>
<tr>
<td align="left" valign="top"><monospace> 6: &#x000A0;<italic>micro</italic>_<italic>f</italic>_<italic>similarity</italic>&#x02190;GCAT_micro_similarity(<italic>x</italic>_<italic>micro</italic>)</monospace></td>
</tr>
<tr>
<td align="left" valign="top"><monospace> 7: &#x000A0;<italic>disease</italic>_<italic>f</italic>_<italic>similarity</italic>&#x02190;GCAT_disease_similarity(<italic>x</italic>_<italic>disease</italic>)</monospace></td>
</tr>
<tr>
<td align="left" valign="top"><monospace> 8: &#x000A0;<italic>micro</italic>_<italic>feature</italic>&#x02190;cat([<italic>micro</italic>_<italic>f</italic>_<italic>association, micro</italic>_<italic>f</italic>_<italic>similarity</italic>], dim &#x0003D; &#x02212;1)</monospace></td>
</tr>
<tr>
<td align="left" valign="top"><monospace> 9: &#x000A0;<italic>disease</italic>_<italic>feature</italic>&#x02190;cat([<italic>disease</italic>_<italic>f</italic>_<italic>association, disease</italic>_<italic>f</italic>_<italic>similarity</italic>], dim &#x0003D; &#x02212;1)</monospace></td>
</tr>
<tr>
<td align="left" valign="top"><monospace> 10: &#x000A0;<italic>pred</italic>&#x02190;Sigmoid(<italic>micro</italic>_<italic>feature</italic>&#x0002A;<italic>disease</italic>_<italic>feature</italic><sup>T</sup>)</monospace></td>
</tr>
<tr>
<td align="left" valign="top"><monospace> 11: &#x000A0;<italic>loss</italic>&#x02190;Binary_Cross_Entropy(<italic>pred</italic>, <bold>y</bold>)</monospace></td>
</tr>
<tr>
<td align="left" valign="top"><monospace> 12: &#x000A0;<italic>micro</italic>_<italic>contrastive</italic>_<italic>loss</italic>&#x02190;Contrastive_Loss(<italic>micro</italic>_<italic>f</italic>_<italic>similarity, micro</italic>_<italic>f</italic>_<italic>association</italic>)</monospace></td>
</tr>
<tr>
<td align="left" valign="top"><monospace> 13: &#x000A0;<italic>disease</italic>_<italic>contrastive</italic>_<italic>loss</italic>&#x02190;Contrastive_Loss(<italic>disease</italic>_<italic>f</italic>_<italic>similarity, disease</italic>_<italic>f</italic>_<italic>association</italic>)</monospace></td>
</tr>
<tr>
<td align="left" valign="top"><monospace> 14: &#x000A0;<italic>loss</italic>&#x02190;<italic>loss</italic> &#x0002B; &#x003BB;(<italic>micro</italic>_<italic>contrastive</italic>_<italic>loss</italic>&#x0002B;<italic>disease</italic>_<italic>contrastive</italic>_<italic>loss</italic>)</monospace></td>
</tr>
<tr>
<td align="left" valign="top"><monospace> 15: &#x000A0;return <italic>loss</italic></monospace></td></tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
</sec>
<sec id="s3">
<title>3 Results and discussion</title>
<p>In this section, we will provide an exposition of the experimental setup and subsequently delve into an analysis and discussion of the experimental results.</p>
<sec>
<title>3.1 Experimental setup</title>
<p>The GCATCMDA model proposed in this study is a microbe-disease association prediction model based on graph neural networks and contrastive learning. It aims to predict potential associations between microbes and diseases from a known microbe-disease association dataset. The hyperparameter settings required for this model are described as follows. Firstly, the Gaussian kernel similarity threshold <italic>t</italic> needs to be set for constructing microbe and disease similarity networks. Secondly, parameters need to be set for the GCAT feature encoder module, including the dimensionality <italic>F</italic> of node features, the number of network layers <italic>L</italic> for graph convolution, and the number of attention heads <italic>heads</italic> for the graph attention mechanism. Then, in the contrastive learning loss module, the temperature hyperparameter &#x003C4; and the weight parameter &#x003BB; relative to the total loss are adjusted. Finally, the GCATCMDA model is trained using the Adam (Kingma and Ba, <xref ref-type="bibr" rid="B10">2014</xref>) optimizer, with parameters including the learning rate <italic>lr</italic>, weight decay <italic>wd</italic>, and the number of training iterations <italic>epochs</italic>.</p>
<p>This study determines the optimal parameter settings of the GCATCMDA model on the dataset by enumerating different parameter combinations. Subsequently, there is an analysis of key parameters <italic>t</italic>, <italic>F</italic>, <italic>L</italic>, and <italic>heads</italic>. After comparing experimental results, the optimal hyperparameter settings for the GCATCMDA model on the HMDAD dataset are determined as follows: <italic>t</italic> &#x0003D; 0.4, <italic>F</italic> &#x0003D; 128, <italic>L</italic> &#x0003D; 3, <italic>heads</italic> &#x0003D; 2, &#x003C4; &#x0003D; 1, &#x003BB; &#x0003D; 0.2, <italic>lr</italic> &#x0003D; 0.00001, <italic>wd</italic> &#x0003D; 0.001, and <italic>epochs</italic> &#x0003D; 100.</p>
<p>In order to verify the effectiveness of the proposed GCATCMDA model, we compare it with KATZHMDA (Chen et al., <xref ref-type="bibr" rid="B5">2017</xref>), LRLSHMDA (Wang et al., <xref ref-type="bibr" rid="B32">2017</xref>), NTSHMDA (Luo and Long, <xref ref-type="bibr" rid="B17">2018</xref>), and KGNMDA (Jiang et al., <xref ref-type="bibr" rid="B8">2022</xref>). These four methods are recognized for their outstanding performance in this task in past studies and provide research methods with open-source code. For negative samples required in model training, this study randomly selects an equal number of negative samples from all unknown microbe-disease association pairs. The number of negative samples matched the number of positive samples so as to maintain a balanced dataset. In each cross-validation experiment, the Gaussian kernel similarity scores for microbes and diseases are recalculated based on the training set to ensure the effectiveness of evaluating model performance through the test set. In this experiment, we employ the same dataset and follow the hyperparameter settings used in the original papers or provided in the open-source code of the other compared models. We adopted the same evaluation metrics as the previous study (Jiang et al., <xref ref-type="bibr" rid="B8">2022</xref>), including the area under the ROC curve (AUC) and the area under the precision-recall curve (AUPR) to assess the performance of the models. To evaluate the performance of these models in predicting potential associations between microbes and diseases, this study conducted 10 repetitions of five-fold cross-validation experiments and 10 repetitions of ten-fold cross-validation experiments by setting different random seeds, and then computed the average to ensure the accuracy of our results.</p>
</sec>
<sec>
<title>3.2 The classification performance of models</title>
<p>The comparative results of the two cross-validation experiments conducted on the HMDAD dataset for the five aforementioned models are presented in <xref ref-type="table" rid="T1">Table 1</xref>. The optimal performance is highlighted in bold, with standard deviations indicated in parentheses. To provide readers with a clearer visualization of the performance of the models, this study further plotted the ROC curve and PR curve, as shown in <xref ref-type="fig" rid="F3">Figures 3</xref>, <xref ref-type="fig" rid="F4">4</xref>, respectively.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Classification performance comparison of GCATCMDA with existing methods.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Cross-validation</bold></th>
<th valign="top" align="left"><bold>Methods</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
<th valign="top" align="center"><bold>AUPR</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" rowspan="5">Five-fold-CV</td>
<td valign="top" align="left">KATZHMDA</td>
<td valign="top" align="center">0.877 (0.023)</td>
<td valign="top" align="center">0.890 (0.021)</td>
</tr>
 <tr>
<td valign="top" align="left">LRLSHMDA</td>
<td valign="top" align="center">0.801 (0.032)</td>
<td valign="top" align="center">0.774 (0.039)</td>
</tr>
 <tr>
<td valign="top" align="left">NTSHMDA</td>
<td valign="top" align="center">0.892 (0.028)</td>
<td valign="top" align="center">0.892 (0.036)</td>
</tr>
 <tr>
<td valign="top" align="left">KGNMDA</td>
<td valign="top" align="center">0.895 (0.021)</td>
<td valign="top" align="center">0.903 (0.020)</td>
</tr>
 <tr>
<td valign="top" align="left">GCATCMDA</td>
<td valign="top" align="center"><bold>0.908 (0.020)</bold></td>
<td valign="top" align="center"><bold>0.913 (0.022)</bold></td>
</tr> <tr>
<td valign="top" align="left" rowspan="5">10-fold-CV</td>
<td valign="top" align="left">KATZHMDA</td>
<td valign="top" align="center">0.880 (0.031)</td>
<td valign="top" align="center">0.892 (0.027)</td>
</tr>
 <tr>
<td valign="top" align="left">LRLSHMDA</td>
<td valign="top" align="center">0.805 (0.047)</td>
<td valign="top" align="center">0.788 (0.058)</td>
</tr>
 <tr>
<td valign="top" align="left">NTSHMDA</td>
<td valign="top" align="center">0.897 (0.030)</td>
<td valign="top" align="center">0.897 (0.038)</td>
</tr>
 <tr>
<td valign="top" align="left">KGNMDA</td>
<td valign="top" align="center">0.900 (0.029)</td>
<td valign="top" align="center">0.909 (0.029)</td>
</tr>
 <tr>
<td valign="top" align="left">GCATCMDA</td>
<td valign="top" align="center"><bold>0.910 (0.026)</bold></td>
<td valign="top" align="center"><bold>0.914 (0.033)</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>The values in bold represent the best ones.</p>
</table-wrap-foot>
</table-wrap>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>The ROC curves and PR curves of five-fold CV on the HMDAD dataset for different methods.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-15-1483983-g0003.tif"/>
</fig>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>The ROC curves and PR curves of 10-fold CV on the HMDAD dataset for different methods.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-15-1483983-g0004.tif"/>
</fig>
<p>From the experimental data presented above, it can be observed that the GCATCMDA model proposed in this study has achieved excellent predictive performance in the task of predicting associations between microbes and diseases, surpassing methods proposed in previous studies. For instance, in the five-fold cross-validation experiment, the model obtained an approximate 1.3% improvement in AUC compared to the best previous predictive performance. Similarly, in the ten-fold cross-validation experiment, the model obtained an approximate 1.0% enhancement in AUC compared to the best previous predictive performance. The improvement in predictive performance was slightly more pronounced in the five-fold cross-validation compared to the 10-fold cross-validation. This can be attributed to the larger training sets used in the 10-fold validation, which reduce variability across folds and provide more comprehensive data for model training. However, the reduced variability can lead to subtler improvements in performance metrics, as the model benefits from a more stable but less varied dataset. In contrast, the five-fold validation, with its larger test sets, introduces more variability, making performance improvements more apparent.</p>
<p>Graph transformer models offer strong capabilities in capturing global node features through their self-attention mechanisms (Li et al., <xref ref-type="bibr" rid="B12">2024a</xref>,<xref ref-type="bibr" rid="B13">b</xref>). This allows them to handle complex and non-local structures, which can be beneficial for highly heterogeneous datasets. However, these models come with significant computational complexity, scaling quadratically with the number of nodes, making them less practical for large datasets like microbe-disease networks.</p>
<p>While the GCATCMDA model combines GCN and GAT to effectively capture both local features and selective attention on relevant neighbors, Graph Transformer models are designed to capture these relationships on a broader scale. The full attention mechanism of Graph Transformers allows them to dynamically weigh the importance of distant nodes, offering more flexibility in feature extraction across large and complex networks. In contrast, our GCATCMDA model, which combines GCNs and GATs, is more computationally efficient and particularly suited to smaller, sparser datasets like the HMDAD database. While graph transformers excel in capturing global relationships, our approach balances local feature aggregation and attention, offering a more efficient solution. Future work could explore integrating graph transformers to leverage their global feature-capturing capabilities alongside our model&#x00027;s efficiency in handling localized data.</p>
</sec>
<sec>
<title>3.3 Parameter analysis</title>
<p>The GCATCMDA model proposed in this study possesses several crucial parameters, such as the Gaussian kernel similarity threshold <italic>t</italic> for constructing microbe and disease similarity networks, the dimensionality <italic>F</italic> of node features, the number of network layers <italic>L</italic> for graph convolution, and the number of attention heads <italic>heads</italic> for the graph attention mechanism. Therefore, this study conducted training with different parameter combinations on the HMDAD dataset and utilized the experimental results from 10 repetitions of five-fold cross-validation to analyze the impact of these parameters on the model&#x00027;s performance.</p>
<p>As shown in <xref ref-type="fig" rid="F5">Figure 5</xref>, the model fails to achieve the best predictive performance when the Gaussian kernel similarity threshold <italic>t</italic> is set either too high or too low; the optimal predictive performance of the model is attained when <italic>t</italic> &#x0003D; 0.4. Moreover, as the dimensionality of node features increases, the predictive performance of the model gradually improves, with the best performance observed when <italic>F</italic> &#x0003D; 128. Additionally, the model exhibits its best predictive performance when the number of network layers for graph convolution <italic>L</italic> &#x0003D; 3. Furthermore, it is observed that the evaluation metrics AUC and AUPR attain their maximum values when the number of attention heads for the graph attention mechanism <italic>heads</italic> &#x0003D; 2.</p>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>The effect of parameters <italic>t</italic>, <italic>F</italic>, <italic>L</italic>, and <italic>heads</italic> on the GCATCMDA model.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-15-1483983-g0005.tif"/>
</fig>
</sec>
<sec>
<title>3.4 Ablation studies</title>
<p>To further validate the impact of each module in the GCATCMDA model on the prediction performance of microbe-disease associations, this study conducted ablation experiments on the HMDAD dataset. The evaluation metrics included AUC, AUPR, Precision, Recall, and F1 score. These metrics aimed to comprehensively analyze the influence of different modules on the performance of the GCATCMDA model. The experimental results represent the average scores of 10 repetitions of five-fold cross-validation experiments. Initially, given that the GCATCMDA model simultaneously utilizes microbe similarity networks, disease similarity networks, and microbe-disease association networks to learn the feature representations of microbes and diseases, this study assessed the impact of node features from different association networks on the model&#x00027;s prediction performance. The experimental results are illustrated in <xref ref-type="fig" rid="F6">Figure 6</xref>, where GCATCMDA_sim denotes learning the feature representations of microbes and diseases only from microbe and disease similarity networks, while GCATCMDA_asso denotes learning the feature representations only from the microbe-disease association network. It can be observed from <xref ref-type="fig" rid="F6">Figure 6</xref> that integrating feature representations of microbes and diseases from different association networks effectively enhances the model&#x00027;s predictive performance.</p>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p>Effect of node embedding extracted from different networks on prediction.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-15-1483983-g0006.tif"/>
</fig>
<p>Next, given that the GCATCMDA model mainly consists of GCN, GAT, feature dual fusion module, and contrastive learning module, this study attempted to remove each module individually to investigate the impact of different modules on the model&#x00027;s prediction performance. The experimental results are presented in <xref ref-type="table" rid="T2">Table 2</xref>, where &#x0201C;GCATCMDA_GCN&#x0201D; denotes the removal of the graph convolutional network from the original model, &#x0201C;GCATCMDA_GAT&#x0201D; denotes the removal of the graph attention mechanism, &#x0201C;GCATCMDA_SUM&#x0201D; denotes replacing the feature dual fusion module of the original model with a simple summation operation, and &#x0201C;GCATCMDA_CL&#x0201D; denotes the removal of the contrastive learning module from the original model. From the results in <xref ref-type="table" rid="T2">Table 2</xref>, it can be observed that both &#x0201C;GCATCMDA_GCN&#x0201D; and &#x0201C;GCATCMDA_GAT&#x0201D; exhibit lower predictive performance compared to the original model, indicating that the integration of graph convolutional networks and graph attention mechanisms for node feature learning is effective in obtaining more informative node feature representations from the network. The predictive performance of &#x0201C;GCATCMDA_SUM&#x0201D; is also lower than that of the original model, suggesting that the designed feature dual fusion module effectively fuses node feature information outputted by the graph attention layers. Similarly, the predictive performance of &#x0201C;GCATCMDA_CL&#x0201D; is slightly lower than that of the original model, indicating that the addition of the contrastive learning module can improve the model&#x00027;s predictive performance to some extent.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Classification performance comparison of GCATCMDA with its ablated variants.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Method</bold></th>
<th valign="top" align="center"><bold>AUC</bold></th>
<th valign="top" align="center"><bold>AUPR</bold></th>
<th valign="top" align="center"><bold>Precision</bold></th>
<th valign="top" align="center"><bold>Recall</bold></th>
<th valign="top" align="center"><bold>F1</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">GCATCMDA_GCN</td>
<td valign="top" align="center">0.893 (0.023)</td>
<td valign="top" align="center">0.903 (0.021)</td>
<td valign="top" align="center">0.858 (0.033)</td>
<td valign="top" align="center">0.771 (0.041)</td>
<td valign="top" align="center">0.812 (0.027)</td>
</tr> <tr>
<td valign="top" align="left">GCATCMDA_GAT</td>
<td valign="top" align="center">0.884 (0.034)</td>
<td valign="top" align="center">0.900 (0.033)</td>
<td valign="top" align="center">0.865 (0.040)</td>
<td valign="top" align="center">0.737 (0.086)</td>
<td valign="top" align="center">0.793 (0.062)</td>
</tr> <tr>
<td valign="top" align="left">GCATCMDA_SUM</td>
<td valign="top" align="center">0.894 (0.028)</td>
<td valign="top" align="center">0.887 (0.040)</td>
<td valign="top" align="center">0.866 (0.045)</td>
<td valign="top" align="center">0.749 (0.078)</td>
<td valign="top" align="center">0.801 (0.056)</td>
</tr> <tr>
<td valign="top" align="left">GCATCMDA_CL</td>
<td valign="top" align="center">0.904 (0.021)</td>
<td valign="top" align="center">0.908 (0.025)</td>
<td valign="top" align="center">0.869 (0.034)</td>
<td valign="top" align="center">0.770 (0.039)</td>
<td valign="top" align="center">0.816 (0.028)</td>
</tr> <tr>
<td valign="top" align="left">GCATCMDA</td>
<td valign="top" align="center"><bold>0.908 (0.020)</bold></td>
<td valign="top" align="center"><bold>0.913 (0.022)</bold></td>
<td valign="top" align="center"><bold>0.874 (0.034)</bold></td>
<td valign="top" align="center"><bold>0.772 (0.045)</bold></td>
<td valign="top" align="center"><bold>0.819 (0.032)</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>The values in bold represent the best ones.</p>
</table-wrap-foot>
</table-wrap>
<p>Finally, to investigate the impact of different operations for fusing node features outputted by the graph attention layers on the GCATCMDA model prediction performance, this study systematically combined three common feature vector operations: concatenation, sum, and element-wise product. The combined fusion feature formulas are similar to the feature dual fusion formula described earlier. The experimental results are illustrated in <xref ref-type="fig" rid="F7">Figure 7</xref>. GCATCMDA_C represents the use of concatenation only, GCATCMDA_S represents the use of sum only, and GCATCMDA_H represents the use of element-wise product only. GCATCMDA_CS represents the combination of concatenation and sum, GCATCMDA_CH represents the combination of concatenation and element-wise product, GCATCMDA_SH represents the combination of sum and element-wise product, and GCATCMDA_CSH represents the combination of concatenation, sum, and element-wise product. From the experimental results in <xref ref-type="fig" rid="F7">Figure 7</xref>, it can be observed that selecting the combination operations of concatenation and element-wise product in the feature dual fusion module can most effectively fuse node features outputted by the graph attention layers.</p>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>The impact of different feature fusion methods on model prediction performance.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmicb-15-1483983-g0007.tif"/>
</fig>
</sec>
<sec>
<title>3.5 Case studies</title>
<p>To further validate whether the GCATCMDA model can predict associations between microbes and diseases, this study initially trained the model using all known microbial-disease associations in the HMDAD dataset. Subsequently, obesity and inflammatory bowel disease (IBD), two common diseases, were selected as subjects for case analysis. The model predicted microbial associations with obesity and IBD by sorting the predicted association scores from high to low and retaining the top 20 unknown microbial associations with high scores for these two diseases. Finally, employing a literature search approach, this study validated whether these microbial associations with diseases existed by examining relevant publications in the biomedical literature database PubMed. This validation process aimed to assess the accuracy of the microbial-disease associations predicted by the GCATCMDA model.</p>
<p>From <xref ref-type="table" rid="T3">Table 3</xref>, it can be observed that among the top 20 associated microbes identified by the GCATCMDA model for obesity, 16 of them have been previously documented in the literature to be associated with obesity. For instance, Xu et al. (<xref ref-type="bibr" rid="B35">2022</xref>), by reviewing literature on gut microbiota and obesity, identified an association between <italic>Prevotella</italic> and obesity. Baradaran et al. (<xref ref-type="bibr" rid="B2">2021</xref>) experimentally demonstrated that individuals positive for <italic>Helicobacter pylori</italic> infection are more likely to suffer from obesity, with an increased risk of <italic>Helicobacter pylori</italic> infection among obese individuals. From <xref ref-type="table" rid="T4">Table 4</xref>, it can be observed that in IBD, among the top 20 associated microbes identified by the GCATCMDA model, 15 have been previously demonstrated to be associated with IBD in the literature. For example, Quaglio et al. (<xref ref-type="bibr" rid="B24">2022</xref>) demonstrated that the abundance of Bacteroidetes and Firmicutes in patients with IBD undergoes significant changes. The experimental research of Cardoneanu et al. (<xref ref-type="bibr" rid="B4">2021</xref>) showed a significant decrease in the abundance of <italic>Clostridium coccoides</italic> in patients with IBD compared to healthy individuals.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Candidate microbes related to obesity predicted by GCATCMDA model.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Rank</bold></th>
<th valign="top" align="left"><bold>Microbe</bold></th>
<th valign="top" align="left"><bold>Evidence</bold></th>
<th valign="top" align="center"><bold>Rank</bold></th>
<th valign="top" align="left"><bold>Microbe</bold></th>
<th valign="top" align="left"><bold>Evidence</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left"><italic>Prevotella</italic></td>
<td valign="top" align="left">PMID:35093025</td>
<td valign="top" align="center">11</td>
<td valign="top" align="left"><italic>Enterobacter aerogenes</italic></td>
<td valign="top" align="left">Unconfirmed</td>
</tr> <tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left">Proteobacteria</td>
<td valign="top" align="left">PMID:31197613</td>
<td valign="top" align="center">12</td>
<td valign="top" align="left"><italic>Enterobacter hormaechei</italic></td>
<td valign="top" align="left">Unconfirmed</td>
</tr> <tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left"><italic>Helicobacter pylori</italic></td>
<td valign="top" align="left">PMID:34243821</td>
<td valign="top" align="center">13</td>
<td valign="top" align="left"><italic>Klebsiella pneumoniae</italic></td>
<td valign="top" align="left">PMID:31921729</td>
</tr> <tr>
<td valign="top" align="left">4</td>
<td valign="top" align="left">Lachnospiraceae</td>
<td valign="top" align="left">PMID:31397240</td>
<td valign="top" align="center">14</td>
<td valign="top" align="left"><italic>Shigella dysenteriae</italic></td>
<td valign="top" align="left">Unconfirmed</td>
</tr> <tr>
<td valign="top" align="left">5</td>
<td valign="top" align="left">Actinobacteria</td>
<td valign="top" align="left">PMID:19043404</td>
<td valign="top" align="center">15</td>
<td valign="top" align="left"><italic>Haemophilus</italic></td>
<td valign="top" align="left">PMID:31976177</td>
</tr> <tr>
<td valign="top" align="left">6</td>
<td valign="top" align="left"><italic>Staphylococcus</italic></td>
<td valign="top" align="left">PMID:29667480</td>
<td valign="top" align="center">16</td>
<td valign="top" align="left"><italic>Clostridium coccoides</italic></td>
<td valign="top" align="left">PMID:29667480</td>
</tr> <tr>
<td valign="top" align="left">7</td>
<td valign="top" align="left"><italic>Enterococcus</italic></td>
<td valign="top" align="left">PMID:35967777</td>
<td valign="top" align="center">17</td>
<td valign="top" align="left">Betaproteobacteria</td>
<td valign="top" align="left">Unconfirmed</td>
</tr> <tr>
<td valign="top" align="left">8</td>
<td valign="top" align="left"><italic>Clostridium</italic></td>
<td valign="top" align="left">PMID:29667480</td>
<td valign="top" align="center">18</td>
<td valign="top" align="left"><italic>Clostridium leptum</italic></td>
<td valign="top" align="left">PMID:36756620</td>
</tr> <tr>
<td valign="top" align="left">9</td>
<td valign="top" align="left"><italic>Clostridium difficile</italic></td>
<td valign="top" align="left">PMID:25638400</td>
<td valign="top" align="center">19</td>
<td valign="top" align="left">Bacteroidales</td>
<td valign="top" align="left">PMID:33407104</td>
</tr> <tr>
<td valign="top" align="left">10</td>
<td valign="top" align="left"><italic>Faecalibacterium prausnitzii</italic></td>
<td valign="top" align="left">PMID:23985870</td>
<td valign="top" align="center">20</td>
<td valign="top" align="left"><italic>Enterococcus faecium</italic></td>
<td valign="top" align="left">PMID:36590404</td>
</tr></tbody>
</table>
</table-wrap>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Candidate microbes related to IBD predicted by GCATCMDA model.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Rank</bold></th>
<th valign="top" align="left"><bold>Microbe</bold></th>
<th valign="top" align="left"><bold>Evidence</bold></th>
<th valign="top" align="center"><bold>Rank</bold></th>
<th valign="top" align="left"><bold>Microbe</bold></th>
<th valign="top" align="left"><bold>Evidence</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left">Bacteroidetes</td>
<td valign="top" align="left">PMID:36157114</td>
<td valign="top" align="center">11</td>
<td valign="top" align="left"><italic>Enterobacter hormaechei</italic></td>
<td valign="top" align="left">Unconfirmed</td>
</tr> <tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left">Firmicutes</td>
<td valign="top" align="left">PMID:36157114</td>
<td valign="top" align="center">12</td>
<td valign="top" align="left"><italic>Klebsiella pneumoniae</italic></td>
<td valign="top" align="left">PMID:36436756</td>
</tr> <tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left"><italic>Clostridium coccoides</italic></td>
<td valign="top" align="left">PMID:33548121</td>
<td valign="top" align="center">13</td>
<td valign="top" align="left"><italic>Shigella dysenteriae</italic></td>
<td valign="top" align="left">Unconfirmed</td>
</tr> <tr>
<td valign="top" align="left">4</td>
<td valign="top" align="left"><italic>Helicobacter pylori</italic></td>
<td valign="top" align="left">PMID:30237392</td>
<td valign="top" align="center">14</td>
<td valign="top" align="left"><italic>Clostridium leptum</italic></td>
<td valign="top" align="left">PMID:33548121</td>
</tr> <tr>
<td valign="top" align="left">5</td>
<td valign="top" align="left"><italic>Prevotella</italic></td>
<td valign="top" align="left">PMID:38053528</td>
<td valign="top" align="center">15</td>
<td valign="top" align="left"><italic>Lysobacter</italic></td>
<td valign="top" align="left">Unconfirmed</td>
</tr> <tr>
<td valign="top" align="left">6</td>
<td valign="top" align="left"><italic>Clostridium difficile</italic></td>
<td valign="top" align="left">PMID:31698044</td>
<td valign="top" align="center">16</td>
<td valign="top" align="left">Rickettsiales</td>
<td valign="top" align="left">Unconfirmed</td>
</tr> <tr>
<td valign="top" align="left">7</td>
<td valign="top" align="left"><italic>Staphylococcus</italic></td>
<td valign="top" align="left">PMID:31662859</td>
<td valign="top" align="center">17</td>
<td valign="top" align="left"><italic>Streptococcus mitis</italic></td>
<td valign="top" align="left">PMID:30796823</td>
</tr> <tr>
<td valign="top" align="left">8</td>
<td valign="top" align="left"><italic>Staphylococcus aureus</italic></td>
<td valign="top" align="left">PMID:31698044</td>
<td valign="top" align="center">18</td>
<td valign="top" align="left"><italic>Xanthomonas</italic></td>
<td valign="top" align="left">PMID:35689701</td>
</tr> <tr>
<td valign="top" align="left">9</td>
<td valign="top" align="left"><italic>Enterococcus</italic></td>
<td valign="top" align="left">PMID:32292819</td>
<td valign="top" align="center">19</td>
<td valign="top" align="left">Enterobacteriaceae</td>
<td valign="top" align="left">PMID:24629344</td>
</tr> <tr>
<td valign="top" align="left">10</td>
<td valign="top" align="left"><italic>Enterobacter aerogenes</italic></td>
<td valign="top" align="left">Unconfirmed</td>
<td valign="top" align="center">20</td>
<td valign="top" align="left"><italic>Lactobacillus</italic></td>
<td valign="top" align="left">PMID:37773196</td>
</tr></tbody>
</table>
</table-wrap>
<p>In summary, it can be observed from <xref ref-type="table" rid="T3">Tables 3</xref>, <xref ref-type="table" rid="T4">4</xref> that the GCATCMDA model achieves an accuracy of at least 75% in predicting potential associated microbes for both obesity (16 of 20 confirmed) and inflammatory bowel disease (15 of 20 confirmed). Therefore, this study concludes that the GCATCMDA model can provide effective and accurate candidate sets of microbes associated with diseases, thereby reducing the research costs and duration of traditional biological experiments.</p>
</sec>
</sec>
<sec sec-type="conclusions" id="s4">
<title>4 Conclusion</title>
<p>This article primarily introduces the GCATCMDA model proposed in this study, aimed at predicting potential sets of microbe-disease associations based on known microbe-disease association data. Initially, the article outlines the construction of Gaussian kernel similarity networks for microbes and diseases using known association data and explains how the model combines graph neural networks with contrastive learning to obtain effective feature representations for microbes and diseases. Subsequently, experimental evaluations are conducted to compare the GCATCMDA model with existing methods, demonstrating its superiority in microbe-disease association prediction tasks. Additionally, parameter analysis experiments validate the rationality of parameter settings in the GCATCMDA model, while ablation experiments confirm the effectiveness of each module in the model. Finally, obesity and inflammatory bowel disease are selected as case studies to validate the high accuracy of the microbe-disease association candidate sets predicted by the GCATCMDA model.</p>
<p>The proposed model combines GCN and GAT to leverage the strengths of both approaches. GCN effectively captures local neighborhood information by performing convolution operations over graph structures, allowing the model to aggregate features across connected nodes. However, GCN applies equal weighting to all neighboring nodes, which may limit its ability to differentiate between more and less important neighbors. To address this limitation, GAT introduces an attention mechanism that assigns different importance to neighboring nodes by computing attention coefficients. This allows the model to focus more on the relevant nodes, improving its ability to capture complex interactions. By combining GCN&#x00027;s ability to aggregate global structural information with GAT&#x00027;s selective attention on important neighbors, the proposed model effectively captures both local and global patterns within the graph, leading to enhanced predictive performance.</p>
<p>While our study has demonstrated the effectiveness of the GCATCMDA model in predicting microbe-disease associations, there are several limitations that must be acknowledged. First, the model has only been evaluated using the HMDAD database, and its generalization ability requires further validation across other public datasets, such as HMDA and Disbiome. The limited volume of data in this study may also hinder the model&#x00027;s ability to capture complex patterns, suggesting the need for more extensive datasets to enhance its predictive performance. Additionally, our current approach does not differentiate between positive and negative association information, a distinction that will be addressed in future research to refine prediction accuracy. By overcoming these limitations, we anticipate further improvements in the model&#x00027;s robustness and its potential application across a broader range of microbial and disease studies.</p>
<p>In conclusion, this study asserts that the GCATCMDA model can advance the development of deep learning algorithms in the field of microbe-disease association prediction. Moreover, it effectively aids biologists in exploring potential associations between microbes and human diseases from a big data perspective, thereby reducing the costs of traditional biological experiments and accelerating research progress in the field of gut microbes and disease association studies.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The HMDAD database is available at: <ext-link ext-link-type="uri" xlink:href="http://www.cuilab.cn/hmdad">http://www.cuilab.cn/hmdad</ext-link>. The source code is available upon reasonable request to the corresponding authors.</p>
</sec>
<sec sec-type="author-contributions" id="s6">
<title>Author contributions</title>
<p>CJ: Investigation, Software, Visualization, Writing &#x02013; original draft. JF: Formal analysis, Validation, Writing &#x02013; original draft. BS: Formal analysis, Validation, Writing &#x02013; original draft. QC: Data curation, Writing &#x02013; original draft. JY: Funding acquisition, Writing &#x02013; review &#x00026; editing. GW: Resources, Writing &#x02013; review &#x00026; editing. XP: Methodology, Supervision, Writing &#x02013; review &#x00026; editing. XL: Conceptualization, Funding acquisition, Project administration, Supervision, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<sec sec-type="funding-information" id="s7">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This work was supported by the National Natural Science Foundation Project of China (82171526), Beijing Talents Project (2020A38), and Open Fund of National Engineering Laboratory for Big Data System Computing Technology (Grant No. SZU-BDSC-OF2024-19).</p>
</sec>
<ack><p>The authors thank Shenzhen University Big Data Research Center for computing hardware facilities.</p>
</ack>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>XL was a co-founder of JCY Biotech Ltd.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s8">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bao</surname> <given-names>W.</given-names></name> <name><surname>Jiang</surname> <given-names>Z.</given-names></name> <name><surname>Huang</surname> <given-names>D.-S.</given-names></name></person-group> (<year>2017</year>). <article-title>Novel human microbe-disease association prediction using network consistency projection</article-title>. <source>BMC Bioinformatics</source> <volume>18</volume>, <fpage>173</fpage>&#x02013;<lpage>181</lpage>. <pub-id pub-id-type="doi">10.1186/s12859-017-1968-2</pub-id><pub-id pub-id-type="pmid">29297304</pub-id></citation></ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Baradaran</surname> <given-names>A.</given-names></name> <name><surname>Dehghanbanadaki</surname> <given-names>H.</given-names></name> <name><surname>Naderpour</surname> <given-names>S.</given-names></name> <name><surname>Pirkashani</surname> <given-names>L. M.</given-names></name> <name><surname>Rajabi</surname> <given-names>A.</given-names></name> <name><surname>Rashti</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>The association between <italic>Helicobacter pylori</italic> and obesity: a systematic review and meta-analysis of case-control studies</article-title>. <source>Clin. Diabetes Endocrinol</source>. <volume>7</volume>, <fpage>1</fpage>&#x02013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.1186/s40842-021-00131-w</pub-id><pub-id pub-id-type="pmid">34243821</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Blum</surname> <given-names>H. E.</given-names></name></person-group> (<year>2017</year>). <article-title>The human microbiome</article-title>. <source>Adv. Med. Sci</source>. <volume>62</volume>, <fpage>414</fpage>&#x02013;<lpage>420</lpage>. <pub-id pub-id-type="doi">10.1016/j.advms.2017.04.005</pub-id><pub-id pub-id-type="pmid">28711782</pub-id></citation></ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cardoneanu</surname> <given-names>A.</given-names></name> <name><surname>Mihai</surname> <given-names>C.</given-names></name> <name><surname>Rezus</surname> <given-names>E.</given-names></name> <name><surname>Burlui</surname> <given-names>A.</given-names></name> <name><surname>Popa</surname> <given-names>I.</given-names></name> <name><surname>Prelipcean</surname> <given-names>C. C.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Gut microbiota changes in inflammatory bowel diseases and ankylosing spondylitis</article-title>. <source>J. Gastrointestin. Liver Dis</source>. <volume>30</volume>, <fpage>46</fpage>&#x02013;<lpage>54</lpage>. <pub-id pub-id-type="doi">10.15403/jgld-2823</pub-id><pub-id pub-id-type="pmid">33548121</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>X.</given-names></name> <name><surname>Huang</surname> <given-names>Y.-A.</given-names></name> <name><surname>You</surname> <given-names>Z.-H.</given-names></name> <name><surname>Yan</surname> <given-names>G.-Y.</given-names></name> <name><surname>Wang</surname> <given-names>X.-S.</given-names></name></person-group> (<year>2017</year>). <article-title>A novel approach based on Katz measure to predict associations of human microbiota with non-infectious diseases</article-title>. <source>Bioinformatics</source> <volume>33</volume>, <fpage>733</fpage>&#x02013;<lpage>739</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btw715</pub-id><pub-id pub-id-type="pmid">28025197</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Henao-Mejia</surname> <given-names>J.</given-names></name> <name><surname>Elinav</surname> <given-names>E.</given-names></name> <name><surname>Thaiss</surname> <given-names>C. A.</given-names></name> <name><surname>Licona-Limon</surname> <given-names>P.</given-names></name> <name><surname>Flavell</surname> <given-names>R. A.</given-names></name></person-group> (<year>2013</year>). <article-title>Role of the intestinal microbiome in liver disease</article-title>. <source>J. Autoimmun</source>. <volume>46</volume>, <fpage>66</fpage>&#x02013;<lpage>73</lpage>. <pub-id pub-id-type="doi">10.1016/j.jaut.2013.07.001</pub-id><pub-id pub-id-type="pmid">24075647</pub-id></citation></ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hua</surname> <given-names>M.</given-names></name> <name><surname>Yu</surname> <given-names>S.</given-names></name> <name><surname>Liu</surname> <given-names>T.</given-names></name> <name><surname>Yang</surname> <given-names>X.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name></person-group> (<year>2022</year>). <article-title>Mvgcnmda: multi-view graph augmentation convolutional network for uncovering disease-related microbes</article-title>. <source>Interdiscip. Sci. Comput. Life Sci</source>. <volume>14</volume>, <fpage>669</fpage>&#x02013;<lpage>682</lpage>. <pub-id pub-id-type="doi">10.1007/s12539-022-00514-2</pub-id><pub-id pub-id-type="pmid">35428964</pub-id></citation></ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jiang</surname> <given-names>C.</given-names></name> <name><surname>Tang</surname> <given-names>M.</given-names></name> <name><surname>Jin</surname> <given-names>S.</given-names></name> <name><surname>Huang</surname> <given-names>W.</given-names></name> <name><surname>Liu</surname> <given-names>X.</given-names></name></person-group> (<year>2022</year>). <article-title>Kgnmda: a knowledge graph neural network method for predicting microbe-disease associations</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinform</source>. <volume>20</volume>, <fpage>1147</fpage>&#x02013;<lpage>1155</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2022.3184362</pub-id><pub-id pub-id-type="pmid">35724280</pub-id></citation></ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jin</surname> <given-names>Z.</given-names></name> <name><surname>Wang</surname> <given-names>M.</given-names></name> <name><surname>Tang</surname> <given-names>C.</given-names></name> <name><surname>Zheng</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>W.</given-names></name> <name><surname>Sha</surname> <given-names>X.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Predicting miRNA-disease association via graph attention learning and multiplex adaptive modality fusion</article-title>. <source>Comput. Biol. Med</source>. <volume>169</volume>:<fpage>107904</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2023.107904</pub-id><pub-id pub-id-type="pmid">38181611</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kingma</surname> <given-names>D. P.</given-names></name> <name><surname>Ba</surname> <given-names>J.</given-names></name></person-group> (<year>2014</year>). <article-title>Adam: a method for stochastic optimization</article-title>. <source>arXiv</source> [Preprint]. arXiv:1412.6980. <pub-id pub-id-type="doi">10.48550/arXiv.1412.6980</pub-id></citation>
</ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kipf</surname> <given-names>T. N.</given-names></name> <name><surname>Welling</surname> <given-names>M.</given-names></name></person-group> (<year>2016</year>). <article-title>Semi-supervised classification with graph convolutional networks</article-title>. <source>arXiv</source> [Preprint]. arXiv:1609.02907. <pub-id pub-id-type="doi">10.48550/arXiv.1609.02907</pub-id></citation>
</ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>G.</given-names></name> <name><surname>Bai</surname> <given-names>P.</given-names></name> <name><surname>Chen</surname> <given-names>J.</given-names></name> <name><surname>Liang</surname> <given-names>C.</given-names></name></person-group> (<year>2024a</year>). <article-title>Identifying virulence factors using graph transformer autoencoder with ESMFold-predicted structures</article-title>. <source>Comput. Biol. Med</source>. <volume>170</volume>:<fpage>108062</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2024.108062</pub-id><pub-id pub-id-type="pmid">38308869</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>G.</given-names></name> <name><surname>Bai</surname> <given-names>P.</given-names></name> <name><surname>Liang</surname> <given-names>C.</given-names></name> <name><surname>Luo</surname> <given-names>J.</given-names></name></person-group> (<year>2024b</year>). <article-title>Node-adaptive graph transformer with structural encoding for accurate and robust lncRNA-disease association prediction</article-title>. <source>BMC Genomics</source> <volume>25</volume>:<fpage>73</fpage>. <pub-id pub-id-type="doi">10.1186/s12864-024-09998-2</pub-id><pub-id pub-id-type="pmid">38233788</pub-id></citation></ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>H.</given-names></name> <name><surname>Bing</surname> <given-names>P.</given-names></name> <name><surname>Zhang</surname> <given-names>M.</given-names></name> <name><surname>Tian</surname> <given-names>G.</given-names></name> <name><surname>Ma</surname> <given-names>J.</given-names></name> <name><surname>Li</surname> <given-names>H.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>MNNMDA: predicting human microbe-disease association via a method to minimize matrix nuclear norm</article-title>. <source>Comput. Struct. Biotechnol. J</source>. <volume>21</volume>, <fpage>1414</fpage>&#x02013;<lpage>1423</lpage>. <pub-id pub-id-type="doi">10.1016/j.csbj.2022.12.053</pub-id><pub-id pub-id-type="pmid">36824227</pub-id></citation></ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Long</surname> <given-names>Y.</given-names></name> <name><surname>Luo</surname> <given-names>J.</given-names></name></person-group> (<year>2019</year>). <article-title>WMGHMDA: a novel weighted meta-graph-based model for predicting human microbe-disease association on heterogeneous information network</article-title>. <source>BMC Bioinformatics</source> <volume>20</volume>, <fpage>1</fpage>&#x02013;<lpage>18</lpage>. <pub-id pub-id-type="doi">10.1186/s12859-019-3066-0</pub-id><pub-id pub-id-type="pmid">31675979</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Long</surname> <given-names>Y.</given-names></name> <name><surname>Luo</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Xia</surname> <given-names>Y.</given-names></name></person-group> (<year>2021</year>). <article-title>Predicting human microbe-disease associations via graph attention networks with inductive matrix completion</article-title>. <source>Brief. Bioinform</source>. <volume>22</volume>:<fpage>bbaa146</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbaa146</pub-id><pub-id pub-id-type="pmid">32725163</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>J.</given-names></name> <name><surname>Long</surname> <given-names>Y.</given-names></name></person-group> (<year>2018</year>). <article-title>NTSHMDA: prediction of human microbe-disease association based on random walk by integrating network topological similarity</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinform</source>. <volume>17</volume>, <fpage>1341</fpage>&#x02013;<lpage>1351</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2018.2883041</pub-id><pub-id pub-id-type="pmid">30489271</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lynch</surname> <given-names>S. V.</given-names></name> <name><surname>Pedersen</surname> <given-names>O.</given-names></name></person-group> (<year>2016</year>). <article-title>The human intestinal microbiome in health and disease</article-title>. <source>N. Engl. J. Med</source>. <volume>375</volume>, <fpage>2369</fpage>&#x02013;<lpage>2379</lpage>. <pub-id pub-id-type="doi">10.1056/NEJMra1600266</pub-id><pub-id pub-id-type="pmid">27974040</pub-id></citation></ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>W.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name> <name><surname>Zeng</surname> <given-names>P.</given-names></name> <name><surname>Huang</surname> <given-names>C.</given-names></name> <name><surname>Li</surname> <given-names>J.</given-names></name> <name><surname>Geng</surname> <given-names>B.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>An analysis of human microbe-disease associations</article-title>. <source>Brief. Bioinform</source>. <volume>18</volume>, <fpage>85</fpage>&#x02013;<lpage>97</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbw005</pub-id><pub-id pub-id-type="pmid">26883326</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>Y.</given-names></name> <name><surname>Jiang</surname> <given-names>H.</given-names></name></person-group> (<year>2020</year>). <article-title>NinimHMDA: neural integration of neighborhood information on a multiplex heterogeneous network for multiple types of human microbe-disease association</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>5665</fpage>&#x02013;<lpage>5671</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa1080</pub-id><pub-id pub-id-type="pmid">33416850</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Marchesi</surname> <given-names>J. R.</given-names></name> <name><surname>Adams</surname> <given-names>D. H.</given-names></name> <name><surname>Fava</surname> <given-names>F.</given-names></name> <name><surname>Hermes</surname> <given-names>G. D.</given-names></name> <name><surname>Hirschfield</surname> <given-names>G. M.</given-names></name> <name><surname>Hold</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>The gut microbiota and host health: a new clinical frontier</article-title>. <source>Gut</source> <volume>65</volume>, <fpage>330</fpage>&#x02013;<lpage>339</lpage>. <pub-id pub-id-type="doi">10.1136/gutjnl-2015-309990</pub-id><pub-id pub-id-type="pmid">26338727</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Paun</surname> <given-names>A.</given-names></name> <name><surname>Yau</surname> <given-names>C.</given-names></name> <name><surname>Danska</surname> <given-names>J. S.</given-names></name></person-group> (<year>2017</year>). <article-title>The influence of the microbiome on type 1 diabetes</article-title>. <source>J. Immunol</source>. <volume>198</volume>, <fpage>590</fpage>&#x02013;<lpage>595</lpage>. <pub-id pub-id-type="doi">10.4049/jimmunol.1601519</pub-id><pub-id pub-id-type="pmid">28069754</pub-id></citation></ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Peng</surname> <given-names>L.</given-names></name> <name><surname>Huang</surname> <given-names>L.</given-names></name> <name><surname>Tian</surname> <given-names>G.</given-names></name> <name><surname>Wu</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>G.</given-names></name> <name><surname>Li</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Predicting potential microbe-disease associations with graph attention autoencoder, positive-unlabeled learning, and deep neural network</article-title>. <source>Front. Microbiol</source>. <volume>14</volume>:<fpage>1244527</fpage>. <pub-id pub-id-type="doi">10.3389/fmicb.2023.1244527</pub-id><pub-id pub-id-type="pmid">37789848</pub-id></citation></ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Quaglio</surname> <given-names>A. E. V.</given-names></name> <name><surname>Grillo</surname> <given-names>T. G.</given-names></name> <name><surname>De Oliveira</surname> <given-names>E. C. S.</given-names></name> <name><surname>Di Stasi</surname> <given-names>L. C.</given-names></name> <name><surname>Sassaki</surname> <given-names>L. Y.</given-names></name></person-group> (<year>2022</year>). <article-title>Gut microbiota, inflammatory bowel disease and colorectal cancer</article-title>. <source>World J. Gastroenterol</source>. <volume>28</volume>:<fpage>4053</fpage>. <pub-id pub-id-type="doi">10.3748/wjg.v28.i30.4053</pub-id><pub-id pub-id-type="pmid">36157114</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schwabe</surname> <given-names>R. F.</given-names></name> <name><surname>Jobin</surname> <given-names>C.</given-names></name></person-group> (<year>2013</year>). <article-title>The microbiome and cancer</article-title>. <source>Nat. Rev. Cancer</source> <volume>13</volume>, <fpage>800</fpage>&#x02013;<lpage>812</lpage>. <pub-id pub-id-type="doi">10.1038/nrc3610</pub-id><pub-id pub-id-type="pmid">24132111</pub-id></citation></ref>
<ref id="B26">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Shen</surname> <given-names>Z.</given-names></name> <name><surname>Jiang</surname> <given-names>Z.</given-names></name> <name><surname>Bao</surname> <given-names>W.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;CMFHMDA: collaborative matrix factorization for human microbe-disease association prediction,&#x0201D;</article-title> in <source>Intelligent Computing Theories and Application: 13th International Conference, ICIC 2017, Liverpool, UK, August 7-10, 2017, Proceedings, Part II 13</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>261</fpage>&#x02013;<lpage>269</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-63312-1_24</pub-id></citation>
</ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sommer</surname> <given-names>F.</given-names></name> <name><surname>B&#x000E4;ckhed</surname> <given-names>F.</given-names></name></person-group> (<year>2013</year>). <article-title>The gut microbiota&#x02013;masters of host development and physiology</article-title>. <source>Nat. Rev. Microbiol</source>. <volume>11</volume>, <fpage>227</fpage>&#x02013;<lpage>238</lpage>. <pub-id pub-id-type="doi">10.1038/nrmicro2974</pub-id><pub-id pub-id-type="pmid">23435359</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sun</surname> <given-names>F.</given-names></name> <name><surname>Sun</surname> <given-names>J.</given-names></name> <name><surname>Zhao</surname> <given-names>Q.</given-names></name></person-group> (<year>2022</year>). <article-title>A deep learning method for predicting metabolite-disease associations via graph neural network</article-title>. <source>Brief. Bioinform</source>. <volume>23</volume>:<fpage>bbac266</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbac266</pub-id><pub-id pub-id-type="pmid">35817399</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tooley</surname> <given-names>K. L.</given-names></name></person-group> (<year>2020</year>). <article-title>Effects of the human gut microbiota on cognitive performance, brain structure and function: a narrative review</article-title>. <source>Nutrients</source> <volume>12</volume>:<fpage>3009</fpage>. <pub-id pub-id-type="doi">10.3390/nu12103009</pub-id><pub-id pub-id-type="pmid">33007941</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tseng</surname> <given-names>C.-H.</given-names></name> <name><surname>Wu</surname> <given-names>C.-Y.</given-names></name></person-group> (<year>2019</year>). <article-title>The gut microbiome in obesity</article-title>. <source>J. Formosan Med. Assoc</source>. <volume>118</volume>, <fpage>S3</fpage>&#x02013;<lpage>S9</lpage>. <pub-id pub-id-type="doi">10.1016/j.jfma.2018.07.009</pub-id><pub-id pub-id-type="pmid">30057153</pub-id></citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Veli&#x0010D;kovi&#x00107;</surname> <given-names>P.</given-names></name> <name><surname>Cucurull</surname> <given-names>G.</given-names></name> <name><surname>Casanova</surname> <given-names>A.</given-names></name> <name><surname>Romero</surname> <given-names>A.</given-names></name> <name><surname>Lio</surname> <given-names>P.</given-names></name> <name><surname>Bengio</surname> <given-names>Y.</given-names></name></person-group> (<year>2017</year>). <article-title>Graph attention networks</article-title>. <source>arXiv</source> [Preprint]. arXiv:1710.10903. <pub-id pub-id-type="doi">10.48550/arXiv.1710.10903</pub-id></citation>
</ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>F.</given-names></name> <name><surname>Huang</surname> <given-names>Z.-A.</given-names></name> <name><surname>Chen</surname> <given-names>X.</given-names></name> <name><surname>Zhu</surname> <given-names>Z.</given-names></name> <name><surname>Wen</surname> <given-names>Z.</given-names></name> <name><surname>Zhao</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>LRLSHMDA: Laplacian regularized least squares for human microbe-disease association prediction</article-title>. <source>Sci. Rep</source>. <volume>7</volume>:<fpage>7601</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-017-08127-2</pub-id><pub-id pub-id-type="pmid">28790448</pub-id></citation></ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>F.</given-names></name> <name><surname>Yang</surname> <given-names>H.</given-names></name> <name><surname>Wu</surname> <given-names>Y.</given-names></name> <name><surname>Peng</surname> <given-names>L.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name></person-group> (<year>2023</year>). <article-title>SAELGMDA: identifying human microbe-disease associations based on sparse autoencoder and LightGBM</article-title>. <source>Front. Microbiol</source>. <volume>14</volume>:<fpage>1207209</fpage>. <pub-id pub-id-type="doi">10.3389/fmicb.2023.1207209</pub-id><pub-id pub-id-type="pmid">37415823</pub-id></citation></ref>
<ref id="B34">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>He</surname> <given-names>X.</given-names></name> <name><surname>Cao</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>M.</given-names></name> <name><surname>Chua</surname> <given-names>T.-S.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;KGAT: knowledge graph attention network for recommendation,&#x0201D;</article-title> in <source>Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery &#x00026; Data Mining</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>950</fpage>&#x02013;<lpage>958</lpage>. <pub-id pub-id-type="doi">10.1145/3292500.3330989</pub-id><pub-id pub-id-type="pmid">37257707</pub-id></citation></ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>Z.</given-names></name> <name><surname>Jiang</surname> <given-names>W.</given-names></name> <name><surname>Huang</surname> <given-names>W.</given-names></name> <name><surname>Lin</surname> <given-names>Y.</given-names></name> <name><surname>Chan</surname> <given-names>F. K.</given-names></name> <name><surname>Ng</surname> <given-names>S. C.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Gut microbiota in patients with obesity and metabolic disorders&#x02013;a systematic review</article-title>. <source>Genes Nutr</source>. <volume>17</volume>:<fpage>2</fpage>. <pub-id pub-id-type="doi">10.1186/s12263-021-00703-6</pub-id><pub-id pub-id-type="pmid">35093025</pub-id></citation></ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yan</surname> <given-names>C.</given-names></name> <name><surname>Duan</surname> <given-names>G.</given-names></name> <name><surname>Wu</surname> <given-names>F.-X.</given-names></name> <name><surname>Pan</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name></person-group> (<year>2019</year>). <article-title>BRWMDA: predicting microbe-disease associations based on similarities and bi-random walk on disease and microbe networks</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinform</source>. <volume>17</volume>, <fpage>1595</fpage>&#x02013;<lpage>1604</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2019.2907626</pub-id><pub-id pub-id-type="pmid">30932846</pub-id></citation></ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhu</surname> <given-names>Y.</given-names></name> <name><surname>Xu</surname> <given-names>Y.</given-names></name> <name><surname>Yu</surname> <given-names>F.</given-names></name> <name><surname>Liu</surname> <given-names>Q.</given-names></name> <name><surname>Wu</surname> <given-names>S.</given-names></name> <name><surname>Wang</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Deep graph contrastive representation learning</article-title>. <source>arXiv</source> [Preprint]. arXiv:2006.04131. <pub-id pub-id-type="doi">10.48550/arXiv.2006.04131</pub-id></citation>
</ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zou</surname> <given-names>S.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>Z.</given-names></name></person-group> (<year>2017</year>). <article-title>A novel approach for predicting microbe-disease associations by bi-random walk on the heterogeneous network</article-title>. <source>PLoS ONE</source> <volume>12</volume>:<fpage>e0184394</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0184394</pub-id><pub-id pub-id-type="pmid">28880967</pub-id></citation></ref>
</ref-list>
</back>
</article>