<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Pharmacol.</journal-id>
<journal-title>Frontiers in Pharmacology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Pharmacol.</abbrev-journal-title>
<issn pub-type="epub">1663-9812</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1205144</article-id>
<article-id pub-id-type="doi">10.3389/fphar.2023.1205144</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Pharmacology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Drug&#x2013;disease association prediction with literature based multi-feature fusion</article-title>
<alt-title alt-title-type="left-running-head">Kang et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fphar.2023.1205144">10.3389/fphar.2023.1205144</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Kang</surname>
<given-names>Hongyu</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2276200/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hou</surname>
<given-names>Li</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Gu</surname>
<given-names>Yaowen</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2169561/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lu</surname>
<given-names>Xiao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1987891/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Li</surname>
<given-names>Jiao</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Li</surname>
<given-names>Qin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2004811/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Biomedical Engineering</institution>, <institution>School of Life Science</institution>, <institution>Beijing Institute of Technology</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Institute of Medical Information</institution>, <institution>Chinese Academy of Medical Sciences and Peking Union Medical College</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/685112/overview">Sajjad Gharaghani</ext-link>, University of Tehran, Iran</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1671399/overview">Yuchen Zhang</ext-link>, Northwest A&#x26;F University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/311715/overview">Dong Li</ext-link>, Beijing Proteome Research Center, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Qin Li, <email>liqin@bit.edu.cn</email>; Jiao Li, <email>li.jiao@imicams.ac.cn</email>
</corresp>
<fn fn-type="equal" id="fn1">
<label>
<sup>&#x2020;</sup>
</label>
<p>These authors have contributed equally to this work and share first authorship</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>22</day>
<month>05</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1205144</elocation-id>
<history>
<date date-type="received">
<day>13</day>
<month>04</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>05</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Kang, Hou, Gu, Lu, Li and Li.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Kang, Hou, Gu, Lu, Li and Li</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>
<bold>Introduction:</bold> Exploring the potential efficacy of a drug is a valid approach for drug development with shorter development times and lower costs. Recently, several computational drug repositioning methods have been introduced to learn multi-features for potential association prediction. However, fully leveraging the vast amount of information in the scientific literature to enhance drug-disease association prediction is a great challenge.</p>
<p>
<bold>Methods:</bold> We constructed a drug-disease association prediction method called Literature Based Multi-Feature Fusion (LBMFF), which effectively integrated known drugs, diseases, side effects and target associations from public databases as well as literature semantic features. Specifically, a pre-training and fine-tuning BERT model was introduced to extract literature semantic information for similarity assessment. Then, we revealed drug and disease embeddings from the constructed fusion similarity matrix by a graph convolutional network with an attention mechanism.</p>
<p>
<bold>Results:</bold> LBMFF achieved superior performance in drug-disease association prediction with an AUC value of 0.8818 and an AUPR value of 0.5916.</p>
<p>
<bold>Discussion:</bold> LBMFF achieved relative improvements of 31.67% and 16.09%, respectively, over the second-best results, compared to single feature methods and seven existing state-of-the-art prediction methods on the same test datasets. Meanwhile, case studies have verified that LBMFF can discover new associations to accelerate drug development. The proposed benchmark dataset and source code are available at: <ext-link ext-link-type="uri" xlink:href="https://github.com/kang-hongyu/LBMFF">https://github.com/kang-hongyu/LBMFF</ext-link>.</p>
</abstract>
<kwd-group>
<kwd>drug repositioning</kwd>
<kwd>drug</kwd>
<kwd>disease</kwd>
<kwd>association prediction</kwd>
<kwd>literature</kwd>
<kwd>multi-feature fusion</kwd>
</kwd-group>
<contract-sponsor id="cn001">National Social Science Fund of China<named-content content-type="fundref-id">10.13039/501100012456</named-content>
</contract-sponsor>
<contract-sponsor id="cn002">National Key Research and Development Program of China<named-content content-type="fundref-id">10.13039/501100012166</named-content>
</contract-sponsor>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Experimental Pharmacology and Drug Discovery</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>In recent decades, drug discovery techniques and biological systems have been intensively studied by multidisciplinary researchers. However, drug development remains a time-consuming, costly and labor-intensive process. It normally requires more than one to two billion dollars and an average of about 10&#x2013;15 years to discover a new drug (<xref ref-type="bibr" rid="B4">Berdigaliyev and Aljofan, 2020</xref>). Approximately 90% of experimental candidates fail to pass the clinical trials (<xref ref-type="bibr" rid="B39">Mullard, 2022</xref>; <xref ref-type="bibr" rid="B52">Sun et al., 2022</xref>), owing to the unpredictable adverse reactions from new molecular structures. Drug repositioning (commonly known as &#x201c;reuse of old drugs&#x201d;) is a strategy for identifying new uses for approved or investigational drugs that are outside the scope of the original medical indications (<xref ref-type="bibr" rid="B46">Pushpakom et al., 2019</xref>; <xref ref-type="bibr" rid="B47">Schcolnik-Cabrera and Ju&#xe1;rez-L&#xf3;pez, 2021</xref>). It can facilitate the drug development process, shorten the required time to 6.5 years and reduce the cost to 300 million dollars (<xref ref-type="bibr" rid="B42">Nosengo, 2016</xref>; <xref ref-type="bibr" rid="B5">Breckenridge and Jacob, 2019</xref>). From a drug safety perspective, repositioning drug candidates that have already passed early-stage clinical trials can sufficiently reduce the risk of failure.</p>
<p>In recent years, computational drug repositioning methods (<xref ref-type="bibr" rid="B62">Wu et al., 2013</xref>; <xref ref-type="bibr" rid="B11">Chan et al., 2019</xref>; <xref ref-type="bibr" rid="B13">Deng et al., 2022</xref>) have attracted continuous attention with explosive growth of large-scale genomic and phenotypic data. A variety of studies have confirmed the availability and desirable performances of computational drug repositioning (<xref ref-type="bibr" rid="B19">Giuliani et al., 2018</xref>; <xref ref-type="bibr" rid="B45">Piplani et al., 2021</xref>; <xref ref-type="bibr" rid="B17">Firoozbakht et al., 2022</xref>; <xref ref-type="bibr" rid="B24">Huang et al., 2022</xref>). Previous typical computational approaches include, but are not limited to, the following three: complex network methods, machine learning methods, and deep learning methods. In addition, knowledge organization methods have recently been gradually applied to the study of drug-disease relationship prediction.</p>
<p>Complex network methods refer to linking drugs to diseases through heterogeneous networks construction (<xref ref-type="bibr" rid="B23">Holzinger and Ritchie, 2012</xref>) with high-throughput omics data calculation (e.g., similarity calculation (<xref ref-type="bibr" rid="B35">Meng et al., 2021</xref>)). Network-based algorithms (e.g., random walk) have been demonstrated effective in drug-disease association prediction based on the topological characteristics in these heterogeneous networks. <xref ref-type="bibr" rid="B62">Wu et al. (2013)</xref> considered not only gene features, but also pathway, phenotype, biological process and other features in <xref ref-type="bibr" rid="B28">KEGG (2023)</xref> database to build a weighted disease-drug heterogeneous network, and predicted all possible drug-disease pairs through a clustering algorithm; <xref ref-type="bibr" rid="B8">Cami et al. (2013)</xref> constructed Predictive Pharmacointeraction Networks (PPINs) together with intrinsic and taxonomic properties of drugs and adverse events for drug-disease association prediction. Not limited to binary networks, <xref ref-type="bibr" rid="B57">Wang et al. (2014)</xref> calculated similarities through an iterative algorithm based on a three-layer heterogeneous graph of drugs, diseases and targets called TL-HGBI. <xref ref-type="bibr" rid="B33">Luo et al. (2016)</xref> first integrated comprehensive similarities of drugs and diseases and then identified potential indications of drugs with a double random walk method (MBIRW). In the follow-up study (<xref ref-type="bibr" rid="B32">Luo et al., 2019</xref>), they added phenotypes and genes into an upgraded drug repositioning recommendation system (DRRS) to predict novel drug indications with improved accuracy.</p>
<p>Machine learning methods have been established techniques in drug repositioning in recent years, which can be divided into two steps: first extracting biological features of drugs and diseases and then predicting novel drug-disease associations. <xref ref-type="bibr" rid="B20">Gottlieb et al. (2011)</xref> integrated multiple drug and disease similarity measurements and sorted predicted drug-disease pairs by logistic regression algorithm, which can be applied to large-scale data. Support vector machine (<xref ref-type="bibr" rid="B58">Wang et al., 2013</xref>) and random forest (<xref ref-type="bibr" rid="B29">Kim et al., 2019</xref>) are also considered brilliant methods for drug-disease association predictions and achieved good performance in early studies. <xref ref-type="bibr" rid="B40">Napolitano et al. (2013)</xref> reported a joint kernel based on drug-related data, such as gene expression, chemical structure and target information, in support vector machine classification to predict drug repositioning. Machine learning approaches are effective in integrating prior information. However, their biological interpretability is limited (<xref ref-type="bibr" rid="B48">Shah et al., 2021</xref>) and their performance is constrained by the sparsity of biological interactions. Also, due to the complexity of matrix operations, processing large-scale data is highly challenging.</p>
<p>The remarkable rise of deep learning has led to an overwhelming amount of new research. Long Short-Term Memory (<xref ref-type="bibr" rid="B34">Lyu et al., 2017</xref>), Bidirectional Encoder Representation from Transformers (<xref ref-type="bibr" rid="B30">Lee et al., 2020</xref>) and Graph Neural Network have provided significant improvements in biomedical information retrieval (<xref ref-type="bibr" rid="B51">Sun et al., 2021</xref>), question and answer systems (<xref ref-type="bibr" rid="B59">Wen et al., 2020</xref>) and image recognition (<xref ref-type="bibr" rid="B55">Vellal et al., 2021</xref>). In addition, several studies have described the use of these techniques for drug discovery. <xref ref-type="bibr" rid="B68">Zitnik et al. (2018)</xref> presented a graph convolution neural network to handle multimodal graphs with a large number of edge types including drug, protein, target and side effect. <xref ref-type="bibr" rid="B16">Fatehifar and Karshenas (2012)</xref> proposed a BI-LSTM model and <xref ref-type="bibr" rid="B43">Pang et al. (2022)</xref> proposed a novel attention-mechanism-based multidimensional feature encoder to extract the drug-drug interaction, which performed better than some state-of-the-art methods. <xref ref-type="bibr" rid="B31">Li et al. (2020)</xref> acquired potential feature representations from miRNA and disease similarity network with graph convolutional network and developed a Neural Inductive Matrix Completion method for miRNA-disease association prediction. Graph Neural Network (GNN) (2023) performs particularly well in handling comprehensive information and heterogeneous semantically-rich graphs. 
The existing GNN processing methods (<xref ref-type="bibr" rid="B63">Wu et al., 2021</xref>) contain Graph Convolutional Network, Graph Sample and Aggregate, Graph Attention Network, <italic>etc.</italic> With the rapid accumulation of biological network data, GNN has become an effective tool in bioinformatics tasks (<xref ref-type="bibr" rid="B67">Zhang et al., 2021</xref>). Taking drug development as an example, it has been proven a practical way of achieving greater efficiency in drug attribute prediction (<xref ref-type="bibr" rid="B21">Gu et al., 2021</xref>), drug side effect prediction (<xref ref-type="bibr" rid="B68">Zitnik et al., 2018</xref>), relationship extraction (<xref ref-type="bibr" rid="B1">Al-Sabri et al., 2022</xref>), <italic>etc.</italic>
</p>
<p>Ontology (<xref ref-type="bibr" rid="B2">Bandrowski et al., 2016</xref>) and knowledge graph (<xref ref-type="bibr" rid="B41">Nicholson and Greene, 2020</xref>) can provide structured, computable organization and management of large amounts of data. Several biomedical ontologies have been proven useful in biomedical text mining studies (<xref ref-type="bibr" rid="B49">Shen and Lee, 2016</xref>; <xref ref-type="bibr" rid="B26">Kafkas and Hoehndorf, 2019</xref>), including <xref ref-type="bibr" rid="B14">Disease Ontology (2023)</xref>, Human Phenotype Ontology (HPO) (2023), UMLS, <italic>etc.</italic> Different ontologies can also be constructed based on their research objectives. <xref ref-type="bibr" rid="B6">Brown and Patel (2017)</xref> mined drug-drug links by mapping drug terminology to standardized terms from MeSH. <xref ref-type="bibr" rid="B27">Karim et al. (2019)</xref> extracted attribute and relationship embedding from a drug-adverse reaction knowledge graph they developed to infer drug-drug interactions according to biomedical databases and literature. <xref ref-type="bibr" rid="B38">Moon et al. (2021)</xref> constructed a knowledge graph to learn drug-disease-target embedding to inform drug repurposing hypotheses.</p>
<p>In addition to the drug-disease associations proved by clinical practice, physicians and researchers have conducted further studies and explorations into new drug combinations and drug indications. They detailed the entire process and reported it in a timely manner in the form of scientific literature. Compared with public databases such as <xref ref-type="bibr" rid="B15">Drugbank (2023)</xref>, <xref ref-type="bibr" rid="B44">PharmGKB (2023)</xref>, and <xref ref-type="bibr" rid="B37">MeSH (2023)</xref>, <italic>etc.</italic>, biomedical literature contains not only a massive number of biomedical entities (<xref ref-type="bibr" rid="B12">Chen et al., 2020</xref>), such as drugs, indications, side effects, and targets, but also associations that have been discovered recently. Considering the vast amount of semantic information contained in scientific literature, current approaches need to improve the integration ability of validated relational features in public databases presented as structured data along with the newly discovered relational features and semantic features in biomedical scientific literature.</p>
<p>To overcome the mentioned limitation, we proposed a novel drug-disease association prediction method called Literature Based Multi-Feature Fusion (LBMFF). LBMFF not only integrated multiple heterogeneous biological interactions (drug, disease, side effect and target), but also extracted semantic embeddings and contextual information from large-scale scientific literature. Specifically, we constructed drug-drug similarities and disease-disease similarities based on multiple features and associations from public databases and PubMed literature. Then, a GCN with an attention mechanism was employed to capture structural information from a comprehensive similarity matrix and known drug-disease associations. LBMFF achieved optimal results compared to single-feature methods, which demonstrated the significance of literature information and feature fusion. It also showed superior performance in drug-disease association prediction compared to 7 state-of-the-art methods.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Materials and methods</title>
<sec id="s2-1">
<title>2.1 Dataset</title>
<p>In our study, the benchmark dataset, downloaded from <xref ref-type="bibr" rid="B66">Zhang et al. (2018)</xref>, contains 269 drugs, 598 diseases and 18,416 drug-disease associations originating from the Comparative Toxicogenomics Database (CTD). What&#x2019;s more, we extracted drug chemical structures (represented by SMILES) and drug-target associations from Drugbank, drug-side effect associations from <xref ref-type="bibr" rid="B50">SIDER (2023)</xref> and disease tree numbers from MeSH as multi-features for drug-drug similarities and disease-disease similarities calculation. Overall, in addition to the raw data from CTD, we extended the benchmark dataset to 269 drug SMILES sequences, 3,797 side effects and 43,508 drug-side effect associations, 266 targets and 722 drug-target associations.</p>
<p>More importantly, we searched and selected 673,665 full-text scientific articles whose titles or abstracts contained the drugs or diseases from the benchmark dataset. This vast literature serves as a corpus for the semantic similarity computation section based on a pre-training and fine-tuning BERT model.</p>
<p>Furthermore, we introduced a dataset from <xref ref-type="bibr" rid="B57">Wang et al. (2014)</xref>, named TL-HGBI, for method portability validation. It contains 963 drugs, 1,263 diseases and 54,921 drug-disease associations originating from CTD. Similarly, we also collected drug SMILES sequences, disease MeSH tree numbers, drug-side effect associations, drug-target associations and scientific literature.</p>
</sec>
<sec id="s2-2">
<title>2.2 Architecture of LBMFF</title>
<p>The LBMFF combined embeddings in drug-disease-target-side effect networks from public databases including CTD, Drugbank, SIDER and MeSH. What&#x2019;s more, semantic features from a vast amount of scientific literature were added to LBMFF as an improved approach. The workflow of LBMFF is briefly shown in <xref ref-type="fig" rid="F1">Figure 1</xref>.<list list-type="simple">
<list-item>
<p>&#x2022; Association and Semantic Feature Extraction. We integrated several measurements for drug and disease similarities computing, including drug SMILES sequences, disease MeSH tree numbers, drug-side effect associations, drug-target associations, and literature semantic information. A pre-training and fine-tuning BERT model was introduced for semantic information recognition and understanding.</p>
</list-item>
<list-item>
<p>&#x2022; Similarity Calculation and Feature Representation. For feature fusion and similarity computing, an adjusted weight for each measurement was applied to achieve optimal performance by a step of 0.01. We then constructed a feature matrix based on the drug fusion similarity, disease fusion similarity and known drug-disease associations.</p>
</list-item>
<list-item>
<p>&#x2022; Association Prediction. We applied two GCN layers to learn the embeddings of drugs and diseases with an attention mechanism. An inner product decoder was used to discover unknown drug-disease associations.</p>
</list-item>
</list>
</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>The workflow of LBMFF.</p>
</caption>
<graphic xlink:href="fphar-14-1205144-g001.tif"/>
</fig>
</sec>
<sec id="s2-3">
<title>2.3 Feature extraction</title>
<sec id="s2-3-1">
<title>2.3.1 Drug chemical structure</title>
<p>Drugs can usually be characterized by biological or chemical descriptors, that is, molecular fingerprints. Molecular fingerprints are ways of encoding the structure of a molecule. The most common type of fingerprint is a series of binary digits (bits) that represent the presence or absence of particular substructures in the molecule (<xref ref-type="bibr" rid="B10">Cao et al., 2012</xref>). In this study, we adopted the drug SMILES sequences and generated their Morgan fingerprints to capture the molecular substructures and to calculate the chemical structure drug-drug similarities.</p>
<p>Based on this principle, we convert a drug into an n-dimensional fingerprint vector <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <italic>n</italic> is the number of all substructures. If there is a substructure in the drug, <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> will be 1, otherwise, it will be 0. In this section, we adopt the Jaccard index to calculate drug-drug similarity:<disp-formula id="e1">
<mml:math id="m3">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac bevelled="true">
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x2229;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x222a;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where <inline-formula id="inf3">
<mml:math id="m4">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>c</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the similarity between drug <inline-formula id="inf4">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and drug <inline-formula id="inf5">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf6">
<mml:math id="m7">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is an n-dimensional vector of drug <inline-formula id="inf7">
<mml:math id="m8">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf8">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is an n-dimensional vector of drug <inline-formula id="inf9">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</sec>
<sec id="s2-3-2">
<title>2.3.2 Drug-side effect interaction</title>
<p>From the view of &#x201c;if a drug has the same side effects, it may have the same indication&#x201d;, in 2008, <xref ref-type="bibr" rid="B9">Campillos et al. (2008)</xref> proposed a method to calculate drug similarity based on drug-side effect interaction, which has been widely used in subsequent studies. A k-dimensional drug vector <inline-formula id="inf10">
<mml:math id="m11">
<mml:mrow>
<mml:mi mathvariant="bold-italic">Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> can be generated based on the known drug-side effect interaction from SIDER, where k is the number of related side effects. If there is an interaction between the drug and side effects, <inline-formula id="inf11">
<mml:math id="m12">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> will be 1, otherwise, it will be 0. We also adopt the Jaccard index to calculate drug-drug similarity <inline-formula id="inf12">
<mml:math id="m13">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>s</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> on this dimension.</p>
</sec>
<sec id="s2-3-3">
<title>2.3.3 Drug-target interaction</title>
<p>Similarly, drug-target interactions are also a valid approach for drug similarity calculations. We extract this information from Drugbank and adopt the Jaccard index to measure drug-drug similarity <inline-formula id="inf13">
<mml:math id="m14">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</sec>
<sec id="s2-3-4">
<title>2.3.4 MeSH semantic attribute for disease</title>
<p>The Medical Subject Headings (MeSH) thesaurus is a controlled and hierarchically-organized vocabulary produced by the National Library of Medicine. <xref ref-type="bibr" rid="B56">Wang et al. (2010)</xref> proposed a disease semantic similarity method by using MeSH hierarchically organized information, which was regarded as a directed acyclic graph (DAG). For the disease <italic>d</italic>, we denote its DAG as <inline-formula id="inf14">
<mml:math id="m15">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b5;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf15">
<mml:math id="m16">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is a node set including disease <italic>d</italic> and its ancestor nodes, <inline-formula id="inf16">
<mml:math id="m17">
<mml:mrow>
<mml:mi>&#x3b5;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the set of direct links from parent nodes to child nodes in <inline-formula id="inf17">
<mml:math id="m18">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. We define the semantic value of disease <italic>d</italic> as <inline-formula id="inf18">
<mml:math id="m19">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where semantic contribution decay factor <inline-formula id="inf19">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> can be formulated as:<disp-formula id="e2">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mtext>&#x2003;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>f</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi mathvariant="italic">max</mml:mi>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:mn>0.5</mml:mn>
<mml:mo>&#x2a;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi>n</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:msup>
<mml:mi>n</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>o</mml:mi>
<mml:mi>f</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mtext>&#x2002;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>f</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>n</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>Based on this definition, the disease semantic similarity can be expressed as follows:<disp-formula id="e3">
<mml:math id="m22">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>d</mml:mi>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2229;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>D</mml:mi>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>where <inline-formula id="inf20">
<mml:math id="m23">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>d</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the similarity between disease <inline-formula id="inf21">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and disease <inline-formula id="inf22">
<mml:math id="m25">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Intuitively, two diseases that share more ancestor nodes tend to have a higher semantic similarity.</p>
</sec>
<sec id="s2-3-5">
<title>2.3.5 Literature semantic similarity based on BERT</title>
<p>
<xref ref-type="bibr" rid="B60">BERT (2023)</xref>, which stands for Bidirectional Encoder Representations from Transformers, is based on a multi-layer bidirectional Transformer model in which every output element is connected to every input element, and the weightings between them are dynamically calculated based upon their connection. The Transformer mechanism gives BERT its increased capacity to understand context and ambiguity in language.</p>
<p>The model architecture is shown in <xref ref-type="fig" rid="F2">Figure 2</xref>.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Pre-training and fine-tuning BERT.</p>
</caption>
<graphic xlink:href="fphar-14-1205144-g002.tif"/>
</fig>
<p>The BERT model is first pre-trained on a dataset containing 673,665 full-text scientific articles downloaded from PubMed, which include the drugs and diseases mentioned in the benchmark dataset. The parameters of the pre-training step are set as follows: 10,000 training epochs, a vector dimension of 128, a learning rate of 0.01 and a dropout of 0.1.</p>
<p>After pre-training the BERT model, we used fine-tuning to train a binary classification model via five-fold cross-validation. Fine-tuning is a method of making small adjustments to a pre-trained model for a specific task. In the binary classification task, our objective was to minimize the cross-entropy loss function, which can be represented as follows:<disp-formula id="e4">
<mml:math id="m26">
<mml:mrow>
<mml:mi mathvariant="normal">L</mml:mi>
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mi mathvariant="normal">s</mml:mi>
<mml:mi mathvariant="normal">s</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="normal">y</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>log</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="normal">y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>log</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>where <inline-formula id="inf23">
<mml:math id="m27">
<mml:mrow>
<mml:mi mathvariant="normal">y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the label (in our case, either 0 or 1), and <inline-formula id="inf24">
<mml:math id="m28">
<mml:mrow>
<mml:mi mathvariant="normal">p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the predicted probability of the label being 1. The cross-entropy loss function is used to measure the difference between the predicted probability distribution and the true label, allowing us to optimize the model&#x2019;s parameters to minimize error. During the fine-tuning process, we used the training dataset to adjust the BERT model&#x2019;s parameters, improving the model&#x2019;s classification accuracy. Using this approach, we were able to apply the BERT model to predict associations between drugs and diseases and calculate similarities between drug-drug and disease-disease pairs. In this binary classification task, the label is 1 when a drug is associated with a disease; otherwise, the label is 0.</p>
<p>The training procedure uses only drug-disease associations from the training set and no associations from the test set. Specifically, we use this fine-tuning process to improve the accuracy of semantic comprehension and the ability of similarity calculation for drugs and diseases. When we feed drug-drug or disease-disease pairs into the BERT model, it can compute the corresponding drug-drug similarities and disease-disease similarities (semantically similar to the binary classification task of associations) based on the previously mentioned fine-tuned BERT model.</p>
</sec>
</sec>
<sec id="s2-4">
<title>2.4 Similarity matrix fusion</title>
<p>Based on the above four characteristics (chemical structure, drug-side effect, drug-target, literature-based semantic representation) of drugs, drug similarity <inline-formula id="inf25">
<mml:math id="m29">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>r</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> between drug <inline-formula id="inf26">
<mml:math id="m30">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and drug <inline-formula id="inf27">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> based on multi-feature fusion can be expressed as:<disp-formula id="e5">
<mml:math id="m32">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>r</mml:mi>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>c</mml:mi>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>s</mml:mi>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b3;</mml:mi>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>&#x3b4;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>where <inline-formula id="inf28">
<mml:math id="m33">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b3;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>&#x3b4;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. The optimization step for the combination is 0.01.</p>
<p>Similarly, disease similarity <inline-formula id="inf29">
<mml:math id="m34">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>d</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> between disease <inline-formula id="inf30">
<mml:math id="m35">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and disease <inline-formula id="inf31">
<mml:math id="m36">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> based on MeSH and literature semantic features can be expressed as:<disp-formula id="e6">
<mml:math id="m37">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>d</mml:mi>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>d</mml:mi>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>&#x3b4;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>where <inline-formula id="inf32">
<mml:math id="m38">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>&#x3b4;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</sec>
<sec id="s2-5">
<title>2.5 Feature representation</title>
<p>In this paper, we construct the association feature representation between drugs and diseases based on the drug comprehensive similarity matrix, the disease comprehensive similarity matrix and the known drug-disease associations. The binary matrix <inline-formula id="inf33">
<mml:math id="m39">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represents drug-disease associations, where <italic>m</italic> is the number of drugs and <italic>n</italic> is the number of diseases. When drug <inline-formula id="inf34">
<mml:math id="m40">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is associated with disease <inline-formula id="inf35">
<mml:math id="m41">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf36">
<mml:math id="m42">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>; otherwise, <inline-formula id="inf37">
<mml:math id="m43">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>The adjacency matrix of drug-disease association features can be expressed as:<disp-formula id="e7">
<mml:math id="m44">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>H</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mi>A</mml:mi>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mi>d</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>where <inline-formula id="inf38">
<mml:math id="m45">
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf39">
<mml:math id="m46">
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mi>d</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are the normalized forms of the comprehensive similarity matrices <inline-formula id="inf40">
<mml:math id="m47">
<mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf41">
<mml:math id="m48">
<mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mi>d</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> through Laplacian normalization:<disp-formula id="e8">
<mml:math id="m49">
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mi>D</mml:mi>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:msubsup>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
<mml:msubsup>
<mml:mi>D</mml:mi>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
<disp-formula id="e9">
<mml:math id="m50">
<mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mi>d</mml:mi>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mi>D</mml:mi>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:msubsup>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mi>d</mml:mi>
</mml:msup>
<mml:msubsup>
<mml:mi>D</mml:mi>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>where <inline-formula id="inf42">
<mml:math id="m51">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the degree matrix.</p>
</sec>
<sec id="s2-6">
<title>2.6 Associated prediction based on GCN</title>
<p>GCN is a multilayer connected neural network architecture used to learn low-dimensional representations of nodes from graph-structured data (<xref ref-type="bibr" rid="B54">Thomas and Kipf, 2017</xref>).</p>
<sec id="s2-6-1">
<title>2.6.1 Encoder</title>
<p>The adjacency matrix <inline-formula id="inf43">
<mml:math id="m52">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>H</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> mentioned above is introduced into the GCN encoder to extract drug and disease embeddings respectively. We initialize the embeddings of drugs and diseases as <inline-formula id="inf44">
<mml:math id="m53">
<mml:mrow>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
<mml:mtd>
<mml:mi>A</mml:mi>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mtd>
<mml:mtd>
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, and set the associations in the test dataset to 0. Subsequently, the GCN layer is denoted as:<disp-formula id="e10">
<mml:math id="m54">
<mml:mrow>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>U</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>D</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:msup>
<mml:mi>A</mml:mi>
<mml:msup>
<mml:mover accent="true">
<mml:mi>D</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>where <inline-formula id="inf45">
<mml:math id="m55">
<mml:mrow>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the embedding at the (<italic>l</italic>&#x2b;1)th layer, <inline-formula id="inf46">
<mml:math id="m56">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>D</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> is the degree matrix of <inline-formula id="inf47">
<mml:math id="m57">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>H</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf48">
<mml:math id="m58">
<mml:mrow>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the trainable weight matrix of the <italic>l</italic>th layer, and ReLU is used as the activation function.</p>
</sec>
<sec id="s2-6-2">
<title>2.6.2 Decoder</title>
<p>Furthermore, we introduce a layer attention mechanism (<xref ref-type="bibr" rid="B54">Thomas and Kipf, 2017</xref>) in LBMFF to fully utilize the drug and disease embeddings from different GCN layers and to adaptively adjust the importance weight of each layer.</p>
<p>The final embedding of drugs and diseases is denoted as follows:<disp-formula id="e11">
<mml:math id="m59">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>where <inline-formula id="inf49">
<mml:math id="m60">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the final embedding of drugs, <inline-formula id="inf50">
<mml:math id="m61">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the final embedding of diseases. <inline-formula id="inf51">
<mml:math id="m62">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is a weight initialized as <inline-formula id="inf52">
<mml:math id="m63">
<mml:mrow>
<mml:mfrac bevelled="true">
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula> and auto-learned by neural networks thereafter.</p>
<p>To reconstruct the adjacency matrix for drug-disease associations, we introduce sigmoid as the activation function into the GCN decoder, and the predicted association matrix can be expressed as:<disp-formula id="e12">
<mml:math id="m64">
<mml:mrow>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mo>&#x2219;</mml:mo>
<mml:mi>W</mml:mi>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>where <inline-formula id="inf53">
<mml:math id="m65">
<mml:mrow>
<mml:msubsup>
<mml:mi>A</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the associated prediction score of drug <inline-formula id="inf54">
<mml:math id="m66">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and disease <inline-formula id="inf55">
<mml:math id="m67">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf56">
<mml:math id="m68">
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#x2a;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the trainable parameter matrix.</p>
<p>Due to the fast training speed of the GCN model, it can be retrained whenever new drug/disease nodes are added.</p>
</sec>
</sec>
</sec>
<sec id="s3">
<title>3 Result and discussion</title>
<sec id="s3-1">
<title>3.1 Experimental setup and performance evaluation</title>
<p>We conducted five-fold cross-validation to evaluate the performance of LBMFF in our study. All known drug-disease associations were randomly divided into five mutually exclusive subsets of the same size; in each round, four subsets were selected as the training set, while the remaining one was used as the test set. Each round of training started from the initial state, and the association prediction was performed on the test set after training. Finally, we adopted the average of the five test performances as the final result.</p>
<p>Area Under Curve (AUC) and Area Under Precision/Recall Curve (AUPR) were used as the primary metrics to evaluate the prediction performances. In addition, we also take several binary classification metrics into consideration, including accuracy (Acc), recall (Rec), specificity (Spe), precision (Pre) and F1-score (F1).</p>
</sec>
<sec id="s3-2">
<title>3.2 Performances of literature based multi-feature fusion (LBMFF)</title>
<p>In this study, we considered multi-features of drugs and diseases, which were chemical structure, drug-side effect association, drug-target association, disease similarity from MeSH, and especially semantic similarity supported by a large scale of literature. The weights of each feature were optimized at a step of 0.01 during the process of feature fusion. The optimal prediction results appeared with the fusion coefficients of <inline-formula id="inf57">
<mml:math id="m69">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3b1;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.08</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">&#x3b2;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.16</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">&#x3b3;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.16</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">&#x3b4;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.60</mml:mn>
<mml:mo>;</mml:mo>
<mml:mi mathvariant="normal">&#x3b8;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.40</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">&#x3b4;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.60</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. Our predictive model achieved AUC and AUPR of 0.8743 and 0.5694 in these cases.</p>
</sec>
<sec id="s3-3">
<title>3.3 Ablation study</title>
<p>To demonstrate the significance of large-scale literature text mining for association prediction, we conducted ablation studies that compared LBMFF with single-feature methods and the Multi-Feature Fusion (MFF) method, as shown in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Comparison of algorithm performance between multi-feature fusion and single-feature methods.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="3" align="center">Methods</th>
<th colspan="5" align="center">Features</th>
<th rowspan="3" align="center">AUPR</th>
<th rowspan="3" align="center">AUC</th>
<th rowspan="3" align="center">F1</th>
<th rowspan="3" align="center">Acc</th>
<th rowspan="3" align="center">Rec</th>
<th rowspan="3" align="center">Spe</th>
<th rowspan="3" align="center">Pre</th>
</tr>
<tr>
<th rowspan="2" align="center">Chemical structure</th>
<th rowspan="2" align="center">Target</th>
<th rowspan="2" align="center">Side effect</th>
<th align="center">Literature Semantic</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">1</td>
<td align="center">&#x221a;</td>
<td align="left"/>
<td align="left"/>
<td colspan="2" align="left"/>
<td align="center">0.5799 &#xb1; 0.001</td>
<td align="center">0.8711 &#xb1; 0.003</td>
<td align="center">0.5541 &#xb1; 0.017</td>
<td align="center">0.8904 &#xb1; 0.000</td>
<td align="center">0.5907 &#xb1; 0.002</td>
<td align="center">
<bold>0.9294 &#xb1; 0.008</bold>
</td>
<td align="center">0.5220 &#xb1; 0.000</td>
</tr>
<tr>
<td align="center">2</td>
<td align="left"/>
<td align="center">&#x221a;</td>
<td align="left"/>
<td colspan="2" align="left"/>
<td align="center">0.5852 &#xb1; 0.000</td>
<td align="center">0.8707 &#xb1; 0.001</td>
<td align="center">0.5598 &#xb1; 0.002</td>
<td align="center">
<bold>0.8910 &#xb1; 0.016</bold>
</td>
<td align="center">0.6007 &#xb1; 0.000</td>
<td align="center">
<underline>0.9289 &#xb1; 0.036</underline>
</td>
<td align="center">
<bold>0.5244 &#xb1; 0.021</bold>
</td>
</tr>
<tr>
<td align="center">3</td>
<td align="left"/>
<td align="left"/>
<td align="center">&#x221a;</td>
<td colspan="2" align="left"/>
<td align="center">0.5787 &#xb1; 0.001</td>
<td align="center">0.8688 &#xb1; 0.005</td>
<td align="center">0.5534 &#xb1; 0.003</td>
<td align="center">0.8847 &#xb1; 0.002</td>
<td align="center">
<underline>0.6194 &#xb1; 0.000</underline>
</td>
<td align="center">0.9193 &#xb1; 0.020</td>
<td align="center">0.5009 &#xb1; 0.002</td>
</tr>
<tr>
<td align="center">4</td>
<td align="left"/>
<td align="left"/>
<td align="left"/>
<td colspan="2" align="center">&#x221a;</td>
<td align="center">
<underline>0.5928 &#xb1; 0.002</underline>
</td>
<td align="center">
<underline>0.8779 &#xb1; 0.000</underline>
</td>
<td align="center">
<underline>0.5644 &#xb1; 0.021</underline>
</td>
<td align="center">
<underline>0.8934 &#xb1; 0.003</underline>
</td>
<td align="center">0.5986 &#xb1; 0.013</td>
<td align="center">0.9277 &#xb1; 0.001</td>
<td align="center">
<underline>0.5242 &#xb1; 0.056</underline>
</td>
</tr>
<tr>
<td align="center">MFF</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td colspan="2" align="left"/>
<td align="center">0.5897 &#xb1; 0.002</td>
<td align="center">0.8769 &#xb1; 0.002</td>
<td align="center">0.5623 &#xb1; 0.030</td>
<td align="center">0.8904 &#xb1; 0.000</td>
<td align="center">0.6095 &#xb1; 0.001</td>
<td align="center">0.9270 &#xb1; 0.005</td>
<td align="center">0.5230 &#xb1; 0.0003</td>
</tr>
<tr>
<td align="center">LBMFF</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td align="center">&#x221a;</td>
<td colspan="2" align="center">&#x221a;</td>
<td align="center">
<bold>0.5961 &#xb1; 0.001</bold>
</td>
<td align="center">
<bold>0.8818 &#xb1; 0.003</bold>
</td>
<td align="center">
<bold>0.5655 &#xb1; 0.001</bold>
</td>
<td align="center">0.8885 &#xb1; 0.001</td>
<td align="center">
<bold>0.6287 &#xb1; 0.005</bold>
</td>
<td align="center">0.9224 &#xb1; 0.000</td>
<td align="center">0.5154 &#xb1; 0.031</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The best results are in bold faces and the second-best results are underlined.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Firstly, the literature-based semantic feature proved more significant for the prediction of unknown relations than the other single-feature methods, with a largest relative improvement of 2.4% on AUPR. Secondly, the results also indicated that MFF outperforms the single-feature methods in terms of AUPR, AUC and Acc. Thirdly, LBMFF reached the best performance: AUC &#x3d; 0.8818, with relative improvements of 1.23%, 1.28%, 0.49%, 0.56% over the single-feature methods and 0.56% over MFF; AUPR &#x3d; 0.5961, with relative improvements of 2.79%, 1.87%, 3.02%, 0.44% over the single-feature methods and 1.09% over MFF. LBMFF also reached the best performance in both F1 and Rec.</p>
<p>According to the aforementioned performance metrics, MFF first achieved better performance than single-feature methods due to the adjusted weights for each measurement. It demonstrated that feature fusion played an essential role in drug-disease association prediction due to the integrated information from different dimensions. LBMFF further extracted semantic information from large-scale literature, which then led to improved performance of MFF-based model.</p>
</sec>
<sec id="s3-4">
<title>3.4 Comparison with state-of-the-art methods</title>
<p>In this section, we compared LBMFF with seven state-of-the-art association prediction methods on the same dataset, Bdataset.</p>
<p>Specifically, we listed these methods as follows.<list list-type="simple">
<list-item>
<p>&#x2022; BNNR (<xref ref-type="bibr" rid="B64">Yang et al., 2019</xref>) was a bounded nuclear norm regularization method carried out on an adjacency matrix of a heterogeneous drug-disease network.</p>
</list-item>
<list-item>
<p>&#x2022; DRHGCN (<xref ref-type="bibr" rid="B7">Cai et al., 2021</xref>) and DRWBNCF (<xref ref-type="bibr" rid="B36">Meng et al., 2022</xref>) partly used GCN-based, deep-learning methodology and weighted bilinear neural collaborative filtering based on heterogeneous information fusion for the drug repositioning approach.</p>
</list-item>
<list-item>
<p>&#x2022; LAGCN (<xref ref-type="bibr" rid="B65">Yu et al., 2022</xref>) predicted drug-disease associations through a layer attention graph convolutional network.</p>
</list-item>
<list-item>
<p>&#x2022; NIMCGCN (<xref ref-type="bibr" rid="B31">Li et al., 2020</xref>) was a novel method of neural inductive matrix completion with GCN for miRNA-disease association prediction.</p>
</list-item>
<list-item>
<p>&#x2022; DDA-SKF (<xref ref-type="bibr" rid="B18">Gao et al., 2021</xref>) constructed multiple similarity kernels for drugs and diseases, and the Laplacian regularized least squares algorithms were used to obtain the association matrix.</p>
</list-item>
<list-item>
<p>&#x2022; REDDA (<xref ref-type="bibr" rid="B22">Gu et al., 2022</xref>) proposed a general heterogeneous GCN-based node embedding block, a topological subnet embedding block, a graph attention block, and a layer attention block.</p>
</list-item>
</list>
</p>
<p>According to <xref ref-type="table" rid="T2">Table 2</xref>; <xref ref-type="fig" rid="F3">Figure 3</xref>, LBMFF achieved the best performance in terms of all the evaluation metrics. LBMFF achieved an AUC value of 0.8818, which was higher than the seven state-of-the-art methods with AUC values of 0.8561, 0.7006, 0.8529, 0.8045, 0.6684, 0.8375, and 0.8466. Meanwhile, our method significantly outperformed all baseline methods on AUPR. More specifically, LBMFF achieved an AUPR value of 0.5961 and achieved a relative improvement of 16.09% compared to the second-best result of 0.5135 from LAGCN. Focusing on the F1 and precision (Pre), our method had distinct advantages over all the baseline methods with relative improvements of 10.71% and 10.51% to the second-best results of BNNR. Although our method achieved only slightly better performance in terms of recall (Rec &#x3d; 0.6287) and specificity (Spe &#x3d; 0.9224) than the second-best results (Rec &#x3d; 0.6005, Spe &#x3d; 0.9166), these two evaluation metrics were significantly better than those of the other methods, with average relative improvements of 24.31% and 6.84%. We extracted multiple heterogeneous biological interactions and semantic embeddings to improve prediction accuracy. These results tended to indicate that LBMFF had state-of-the-art performance against all baseline methods in novel drug-disease association prediction, owing to its superior ability to integrate multiple features from not only public databases but also scientific literature.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Performance compared with 7 baseline methods.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Methods</th>
<th align="left">AUPR</th>
<th align="left">AUC</th>
<th align="left">F1</th>
<th align="left">Acc</th>
<th align="left">Rec</th>
<th align="left">Spe</th>
<th align="left">Pre</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">BNNR</td>
<td align="left">0.5166 &#xb1; 0.008</td>
<td align="left">
<underline>0.8561 &#xb1; 0.002</underline>
</td>
<td align="left">
<underline>0.5108 &#xb1; 0.001</underline>
</td>
<td align="left">
<underline>0.8761 &#xb1; 0.036</underline>
</td>
<td align="left">0.5649 &#xb1; 0.002</td>
<td align="left">0.9164 &#xb1; 0.000</td>
<td align="left">
<underline>0.4664 &#xb1; 0.001</underline>
</td>
</tr>
<tr>
<td align="left">DDA-SKF</td>
<td align="left">0.2521 &#xb1; 0.001</td>
<td align="left">0.7006 &#xb1; 0.000</td>
<td align="left">0.3281 &#xb1; 0.002</td>
<td align="left">0.7900 &#xb1; 0.007</td>
<td align="left">0.4478 &#xb1; 0.005</td>
<td align="left">0.8342 &#xb1; 0.004</td>
<td align="left">0.2591 &#xb1; 0.001</td>
</tr>
<tr>
<td align="left">DRHGCN</td>
<td align="left">0.5063 &#xb1; 0.002</td>
<td align="left">0.8529 &#xb1; 0.004</td>
<td align="left">0.5013 &#xb1; 0.000</td>
<td align="left">0.8746 &#xb1; 0.001</td>
<td align="left">0.5503 &#xb1; 0.022</td>
<td align="left">
<underline>0.9166 &#xb1; 0.006</underline>
</td>
<td align="left">0.4604 &#xb1; 0.002</td>
</tr>
<tr>
<td align="left">LAGCN</td>
<td align="left">
<underline>0.5135 &#xb1; 0.000</underline>
</td>
<td align="left">0.8045 &#xb1; 0.002</td>
<td align="left">0.4699 &#xb1; 0.005</td>
<td align="left">0.7966 &#xb1; 0.000</td>
<td align="left">
<underline>0.6005 &#xb1; 0.008</underline>
</td>
<td align="left">0.8220 &#xb1; 0.052</td>
<td align="left">0.4198 &#xb1; 0.002</td>
</tr>
<tr>
<td align="left">NIMCGCN</td>
<td align="left">0.2316 &#xb1; 0.004</td>
<td align="left">0.6684 &#xb1; 0.000</td>
<td align="left">0.2889 &#xb1; 0.007</td>
<td align="left">0.7611 &#xb1; 0.026</td>
<td align="left">0.4227 &#xb1; 0.001</td>
<td align="left">0.8049 &#xb1; 0.003</td>
<td align="left">0.2199 &#xb1; 0.006</td>
</tr>
<tr>
<td align="left">DRWBNCF</td>
<td align="left">0.4552 &#xb1; 0.035</td>
<td align="left">0.8375 &#xb1; 0.020</td>
<td align="left">0.4739 &#xb1; 0.001</td>
<td align="left">0.8646 &#xb1; 0.001</td>
<td align="left">0.5321 &#xb1; 0.000</td>
<td align="left">0.9076 &#xb1; 0.002</td>
<td align="left">0.4280 &#xb1; 0.000</td>
</tr>
<tr>
<td align="left">REDDA</td>
<td align="left">0.4903 &#xb1; 0.000</td>
<td align="left">0.8466 &#xb1; 0.016</td>
<td align="left">0.4936 &#xb1; 0.045</td>
<td align="left">0.8693 &#xb1; 0.004</td>
<td align="left">0.5562 &#xb1; 0.003</td>
<td align="left">0.9098 &#xb1; 0.000</td>
<td align="left">0.4440 &#xb1; 0.002</td>
</tr>
<tr>
<td align="left">LBMFF</td>
<td align="left">
<bold>0.5961 &#xb1; 0.001</bold>
</td>
<td align="left">
<bold>0.8818 &#xb1; 0.003</bold>
</td>
<td align="left">
<bold>0.5655 &#xb1; 0.001</bold>
</td>
<td align="left">
<bold>0.8885 &#xb1; 0.001</bold>
</td>
<td align="left">
<bold>0.6287 &#xb1; 0.005</bold>
</td>
<td align="left">
<bold>0.9224 &#xb1; 0.000</bold>
</td>
<td align="left">
<bold>0.5154 &#xb1; 0.031</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The best results are in bold faces and the second-best results are underlined.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>AUC and AUPR metrics of 8 methods.</p>
</caption>
<graphic xlink:href="fphar-14-1205144-g003.tif"/>
</fig>
<p>To further verify the effectiveness of LBMFF, we applied LBMFF and the seven baseline methods mentioned above to another public benchmark, TL-HGBI, with 963 drugs, 1,263 diseases and 54,921 drug-disease associations. The results in <xref ref-type="table" rid="T3">Table 3</xref>; <xref ref-type="fig" rid="F4">Figure 4</xref> verified the superior predictive robustness of our method. LBMFF achieved the best performance measured by AUPR, F1 and Rec. What&#x2019;s more, its AUPR was 5.86% higher than that of the second-best method, LAGCN.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Performance compared with 7 baseline methods on TL-HGBI dataset.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Methods</th>
<th align="left">AUPR</th>
<th align="left">AUC</th>
<th align="left">F1</th>
<th align="left">Acc</th>
<th align="left">Rec</th>
<th align="left">Spe</th>
<th align="left">Pre</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">BNNR</td>
<td align="left">0.4502 &#xb1; 0.002</td>
<td align="left">0.9065 &#xb1; 0.001</td>
<td align="left">
<underline>0.4640 &#xb1; 0.003</underline>
</td>
<td align="left">
<bold>0.9462 &#xb1; 0.000</bold>
</td>
<td align="left">0.5098 &#xb1; 0.020</td>
<td align="left">
<bold>0.9671 &#xb1; 0.015</bold>
</td>
<td align="left">
<bold>0.4261 &#xb1; 0.004</bold>
</td>
</tr>
<tr>
<td align="left">DRHGCN</td>
<td align="left">0.4824 &#xb1; 0.000</td>
<td align="left">
<bold>0.9295 &#xb1; 0.032</bold>
</td>
<td align="left">0.4723 &#xb1; 0.016</td>
<td align="left">
<underline>0.9442 &#xb1; 0.035</underline>
</td>
<td align="left">0.5459 &#xb1; 0.004</td>
<td align="left">
<underline>0.9633 &#xb1; 0.000</underline>
</td>
<td align="left">
<underline>0.4161 &#xb1; 0.000</underline>
</td>
</tr>
<tr>
<td align="left">DRWBNCF</td>
<td align="left">0.3432 &#xb1; 0.001</td>
<td align="left">0.8927 &#xb1; 0.004</td>
<td align="left">0.4013 &#xb1; 0.000</td>
<td align="left">0.9306 &#xb1; 0.002</td>
<td align="left">0.5090 &#xb1; 0.002</td>
<td align="left">0.9508 &#xb1; 0.030</td>
<td align="left">0.3314 &#xb1; 0.001</td>
</tr>
<tr>
<td align="left">LAGCN</td>
<td align="left">
<underline>0.4970 &#xb1; 0.004</underline>
</td>
<td align="left">0.9155 &#xb1; 0.005</td>
<td align="left">0.4586 &#xb1; 0.007</td>
<td align="left">0.9413 &#xb1; 0.002</td>
<td align="left">
<underline>0.5440 &#xb1; 0.033</underline>
</td>
<td align="left">0.9603 &#xb1; 0.000</td>
<td align="left">0.3968 &#xb1; 0.001</td>
</tr>
<tr>
<td align="left">NIMCGCN</td>
<td align="left">0.1532 &#xb1; 0.003</td>
<td align="left">0.7490 &#xb1; 0.018</td>
<td align="left">0.2317 &#xb1; 0.020</td>
<td align="left">0.9012 &#xb1; 0.000</td>
<td align="left">0.3265 &#xb1; 0.005</td>
<td align="left">0.9287 &#xb1; 0.001</td>
<td align="left">0.1802 &#xb1; 0.006</td>
</tr>
<tr>
<td align="left">DDA-SKF</td>
<td align="left">0.2266 &#xb1; 0.015</td>
<td align="left">0.8608 &#xb1; 0.007</td>
<td align="left">0.3136 &#xb1; 0.000</td>
<td align="left">0.9071 &#xb1; 0.001</td>
<td align="left">0.4646 &#xb1; 0.003</td>
<td align="left">0.9283 &#xb1; 0.002</td>
<td align="left">0.2368 &#xb1; 0.003</td>
</tr>
<tr>
<td align="left">REDDA</td>
<td align="left">0.4243 &#xb1; 0.000</td>
<td align="left">
<underline>0.9225 &#xb1; 0.001</underline>
</td>
<td align="left">0.4493 &#xb1; 0.013</td>
<td align="left">0.9406 &#xb1; 0.001</td>
<td align="left">0.5308 &#xb1; 0.002</td>
<td align="left">0.9602 &#xb1; 0.025</td>
<td align="left">0.3898 &#xb1; 0.010</td>
</tr>
<tr>
<td align="left">LBMFF</td>
<td align="left">
<bold>0.5261 &#xb1; 0.003</bold>
</td>
<td align="left">0.9160 &#xb1; 0.000</td>
<td align="left">
<bold>0.4821 &#xb1; 0.002</bold>
</td>
<td align="left">0.9429 &#xb1; 0.003</td>
<td align="left">
<bold>0.5898 &#xb1; 0.002</bold>
</td>
<td align="left">0.9596 &#xb1; 0.005</td>
<td align="left">0.4078 &#xb1; 0.007</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The best results are in bold faces and the second-best results are underlined.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>AUC and AUPR metrics of 8 methods on TL-HGBI dataset.</p>
</caption>
<graphic xlink:href="fphar-14-1205144-g004.tif"/>
</fig>
</sec>
<sec id="s3-5">
<title>3.5 Case study</title>
<p>To demonstrate the capability of LBMFF to discover new indications and new therapies, all known drug-disease associations have been used to predict unknown drug-disease associations with this model. We conducted case studies with verification from clinical indications that were already in use, ClinicalTrials, CTD and public literature. ClinicalTrials is the largest clinical trials database run by the United States National Library of Medicine (NLM), holding registrations from over 329,000 trials from 209 countries. CTD is a publicly available research resource that curates scientific data describing relationships between chemicals, genes and human diseases by professional curators. In our study, we constructed three kinds of case studies to verify the predictive capability: 1) the top 10 drug-disease associations predicted by LBMFF in <xref ref-type="table" rid="T4">Table 4</xref>, 2) the top 10 associated diseases for given drugs predicted by LBMFF in <xref ref-type="table" rid="T5">Table 5</xref>, and 3) the top 10 associated drugs for given diseases predicted by LBMFF in <xref ref-type="table" rid="T6">Table 6</xref>.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Top 10 drug-disease associations.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">No.</th>
<th align="left">MESH ID</th>
<th align="left">Drug Name</th>
<th align="left">MESH ID</th>
<th align="left">Disease Name</th>
<th align="left">Evidence</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">1</td>
<td align="left">C055162</td>
<td align="left">Clopidogrel</td>
<td align="left">D006973</td>
<td align="left">Hypertension</td>
<td align="left">ClinicalTrials/CTD</td>
</tr>
<tr>
<td align="left">2</td>
<td align="left">D002738</td>
<td align="left">Chloroquine</td>
<td align="left">D018771</td>
<td align="left">Arthralgia</td>
<td align="left">NA</td>
</tr>
<tr>
<td align="left">3</td>
<td align="left">D012293</td>
<td align="left">Rifampin</td>
<td align="left">D011014</td>
<td align="left">Pneumonia</td>
<td align="left">CTD/PMID: 28870736</td>
</tr>
<tr>
<td align="left">4</td>
<td align="left">D019808</td>
<td align="left">Losartan</td>
<td align="left">D001281</td>
<td align="left">Atrial Fibrillation</td>
<td align="left">ClinicalTrials/PMID: 25787020</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">D019821</td>
<td align="left">Simvastatin</td>
<td align="left">D010190</td>
<td align="left">Pancreatic Neoplasms</td>
<td align="left">ClinicalTrials/CTD/PMID: 32402990</td>
</tr>
<tr>
<td align="left">6</td>
<td align="left">D009270</td>
<td align="left">Naloxone</td>
<td align="left">D007859</td>
<td align="left">Learning Disorders</td>
<td align="left">CTD/Clinical indications</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">D002927</td>
<td align="left">Cimetidine</td>
<td align="left">D006331</td>
<td align="left">Heart Diseases</td>
<td align="left">Clinical indications</td>
</tr>
<tr>
<td align="left">8</td>
<td align="left">D002927</td>
<td align="left">Cimetidine</td>
<td align="left">D007249</td>
<td align="left">Inflammation</td>
<td align="left">Clinical indications</td>
</tr>
<tr>
<td align="left">9</td>
<td align="left">D011239</td>
<td align="left">Prednisolone</td>
<td align="left">D008582</td>
<td align="left">Meningitis</td>
<td align="left">CTD/PMID: 33260200</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">D004294</td>
<td align="left">Domperidone</td>
<td align="left">D012640</td>
<td align="left">Seizures</td>
<td align="left">NA</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Top 10 drug-disease association prediction for dexamethasone and doxorubicin.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Drug Name</th>
<th align="left">No.</th>
<th align="left">MESH ID</th>
<th align="left">Disease Name</th>
<th align="left">Evidence</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="10" align="left">Dexamethasone MeSH ID: D003907</td>
<td align="left">1</td>
<td align="left">D004342</td>
<td align="left">Drug Hypersensitivity</td>
<td align="left">ClinicalTrials/CTD/PMID: 28704328</td>
</tr>
<tr>
<td align="left">2</td>
<td align="left">D000743</td>
<td align="left">Anemia, Hemolytic</td>
<td align="left">CTD/PMID: 21848879</td>
</tr>
<tr>
<td align="left">3</td>
<td align="left">D004417</td>
<td align="left">Dyspnea</td>
<td align="left">ClinicalTrials/PMID: 27330023</td>
</tr>
<tr>
<td align="left">4</td>
<td align="left">D029424</td>
<td align="left">Chronic Obstructive Pulmonary Disease</td>
<td align="left">Clinical indications</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">D008581</td>
<td align="left">Meningitis</td>
<td align="left">NA</td>
</tr>
<tr>
<td align="left">6</td>
<td align="left">D002637</td>
<td align="left">Chest Pain</td>
<td align="left">ClinicalTrials/CTD/PMID: 21799397</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">D010190</td>
<td align="left">Pancreatic Neoplasms</td>
<td align="left">ClinicalTrials/CTD/PMID: 32619553</td>
</tr>
<tr>
<td align="left">8</td>
<td align="left">D002318</td>
<td align="left">Cardiovascular Diseases</td>
<td align="left">NA</td>
</tr>
<tr>
<td align="left">9</td>
<td align="left">D009205</td>
<td align="left">Myocarditis</td>
<td align="left">NA</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">D012141</td>
<td align="left">Respiratory Tract Infections</td>
<td align="left">ClinicalTrials</td>
</tr>
<tr>
<td rowspan="9" align="left">Doxorubicin MeSH ID: D004317</td>
<td align="left">1</td>
<td align="left">D002289</td>
<td align="left">Carcinoma, Non-Small-Cell Lung</td>
<td align="left">ClinicalTrials/CTD/PMID: 33075540</td>
</tr>
<tr>
<td align="left">2</td>
<td align="left">D014652</td>
<td align="left">Vascular Diseases</td>
<td align="left">ClinicalTrials/Clinical indications</td>
</tr>
<tr>
<td align="left">3</td>
<td align="left">D009190</td>
<td align="left">Myelodysplastic Syndromes</td>
<td align="left">ClinicalTrials/CTD/PMID: 27299619</td>
</tr>
<tr>
<td align="left">4</td>
<td align="left">D006463</td>
<td align="left">Hemolytic-Uremic Syndrome</td>
<td align="left">NA</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">D002543</td>
<td align="left">Cerebral Hemorrhage</td>
<td align="left">NA</td>
</tr>
<tr>
<td align="left">6</td>
<td align="left">D015473</td>
<td align="left">Leukemia</td>
<td align="left">ClinicalTrials/CTD/PMID: 32949646/Clinical indications</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">D017202</td>
<td align="left">Myocardial Ischemia</td>
<td align="left">NA</td>
</tr>
<tr>
<td align="left">8</td>
<td align="left">D011658</td>
<td align="left">Pulmonary Fibrosis</td>
<td align="left">ClinicalTrials/CTD/PMID: 22607134</td>
</tr>
<tr>
<td align="left">9</td>
<td align="left">D050197</td>
<td align="left">Atherosclerosis</td>
<td align="left">NA</td>
</tr>
<tr>
<td align="left"/>
<td align="left">10</td>
<td align="left">D005910</td>
<td align="left">Glioma</td>
<td align="left">ClinicalTrials/CTD/PMID: 33475372</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>Top 10 drug-disease association prediction for seizures and hypertension.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Disease Name</th>
<th align="left">No.</th>
<th align="left">MESH ID</th>
<th align="left">Drug Name</th>
<th align="left">Evidence</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="5" align="left">Seizures</td>
<td align="left">1</td>
<td align="left">D002034</td>
<td align="left">Bumetanide</td>
<td align="left">ClinicalTrials/CTD/PMID: 33201535</td>
</tr>
<tr>
<td align="left">2</td>
<td align="left">D011239</td>
<td align="left">Prednisolone</td>
<td align="left">ClinicalTrials/CTD/PMID: 33359047</td>
</tr>
<tr>
<td align="left">3</td>
<td align="left">D020123</td>
<td align="left">Sirolimus</td>
<td align="left">ClinicalTrials/CTD/PMID: 35931213</td>
</tr>
<tr>
<td align="left">4</td>
<td align="left">C043211</td>
<td align="left">Carvedilol</td>
<td align="left">ClinicalTrials/CTD</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">D013752</td>
<td align="left">Tetracycline</td>
<td align="left">ClinicalTrials/CTD/PMID: 22579030</td>
</tr>
<tr>
<td rowspan="5" align="left">MeSH ID: D012640</td>
<td align="left">6</td>
<td align="left">D011802</td>
<td align="left">Quinidine</td>
<td align="left">ClinicalTrials/CTD/PMID: 30112700</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">D014805</td>
<td align="left">Vitamin B 12</td>
<td align="left">ClinicalTrials/CTD/PMID: 29563977</td>
</tr>
<tr>
<td align="left">8</td>
<td align="left">D013739</td>
<td align="left">Testosterone</td>
<td align="left">ClinicalTrials/CTD</td>
</tr>
<tr>
<td align="left">9</td>
<td align="left">D017292</td>
<td align="left">Doxazosin</td>
<td align="left">NA</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">D008691</td>
<td align="left">Methadone</td>
<td align="left">NA</td>
</tr>
<tr>
<td rowspan="5" align="left">Hypertension</td>
<td align="left">1</td>
<td align="left">C055162</td>
<td align="left">Clopidogrel</td>
<td align="left">ClinicalTrials/CTD/PMID: 35656824</td>
</tr>
<tr>
<td align="left">2</td>
<td align="left">C060836</td>
<td align="left">Pioglitazone</td>
<td align="left">ClinicalTrials/CTD/PMID: 31712626</td>
</tr>
<tr>
<td align="left">3</td>
<td align="left">C065180</td>
<td align="left">Fluvastatin</td>
<td align="left">ClinicalTrials/CTD/PMID: 17666915</td>
</tr>
<tr>
<td align="left">4</td>
<td align="left">D000086</td>
<td align="left">Acetazolamide</td>
<td align="left">ClinicalTrials/PMID: 26154918</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">D002738</td>
<td align="left">Chloroquine</td>
<td align="left">ClinicalTrials/CTD</td>
</tr>
<tr>
<td rowspan="5" align="left">MeSH ID: D006973</td>
<td align="left">6</td>
<td align="left">D004155</td>
<td align="left">Diphenhydramine</td>
<td align="left">ClinicalTrials</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">D004958</td>
<td align="left">Estradiol</td>
<td align="left">NA</td>
</tr>
<tr>
<td align="left">8</td>
<td align="left">D013629</td>
<td align="left">Tamoxifen</td>
<td align="left">ClinicalTrials/CTD</td>
</tr>
<tr>
<td align="left">9</td>
<td align="left">D015283</td>
<td align="left">Citalopram</td>
<td align="left">NA</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">D000068877</td>
<td align="left">Imatinib Mesylate</td>
<td align="left">ClinicalTrials/CTD</td>
</tr>
</tbody>
</table>
</table-wrap>
<sec id="s3-5-1">
<title>3.5.1 Top 10 drug-disease associations</title>
<p>We listed the top 10 drug-disease associations predicted by LBMFF in <xref ref-type="table" rid="T4">Table 4</xref>, and eight of them can be demonstrated by the four verification methods mentioned above. For example, we found evidence from public literature for rifampin combinations for treating pneumonia (PMID: 28870736), losartan for the prevention of paroxysmal atrial fibrillation in patients with sick sinus syndrome (PMID: 25787020) and simvastatin for improving the early survival rate of patients with pancreatic cancer (PMID: 32402990). As a prospective study, the combination therapy of prednisolone and azathioprine for steroid-responsive meningitis-arteritis treatment in dogs also appeared to be effective for primary treatment. Besides, several predictions have been confirmed effective by ClinicalTrials and CTD records, such as clopidogrel in patients with idiopathic pulmonary arterial hypertension and simvastatin in patients with advanced pancreatic cancer. We further verified that three of the predictions have been applied as mature clinical treatments by passing clinical trials and safety tests. Naloxone is used to relieve respiratory depression and wake people up. Cimetidine is indicated for the treatment of arrhythmia and chronic hepatitis B. This is consistent with the predicted treatment of heart disease and inflammation.</p>
</sec>
<sec id="s3-5-2">
<title>3.5.2 Top 10 associated diseases for given drugs</title>
<p>We selected dexamethasone (MeSH ID: D003907) and doxorubicin (MeSH ID: D004317) as two drug cases to validate the ability to discover new indications. For each drug, the top 10 candidate diseases are ranked according to the prediction scores as shown in <xref ref-type="table" rid="T5">Table 5</xref>. We also visualized the predicted relationships (<xref ref-type="fig" rid="F5">Figure 5</xref>) with different colors and types of lines to represent different validation methods. The more lines between two nodes, the more evidence there was for this relationship.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Top 10 associated diseases for given drugs. Different validation methods are represented with different colors and types of lines. The more lines between two nodes, the more evidence there is for this relationship.</p>
</caption>
<graphic xlink:href="fphar-14-1205144-g005.tif"/>
</fig>
<p>Dexamethasone is a corticosteroid that prevents the release of substances in the body that cause inflammation, such as allergic disorders and skin conditions. It is also used to treat ulcerative colitis, arthritis, lupus, psoriasis, and respiratory disorders. Seven of the top 10 predicted associations have been confirmed by databases, literature and clinical use of dexamethasone. According to literature, dexamethasone works against paclitaxel drug allergy (PMID: 28704328), chest syndrome in patients with sickle cell disease (PMID: 21799397) and dyspnea in cancer patients (PMID: 27330023). What&#x2019;s more, <xref ref-type="bibr" rid="B61">WHO (2023)</xref> welcomes preliminary results about dexamethasone use in treating critically ill COVID-19 patients, as evidence of respiratory tract infection treatment.</p>
<p>Doxorubicin is an anthracycline type of chemotherapy that is used to treat several different types of cancer. Six of the top ten predicted associations have been confirmed in this section. It is approved for the treatment of non-small cell lung cancer, glioma, hematologic tumors and acute lymphoblastic leukemia, either alone or in combination with other drugs. Additionally, a combination of prednisone, azathioprine, and N-acetylcysteine (NAC) has also been used as a treatment for idiopathic pulmonary fibrosis (<xref ref-type="bibr" rid="B3">Behr, 2012</xref>).</p>
<p>The remaining associations predicted by the LBMFF model have not received much attention so far, providing an avenue for new indications to be discovered.</p>
</sec>
<sec id="s3-5-3">
<title>3.5.3 Top 10 associated drugs for given diseases</title>
<p>Furthermore, we conducted two detailed case studies to further verify the capability to discover new therapies, and the chosen diseases were seizures (MeSH ID: D012640) and hypertension (MeSH ID: D006973). The top 10 related drugs for both diseases were listed in <xref ref-type="table" rid="T6">Table 6</xref>. We also visualized the predicted relationships (<xref ref-type="fig" rid="F6">Figure 6</xref>) with different colors and types of lines to represent different validation methods. The more lines between two nodes, the more evidence there was for this relationship.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Top 10 associated drugs for given diseases. Different validation methods are represented with different colors and types of lines. The more lines between two nodes, the more evidence there is for this relationship.</p>
</caption>
<graphic xlink:href="fphar-14-1205144-g006.tif"/>
</fig>
<p>In the section on seizures, we confirmed eight of the top 10 results through database and literature evidence. Specifically, quinidine significantly reduced the seizure burden (by about 90%). Tetracycline-class antibiotics were protective against partial seizures <italic>in vivo</italic>. The drug combinations of bumetanide plus phenobarbital and vitamin B12 plus carbamazepine have been proven effective in treating seizures (PMID: 29563977). Moreover, mice treated with prednisolone or sirolimus in animal models had less severe seizures than the negative control group.</p>
<p>In the section on hypertension, we found evidence for eight drug candidates in the top ten through databases and literature. For example, pioglitazone modulated the vascular contractility in hypertension by interference with the ET-1 pathway (PMID: 31712626), and acetazolamide leads to more effective control of increased intracranial pressure (PMID: 26154918). Furthermore, ClinicalTrials and CTD proved the possibility of the other six new drug-disease associations.</p>
</sec>
</sec>
</sec>
<sec sec-type="conclusion" id="s4">
<title>4 Conclusion</title>
<p>In this study, we proposed a method called LBMFF for drug-disease association prediction. Due to the huge amount of information contained in both biomedical public databases and scientific literature, we computed drug-drug and disease-disease similarities by multi-feature fusion and utilized two GCN layers to capture structural embeddings from the association feature matrix. Concretely, the association feature matrix consisted of a drug comprehensive similarity matrix, a disease comprehensive similarity matrix and a known drug-disease association matrix. Moreover, an attention mechanism was introduced into the GCN model to extract information more effectively. The proposed method achieved excellent performance compared to seven state-of-the-art methods on the same test datasets, and we demonstrated its potential for identifying new drug-disease associations for practical use.</p>
<p>However, there are still some limitations in our work that require an in-depth investigation. First, more association features should be further considered in our work. We can collect more prior biological knowledge from literature, such as drug-protein, drug-gene, disease-gene and drug-pathway, to improve similarity accuracy. Second, the two-layer GCN is a basic model for learning on graph-structured data, while some other graph neural network models are worth investigating in the future.</p>
<p>Above all, LBMFF is able to learn scattered information from both public databases and scientific literature to identify the latent drug-disease associations. It gives researchers, pharmacologists, and pharmaceutical companies a tremendous opportunity to study and validate predicted associations that are more likely to exist. We expect LBMFF to be an efficient approach that can further improve drug repositioning and reduce its cost and time.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found in the article/supplementary material.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>HK collected the data, designed the algorithm, performed the experiments, analyzed the results, and wrote the manuscript; LH collected the data, implemented the algorithm, performed the experiments, and analyzed the results; YG analyzed the results and partially wrote the manuscript; XL partially collected the data and partially analyzed the results; QL and JL directed the whole study, conceptualized the algorithm, supervised the experiments, and revised the manuscript. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>This work was supported by The National Social Science Fund of China (22CTQ024), Innovation Project of Chinese Academy of Medical Sciences (2021-I2M-1-001, 2021-I2M-1-056), The National Key Research and Development Program of China (2022YFB2702801), and China Knowledge Center for Engineering Sciences and Technology (Medical Knowledge Service System) (CKCEST-2022-1-6).</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Al-Sabri</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Oloulade</surname>
<given-names>B. M.</given-names>
</name>
<name>
<surname>Lyu</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Multi-view graph neural architecture search for biomedical entity and relation extraction</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinform.</source> <volume>99</volume>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>.</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bandrowski</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Brinkman</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Brochhausen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Brush</surname>
<given-names>M. H.</given-names>
</name>
<name>
<surname>Bug</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Chibucos</surname>
<given-names>M. C.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>The Ontology for biomedical investigations</article-title>. <source>PLoS One</source> <volume>11</volume>, <fpage>e0154556</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0154556</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Behr</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Prednisone, azathioprine, and N-acetylcysteine for pulmonary fibrosis</article-title>. <source>N. Engl. J. Med.</source> <volume>366</volume>, <fpage>870</fpage>&#x2013;<lpage>871</lpage>. <pub-id pub-id-type="doi">10.1056/NEJMc1207471</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Berdigaliyev</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Aljofan</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>An overview of drug discovery and development</article-title>. <source>Future Med. Chem.</source> <volume>12</volume>, <fpage>939</fpage>&#x2013;<lpage>947</lpage>. <pub-id pub-id-type="doi">10.4155/fmc-2019-0307</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Breckenridge</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Jacob</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Overcoming the legal and regulatory barriers to drug repurposing</article-title>. <source>Nat. Rev. Drug Discov.</source> <volume>18</volume>, <fpage>1</fpage>&#x2013;<lpage>2</lpage>. <pub-id pub-id-type="doi">10.1038/nrd.2018.92</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brown</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Patel</surname>
<given-names>C. J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>MeSHDD: Literature-based drug-drug similarity for drug repositioning</article-title>. <source>J. Am. Med. Inf. Assoc.</source> <volume>24</volume>, <fpage>614</fpage>&#x2013;<lpage>618</lpage>. <pub-id pub-id-type="doi">10.1093/jamia/ocw142</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cai</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Drug repositioning based on the heterogeneous information fusion graph convolutional network</article-title>. <source>Brief. Bioinform</source> <volume>22</volume>, <fpage>bbab319</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbab319</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cami</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Manzi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Arnold</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Reis</surname>
<given-names>B. Y.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Pharmacointeraction network models predict unknown drug-drug interactions</article-title>. <source>PLoS One</source> <volume>8</volume>, <fpage>e61468</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0061468</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Campillos</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Michael</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Gavin</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Jensen</surname>
<given-names>L. J.</given-names>
</name>
<name>
<surname>Bork</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Drug target identification using side-effect similarity</article-title>. <source>Science</source> <volume>321</volume>, <fpage>263</fpage>&#x2013;<lpage>266</lpage>. <pub-id pub-id-type="doi">10.1126/science.1158140</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cao</surname>
<given-names>D. S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Q. S.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>H. M.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>J. H.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Q. N.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Large-scale prediction of drug-target interactions using protein sequences and drug topological structures</article-title>. <source>Anal. Chim. Acta</source> <volume>8</volume>, <fpage>1</fpage>&#x2013;<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1016/j.aca.2012.09.021</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chan</surname>
<given-names>H. C. S.</given-names>
</name>
<name>
<surname>Shan</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Dahoun</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Vogel</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Advancing drug discovery via artificial intelligence</article-title>. <source>Trends Pharmacol. Sci.</source> <volume>40</volume>, <fpage>592</fpage>&#x2013;<lpage>604</lpage>.</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>C. H.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>BioConceptVec: Creating and evaluating literature-based biomedical concept embeddings on a large scale</article-title>. <source>PLoS Comput. Biol.</source> <volume>23</volume>, <fpage>e1007617</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1007617</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Deng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Ojima</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Samaras</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Artificial intelligence in drug discovery: Applications and techniques</article-title>. <source>Brief. Bioinform</source> <volume>23</volume>, <fpage>bbab430</fpage>.</citation>
</ref>
<ref id="B14">
<citation citation-type="book">
<collab>Disease Ontology</collab> (<year>2023</year>). <source>Disease Ontology</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://disease-ontology.org/">https://disease-ontology.org/</ext-link> (accessed March 28, 2023)</comment>.</citation>
</ref>
<ref id="B15">
<citation citation-type="book">
<collab>Drugbank</collab> (<year>2023</year>). <source>Drugbank</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://go.drugbank.com/">https://go.drugbank.com/</ext-link> (accessed March 29, 2023)</comment>.</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fatehifar</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Karshenas</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Drug-Drug interaction extraction using a position and similarity fusion-based attention mechanism</article-title>. <source>J. Biomed. Inf.</source> <volume>115</volume>, <fpage>103707</fpage>. <pub-id pub-id-type="doi">10.1016/j.jbi.2021.103707</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Firoozbakht</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Rezaeian</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Rueda</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ngom</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Computationally repurposing drugs for breast cancer subtypes using a network-based approach</article-title>. <source>BMC Bioinforma.</source> <volume>23</volume>, <fpage>143</fpage>&#x2013;<lpage>155</lpage>. <pub-id pub-id-type="doi">10.1186/s12859-022-04662-6</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname>
<given-names>C. Q.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Y. K.</given-names>
</name>
<name>
<surname>Xin</surname>
<given-names>X. H.</given-names>
</name>
<name>
<surname>Min</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Du</surname>
<given-names>P. F.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>DDA-SKF: Predicting drug-disease associations using similarity kernel fusion</article-title>. <source>Front. Pharmacol.</source> <volume>12</volume>, <fpage>784171</fpage>. <pub-id pub-id-type="doi">10.3389/fphar.2021.784171</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Giuliani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Silva</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Borba</surname>
<given-names>J. V. V. B.</given-names>
</name>
<name>
<surname>Ramos</surname>
<given-names>P. I. P.</given-names>
</name>
<name>
<surname>Paveley</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Muratov</surname>
<given-names>E. N.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Computationally-guided drug repurposing enables the discovery of kinase targets and inhibitors as new schistosomicidal agents</article-title>. <source>PLoS Comput. Biol.</source> <volume>14</volume>, <fpage>e1006515</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1006515</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gottlieb</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Stein</surname>
<given-names>G. Y.</given-names>
</name>
<name>
<surname>Ruppin</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Sharan</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Predict: A method for inferring novel drug indications with application to personalized medicine</article-title>. <source>Mol. Syst. Biol.</source> <volume>7</volume>, <fpage>496</fpage>. <pub-id pub-id-type="doi">10.1038/msb.2011.26</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>CurrMG: A curriculum learning approach for graph based molecular property prediction</article-title>. <source>IEEE Int. Conf. Bioinforma. Biomed. (BIBM)</source> <volume>12</volume>, <fpage>2686</fpage>&#x2013;<lpage>2693</lpage>.</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yin</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Redda: Integrating multiple biological relations to heterogeneous graph neural network for drug-disease association prediction</article-title>. <source>Comput. Biol. Med.</source> <volume>150</volume>, <fpage>106127</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2022.106127</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Holzinger</surname>
<given-names>E. R.</given-names>
</name>
<name>
<surname>Ritchie</surname>
<given-names>M. D.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Integrating heterogeneous high-throughput data for meta-dimensional pharmacogenomics and disease-related studies</article-title>. <source>Pharmacogenomics</source> <volume>13</volume>, <fpage>213</fpage>&#x2013;<lpage>222</lpage>. <pub-id pub-id-type="doi">10.2217/pgs.11.145</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Rapid repurposing of novel combination drugs for the treatment of heart failure via a computationally guided network screening approach</article-title>. <source>J. Chem. Inf. Model.</source> <volume>62</volume>, <fpage>5223</fpage>&#x2013;<lpage>5232</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jcim.1c00132</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="book">
<collab>Introduction to Graph Neural Network</collab> (<year>2023</year>). <source>Introduction to graph neural network</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.analyticssteps.com/blogs/introduction-graph-neural-network-gnn">https://www.analyticssteps.com/blogs/introduction-graph-neural-network-gnn</ext-link> (accessed March 28, 2023)</comment>.</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kafkas</surname>
<given-names>&#x15e;.</given-names>
</name>
<name>
<surname>Hoehndorf</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Ontology based mining of pathogen-disease associations from literature</article-title>. <source>J. Biomed. Semant.</source> <volume>10</volume>, <fpage>15</fpage>&#x2013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1186/s13326-019-0208-2</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Karim</surname>
<given-names>M. R.</given-names>
</name>
<name>
<surname>Cochez</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Jares</surname>
<given-names>J. B.</given-names>
</name>
<name>
<surname>Uddin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Beyan</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Decker</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Drug-drug interaction prediction based on knowledge graph embeddings and convolutional-LSTM network</article-title>. <source>ACM</source> <volume>19</volume>, <fpage>113</fpage>&#x2013;<lpage>123</lpage>. <pub-id pub-id-type="doi">10.1145/3307339.3342161</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="book">
<collab>KEGG</collab> (<year>2023</year>). <source>Kyoto encyclopedia of genes and genomes</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.genome.jp/kegg/">https://www.genome.jp/kegg/</ext-link> (accessed March 28, 2023)</comment>.</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Nam</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Drug repositioning of herbal compounds via a machine-learning approach</article-title>. <source>BMC Bioinforma.</source> <volume>20</volume>, <fpage>247</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-019-2811-8</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yoon</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>So</surname>
<given-names>C. H.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>BioBERT: A pre-trained biomedical language representation model for biomedical text mining</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>1234</fpage>&#x2013;<lpage>1240</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ning</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Neural inductive matrix completion with graph convolutional networks for miRNA-disease association prediction</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>2538</fpage>&#x2013;<lpage>2546</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btz965</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ni</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Computational drug repositioning with random walk on a heterogeneous network</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinform.</source> <volume>16</volume>, <fpage>1890</fpage>&#x2013;<lpage>1900</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2018.2832078</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>F. X.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Drug repositioning based on comprehensive similarity measures and Bi-Random walk algorithm</article-title>. <source>Bioinformatics</source> <volume>32</volume>, <fpage>2664</fpage>&#x2013;<lpage>2671</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btw228</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lyu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ji</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Long short-term memory RNN for biomedical named entity recognition</article-title>. <source>BMC Bioinforma.</source> <volume>18</volume>, <fpage>462</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-017-1868-5</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Drug repositioning based on similarity constrained probabilistic matrix factorization: COVID-19 as a case study</article-title>. <source>Appl. Soft Comput.</source> <volume>103</volume>, <fpage>107135</fpage>. <pub-id pub-id-type="doi">10.1016/j.asoc.2021.107135</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zeng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A weighted bilinear neural collaborative filtering approach for drug repositioning</article-title>. <source>Brief. Bioinform</source> <volume>23</volume>, <fpage>bbab581</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbab581</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="book">
<collab>MeSH</collab> (<year>2023</year>). <source>MeSH</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/mesh">https://www.ncbi.nlm.nih.gov/mesh</ext-link> (accessed March 29, 2023)</comment>.</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moon</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Abrar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Chirkova</surname>
<given-names>R. Y.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Learning Drug-Disease-Target Embedding (DDTE) from knowledge graphs to inform drug repurposing hypotheses</article-title>. <source>J. Biomed. Inf.</source> <volume>119</volume>, <fpage>103838</fpage>. <pub-id pub-id-type="doi">10.1016/j.jbi.2021.103838</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mullard</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>2021 FDA approvals</article-title>. <source>Nat. Rev. Drug Discov.</source> <volume>21</volume>, <fpage>83</fpage>&#x2013;<lpage>88</lpage>. <pub-id pub-id-type="doi">10.1038/d41573-022-00001-9</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Napolitano</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Moreira</surname>
<given-names>V. M.</given-names>
</name>
<name>
<surname>Tagliaferri</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Kere</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>D&#x27;Amato</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Drug repositioning: A machine-learning approach through data integration</article-title>. <source>J. Cheminform</source> <volume>5</volume>, <fpage>30</fpage>. <pub-id pub-id-type="doi">10.1186/1758-2946-5-30</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nicholson</surname>
<given-names>D. N.</given-names>
</name>
<name>
<surname>Greene</surname>
<given-names>C. S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Constructing knowledge graphs and their biomedical applications</article-title>. <source>Comput. Struct. Biotechnol. J.</source> <volume>18</volume>, <fpage>1414</fpage>&#x2013;<lpage>1428</lpage>. <pub-id pub-id-type="doi">10.1016/j.csbj.2020.05.017</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nosengo</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Can you teach old drugs new tricks?</article-title> <source>Nature</source> <volume>16</volume>, <fpage>314</fpage>&#x2013;<lpage>316</lpage>. <pub-id pub-id-type="doi">10.1038/534314a</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Rodriguez-Pat&#xf3;n</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>AMDE: A novel attention-mechanism-based multidimensional feature encoder for drug-drug interaction prediction</article-title>. <source>Brief. Bioinform</source> <volume>23</volume>, <fpage>bbab545</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbab545</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="book">
<collab>PharmGKB</collab> (<year>2023</year>). <source>PharmGKB</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.pharmgkb.org">https://www.pharmgkb.org</ext-link> (accessed March 29, 2023)</comment>.</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Piplani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>P. K.</given-names>
</name>
<name>
<surname>Winkler</surname>
<given-names>D. A.</given-names>
</name>
<name>
<surname>Petrovsky</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Computationally repurposed drugs and natural products against RNA dependent RNA polymerase as potential COVID-19 therapies</article-title>. <source>Mol. Biomed.</source> <volume>2</volume>, <fpage>28</fpage>&#x2013;<lpage>40</lpage>. <pub-id pub-id-type="doi">10.1186/s43556-021-00050-3</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pushpakom</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Iorio</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Eyers</surname>
<given-names>P. A.</given-names>
</name>
<name>
<surname>Escott</surname>
<given-names>K. J.</given-names>
</name>
<name>
<surname>Hopper</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wells</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Drug repurposing: Progress, challenges and recommendations</article-title>. <source>Nat. Rev. Drug Discov.</source> <volume>18</volume>, <fpage>41</fpage>&#x2013;<lpage>58</lpage>. <pub-id pub-id-type="doi">10.1038/nrd.2018.168</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schcolnik-Cabrera</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ju&#xe1;rez-L&#xf3;pez</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Perspectives on drug repurposing</article-title>. <source>Curr. Med. Chem.</source> <volume>28</volume>, <fpage>2085</fpage>&#x2013;<lpage>2099</lpage>. <pub-id pub-id-type="doi">10.2174/0929867327666200831141337</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shah</surname>
<given-names>H. A.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Review of machine learning methods for the prediction and reconstruction of metabolic pathways</article-title>. <source>Front. Mol. Biosci.</source> <volume>8</volume>, <fpage>634141</fpage>. <pub-id pub-id-type="doi">10.3389/fmolb.2021.634141</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shen</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Knowledge discovery from biomedical ontologies in cross domains</article-title>. <source>PLoS One</source> <volume>11</volume>, <fpage>e0160005</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0160005</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="book">
<collab>SIDER</collab> (<year>2023</year>). <source>Sider</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="http://sideeffects.embl.de/">http://sideeffects.embl.de/</ext-link> (accessed March 29, 2023)</comment>.</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Biomedical named entity recognition using BERT in the machine reading comprehension framework</article-title>. <source>J. Biomed. Inf.</source> <volume>118</volume>, <fpage>103799</fpage>. <pub-id pub-id-type="doi">10.1016/j.jbi.2021.103799</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Why 90% of clinical drug development fails and how to improve it?</article-title> <source>Acta Pharm. Sin. B</source> <volume>12</volume>, <fpage>3049</fpage>&#x2013;<lpage>3062</lpage>. <pub-id pub-id-type="doi">10.1016/j.apsb.2022.02.002</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="book">
<collab>The Human Phenotype Ontology</collab> (<year>2023</year>). <source>The Human Phenotype Ontology</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://hpo.jax.org/app/">https://hpo.jax.org/app/</ext-link> (accessed March 29, 2023)</comment>.</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thomas</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Kipf</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Semi-supervised classification with graph convolutional networks</article-title>. <source>Int. Conf. Learn. Represent. (ICLR)</source>. <pub-id pub-id-type="doi">10.48550/arXiv.1609.02907</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vellal</surname>
<given-names>A. D.</given-names>
</name>
<name>
<surname>Sirinukunwattan</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kensler</surname>
<given-names>K. H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Deep learning image analysis of benign breast disease to identify subsequent risk of breast cancer</article-title>. <source>JNCI Cancer Spectr.</source> <volume>5</volume>, <fpage>pkaa119</fpage>.</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Inferring the human microRNA functional similarity and functional network based on microRNA-associated diseases</article-title>. <source>Bioinformatics</source> <volume>26</volume>, <fpage>1644</fpage>&#x2013;<lpage>1650</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btq241</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Drug repositioning by integrating target information through a heterogeneous network model</article-title>. <source>Bioinformatics</source> <volume>30</volume>, <fpage>2923</fpage>&#x2013;<lpage>2930</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btu403</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Drug repositioning by kernel-based integration of molecular structure, molecular activity, and phenotype data</article-title>. <source>PLoS One</source> <volume>8</volume>, <fpage>e78518</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0078518</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wen</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Elwazir</surname>
<given-names>M. Y.</given-names>
</name>
<name>
<surname>Moon</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Adapting and evaluating a deep learning language model for clinical why-question answering</article-title>. <source>JAMIA Open</source> <volume>3</volume>, <fpage>16</fpage>&#x2013;<lpage>20</lpage>. <pub-id pub-id-type="doi">10.1093/jamiaopen/ooz072</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="book">
<collab>What is BERT (Language Model) and How Does It Work?</collab> (<year>2023</year>). <source>What is BERT (language model) and how does it work?</source> <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.techtarget.com/searchenterpriseai/definition/BERT-language-model">https://www.techtarget.com/searchenterpriseai/definition/BERT-language-model</ext-link> (accessed March 30, 2023)</comment>.</citation>
</ref>
<ref id="B61">
<citation citation-type="book">
<collab>WHO welcomes preliminary results about dexamethasone use in treating critically ill COVID-19 patients</collab> (<year>2023</year>). <source>WHO welcomes preliminary results about dexamethasone use in treating critically ill COVID-19 patients</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.who.int/news/item/16-06-2020-who-welcomes-preliminary-results-about-dexamethasone-use-in-treating-critically-ill-covid-19-patients">https://www.who.int/news/item/16-06-2020-who-welcomes-preliminary-results-about-dexamethasone-use-in-treating-critically-ill-covid-19-patients</ext-link> (accessed March 30, 2023)</comment>.</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Gudivada</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Aronow</surname>
<given-names>B. J.</given-names>
</name>
<name>
<surname>Jegga</surname>
<given-names>A. G.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Computational drug repositioning through heterogeneous network clustering</article-title>. <source>BMC Syst. Biol.</source> <volume>7</volume> (<supplement>Suppl 5</supplement>), <fpage>S6</fpage>. <pub-id pub-id-type="doi">10.1186/1752-0509-7-S5-S6</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Long</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>P. S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A comprehensive survey on graph neural networks</article-title>. <source>IEEE Trans. Neural Netw. Learn Syst.</source> <volume>32</volume>, <fpage>4</fpage>&#x2013;<lpage>24</lpage>. <pub-id pub-id-type="doi">10.1109/TNNLS.2020.2978386</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Drug repositioning based on bounded nuclear norm regularization</article-title>. <source>Bioinformatics</source> <volume>35</volume>, <fpage>i455</fpage>&#x2013;<lpage>i463</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btz331</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Predicting drug-disease associations through layer attention graph convolutional network</article-title>. <source>Brief. Bioinform</source> <volume>22</volume>, <fpage>bbaa243</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbaa243</pub-id>
</citation>
</ref>
<ref id="B66">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Yue</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Predicting drug-disease associations by using similarity constrained matrix factorization</article-title>. <source>BMC Bioinforma.</source> <volume>19</volume>, <fpage>233</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-018-2220-4</pub-id>
</citation>
</ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>X. M.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>M. J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Graph neural networks and their current applications in bioinformatics</article-title>. <source>Front. Genet.</source> <volume>12</volume>, <fpage>690049</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2021.690049</pub-id>
</citation>
</ref>
<ref id="B68">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zitnik</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Agrawal</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Leskovec</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Modeling polypharmacy side effects with graph convolutional networks</article-title>. <source>Bioinformatics</source> <volume>34</volume>, <fpage>i457</fpage>&#x2013;<lpage>i466</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty294</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>