<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article article-type="methods-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1623008</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2025.1623008</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Methods</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>GTMALoc: prediction of miRNA subcellular localization based on graph transformer and multi-head attention mechanism</article-title>
<alt-title alt-title-type="left-running-head">Huang et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2025.1623008">10.3389/fgene.2025.1623008</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Huang</surname>
<given-names>Xindi</given-names>
</name>
<uri xlink:href="https://loop.frontiersin.org/people/3055906/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Jiang</surname>
<given-names>Jipu</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Shi</surname>
<given-names>Lifen</given-names>
</name>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Yan</surname>
<given-names>Cheng</given-names>
</name>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1952028/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff>
<institution>School of Informatics</institution>, <institution>Hunan University of Chinese Medicine</institution>, <addr-line>Changsha</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/173846/overview">Federica Calore</ext-link>, The Ohio State University, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1744381/overview">Kai Zheng</ext-link>, Central South University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1843023/overview">Xinfei Wang</ext-link>, Jilin University, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Cheng Yan, <email>yancheng01@hnucm.edu.cn</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>19</day>
<month>06</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1623008</elocation-id>
<history>
<date date-type="received">
<day>05</day>
<month>05</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>10</day>
<month>06</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Huang, Jiang, Shi and Yan.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Huang, Jiang, Shi and Yan</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>MicroRNAs (miRNAs) play a crucial role in regulating gene expression, and their subcellular localization is essential for understanding their biological functions. However, accurately predicting miRNA subcellular localization remains a challenging task due to their short sequences, complex structures, and diverse functions. To improve prediction accuracy, this study proposes a novel model based on a graph transformer and a multi-head attention mechanism. The model integrates multi-source features, which include the miRNA sequence similarity network, miRNA functional similarity network, miRNA&#x2013;mRNA association network, miRNA&#x2013;drug association network, and miRNA&#x2013;disease association network. Specifically, we first apply the node2vec algorithm to extract features from these biological networks. Then, we use a graph transformer to capture relationships between nodes within the networks, enabling a better understanding of miRNA functions across different biological contexts. Next, a multi-head attention mechanism is implemented to combine miRNA features from multiple networks, allowing the model to capture deeper feature relationships and enhance prediction performance. Performance evaluation shows that the proposed method achieves significant improvements over current approaches on open-access datasets, achieving high performance with an AUC (area under the receiver operating characteristic curve) of 0.9108 and an AUPR (area under the precision-recall curve) of 0.8102. It not only significantly improves prediction accuracy but also exhibits strong generalization and stability.</p>
</abstract>
<kwd-group>
<kwd>miRNA</kwd>
<kwd>subcellular localization</kwd>
<kwd>graph transformer</kwd>
<kwd>multi-head attention mechanism</kwd>
<kwd>multi-source features</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>RNA</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>MicroRNAs (miRNAs) are a class of small non-coding RNAs widely distributed in eukaryotic cells, typically around 22 nucleotides in length. They mainly regulate gene expression through the post-transcriptional processes <xref ref-type="bibr" rid="B17">Hombach and Kretz (2016)</xref>; <xref ref-type="bibr" rid="B16">Holley and Topkara (2011)</xref>. In organisms, miRNAs bind to specific target sites on mRNAs, causing their subsequent degradation or translational inhibition, thereby modulating key fundamental physiological processes like cell proliferation, differentiation, apoptosis, and immune system activation <xref ref-type="bibr" rid="B4">Bartel (2009)</xref>. Recent studies have shown that miRNAs have indispensable functions in a variety of human diseases, including cancer, neurodegenerative disorders, and cardiovascular diseases <xref ref-type="bibr" rid="B26">Li et al. (2023)</xref>; <xref ref-type="bibr" rid="B12">Dugger and Dickson (2017)</xref>. They also show great potential in drug response prediction, resistance mechanisms, and therapeutic target discovery <xref ref-type="bibr" rid="B28">Miska (2007)</xref>; <xref ref-type="bibr" rid="B30">Small and Olson (2011)</xref>. In this context, studying the subcellular localization of miRNAs is of great significance for understanding their regulatory networks and functional mechanisms <xref ref-type="bibr" rid="B23">Kabekkodu et al. (2018)</xref>; <xref ref-type="bibr" rid="B6">Catalanotto et al. (2016)</xref>; <xref ref-type="bibr" rid="B15">Gurtan and Sharp (2013)</xref>. Different subcellular localizations often suggest that miRNAs are involved in distinct biological processes. Accurate localization prediction not only facilitates the understanding of functional diversification of miRNAs but also provides theoretical support for early disease diagnosis and targeted therapy <xref ref-type="bibr" rid="B22">Jie et al. (2021)</xref>. 
Although conventional experimental methods, such as fluorescence <italic>in situ</italic> hybridization and subcellular fractionation combined with high-throughput sequencing, can directly determine miRNA distributions, these techniques are often complex, expensive, and lack scalability for large-scale samples <xref ref-type="bibr" rid="B33">Thomson and Dinger (2016)</xref>. Therefore, developing computational methods to efficiently predict miRNA subcellular localization has become a focal point in bioinformatics research. Currently, researchers have developed various machine learning models based on sequence information to explore potential miRNA localization patterns. For example, <xref ref-type="bibr" rid="B20">Huang et al. (2007)</xref> proposed a prediction framework combining k-mer frequency patterns with a Support Vector Machine (SVM) classifier, showing the feasibility of using sequence information for localization recognition. However, due to the short length and complex structure of miRNAs, as well as their heterogeneity across different tissues or disease states, models relying solely on sequence-level features often fail to capture the complete biological semantics, resulting in limited accuracy and generalization <xref ref-type="bibr" rid="B27">Li et al. (2014)</xref>. To address this, some studies have incorporated biological network information, such as the miRNA-mRNA interaction network <xref ref-type="bibr" rid="B18">Hsu et al. (2011)</xref>, the miRNA-disease association network <xref ref-type="bibr" rid="B21">Jiang et al. (2010)</xref>, and the miRNA-drug association network <xref ref-type="bibr" rid="B7">Chen H. et al. (2019)</xref>, to improve prediction accuracy. For instance, Xie et al. constructed a miRNA-target gene interaction network using Graph Convolutional Networks (GCNs) and applied deep learning to predict miRNA functions within cells <xref ref-type="bibr" rid="B14">Guan et al. (2022)</xref>. Li et al. 
integrated miRNA, disease, and drug information through a heterogeneous network and used graph embedding techniques for feature learning <xref ref-type="bibr" rid="B32">Sun et al. (2020)</xref>. Over the past few years, deep learning innovations have significantly advanced bioinformatics research. Convolutional Neural Networks (CNNs) have been used to extract sequence features of miRNAs&#x2014;such as in the DeepMirTar model, which utilized CNNs to improve target gene prediction accuracy <xref ref-type="bibr" rid="B38">Wen et al. (2018)</xref>. Recurrent Neural Networks (RNNs) have also been employed to capture sequential dependencies, as seen in MirLocNet, which uses Long Short-Term Memory (LSTM) networks to computationally infer miRNA subcellular localization <xref ref-type="bibr" rid="B9">Chen Q. et al. (2019)</xref>. Graph Neural Networks (GNNs) are widely applied in modeling biological networks. For example, Gao et al. (2022) proposed a novel approach that combines Graph Attention Networks (GATs) with biological network information to improve miRNA function prediction <xref ref-type="bibr" rid="B41">Zhao et al. (2022)</xref>.</p>
<p>In the field of miRNA subcellular localization, researchers have developed various models to enhance prediction accuracy and biological interpretability. MiRLoc <xref ref-type="bibr" rid="B39">Xu et al. (2022)</xref>, for instance, inferred miRNA spatial distribution by leveraging known mRNA localization and their interaction with miRNAs, reflecting their role in post-transcriptional regulation. MirLocPredictor <xref ref-type="bibr" rid="B1">Asim et al. (2020)</xref> incorporated CNNs and positional encoding of k-mers to enhance sequence representation for multi-label localization tasks. DAmirLocGNet <xref ref-type="bibr" rid="B2">Bai et al. (2023a)</xref> integrated Graph Convolutional Networks and autoencoders to jointly model miRNA sequence features, disease associations, and disease semantic networks, learning high-level representations from complex graph structures. Some existing excellent models provide us with references. For example, <xref ref-type="bibr" rid="B36">Wang X.-F. et al. (2024)</xref> proposed a multi-channel graph neural network framework that integrates multimodal similarity information with hypergraph contrastive learning, effectively identifying novel cancer biomarkers. <xref ref-type="bibr" rid="B35">Wang X.- et al. (2024)</xref> designed a directed graph neural network-based multi-view learning model capable of systematically extracting regulatory feature signals from multiple biological layers, enhancing the model&#x2019;s representational power. Additionally, <xref ref-type="bibr" rid="B37">Wang et al. (2022)</xref> developed KGDCMI, a method that integrates multi-source biological information with deep learning techniques to accurately predict interactions between circRNA and miRNA. Comparatively, PMiSLocMF <xref ref-type="bibr" rid="B8">Chen et al. 
(2024)</xref> fused heterogeneous data such as miRNA-mRNA, miRNA-drug, and miRNA-disease networks using a graph attention mechanism, achieving robust performance even in scenarios with sparse data or incomplete labels. Despite improvements, present architectures still face challenges such as inadequate information integration and underutilization of multi-head feature relationships. Effectively integrating multi-source information and building more expressive feature representations to improve miRNA subcellular localization prediction remains an urgent and critical problem.</p>
<p>To overcome these limitations, this study proposes a novel miRNA subcellular localization prediction model named GTMALoc, based on graph transformer and multi-head attention mechanisms. This approach effectively incorporates miRNA sequence information and their roles across different biological networks to improve prediction performance. Specifically, we first extract miRNA features from multiple biological networks&#x2014;including miRNA sequence similarity, miRNA-mRNA associations, miRNA-disease associations, and miRNA-drug associations&#x2014;using node2vec. Then, a graph transformer framework is applied to infer latent node correlations, offering better insight into miRNA functionality in different contexts. A multi-head attention mechanism is subsequently employed to integrate miRNA features across networks, capturing deeper, multi-head relational patterns and enhancing predictive performance. The evaluations show that our model outperforms mainstream methods in terms of accuracy, generalization, and stability on public datasets, demonstrating its effectiveness and feasibility in the miRNA subcellular localization task. Key improvements over existing methods provided by this study are:<list list-type="simple">
<list-item>
<p>(1) We propose a new miRNA subcellular localization prediction model that leverages graph transformer and multi-head attention mechanisms to integrate multi-source biological network information.</p>
</list-item>
<list-item>
<p>(2) Complex relationships within biological networks are modeled using node2vec and graph transformer to improve high-dimensional representations of miRNA features.</p>
</list-item>
<list-item>
<p>(3) A multi-head attention mechanism is employed to fuse heterogeneous network information, thereby strengthening inter-feature relationships and improving the prediction accuracy and generalization ability of the model.</p>
</list-item>
</list>
</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Materials and methods</title>
<sec id="s2-1">
<title>2.1 Datasets</title>
<p>The dataset used in this study is sourced from version 2.0 of the RNALocate database <xref ref-type="bibr" rid="B10">Cui et al. (2022)</xref>, which systematically compiles a large number of experimentally supported RNA subcellular localization records. From this database, we select a subset containing 1,041 miRNAs to construct and evaluate our model. To ensure biological consistency, all selected miRNAs are included in the miRNA functional similarity network established in the MiRLoc <xref ref-type="bibr" rid="B39">Xu et al. (2022)</xref> study, facilitating the exploration of potential functional associations. In terms of localization annotation, these miRNAs are assigned to seven subcellular compartments: cytoplasm, nucleus, nucleolus, mitochondrion, exosome, microvesicle, and extracellular vesicle. The specific numbers are as follows: 870 exosome, 825 microvesicle, 499 nucleus, 308 cytoplasm, 259 mitochondrion, 102 extracellular vesicle, and 67 nucleolus. This categorization not only covers the major cellular structures where miRNAs may reside but also reflects their diverse roles in intracellular and intercellular communication, providing a rich and challenging dataset for multi-label classification tasks.</p>
</sec>
<sec id="s2-2">
<title>2.2 Methods</title>
<p>In this study, we develop a multi-source feature fusion model, GTMALoc, for miRNA subcellular localization prediction, aiming to comprehensively capture miRNA characteristics in various biological contexts. This process is illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>. First, we extract structural features from several biological networks, including the miRNA sequence similarity network, miRNA&#x2013;mRNA regulatory network, miRNA&#x2013;disease association network, and miRNA&#x2013;drug interaction network. To preserve both local and global structural information within each network, we apply the node2vec algorithm to perform embedding learning on these heterogeneous graphs, thereby obtaining a representation vector for each miRNA under different semantic relations&#x2014;reflecting its functional characteristics in diverse biological environments. Next, we utilize the graph transformer model to process the graph embedding features. Leveraging its built-in structural awareness and self-attention mechanism, the model captures complex and variable dependencies among nodes, enabling a deeper understanding of miRNA behavior and influence across different networks. To achieve effective multi-source information fusion, we further introduce a multi-head attention mechanism to align and integrate miRNA representations from various networks. This allows the model to automatically uncover important cross-network interactions and latent high-level semantic relationships. The fusion strategy not only enhances the model&#x2019;s sensitivity to critical features but also significantly improves overall prediction accuracy and generalization performance.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>The architecture of the GTMALoc model.</p>
</caption>
<graphic xlink:href="fgene-16-1623008-g001.tif">
<alt-text content-type="machine-generated">Diagram illustrating a machine learning model for miRNA analysis. It features networks for miRNA sequence and function similarity, drug, mRNA, and disease associations processed through node2vec and Graph Transformers. Outputs are miRNA similarity, drug, mRNA, and disease association features. A Graph Transformer, employing multi-head attention and feed-forward layers, feeds into a concatenation and encoding process for predictive analysis of cellular localizations like cytoplasm and mitochondrion. Various network connections and transformations are visualized.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s3">
<title>3 miRNA networks</title>
<sec id="s3-1">
<title>3.1 miRNA sequence similarity network and miRNA functional similarity network</title>
<p>All miRNA sequence data are obtained from the authoritative database miRBase (version 22) <xref ref-type="bibr" rid="B24">Kozomara et al. (2019)</xref>, which provides experimentally validated miRNA sequences from humans and other species, and is widely used in miRNA research. To construct the miRNA sequence similarity network, we employ the Smith&#x2013;Waterman algorithm <xref ref-type="bibr" rid="B31">Smith and Waterman (1981)</xref>, a classical local sequence alignment technique that precisely evaluates the similarity between two miRNA sequences in terms of base composition and order. Specifically, the algorithm uses dynamic programming to find optimal local alignments based on base matches, mismatches, and gap penalties, thereby computing a similarity score for each miRNA pair (<xref ref-type="disp-formula" rid="e1">Equation 1</xref>).<disp-formula id="e1">
<mml:math id="m1">
<mml:mrow>
<mml:mi mathvariant="normal">S</mml:mi>
<mml:mi mathvariant="normal">W</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>
<inline-formula id="inf1">
<mml:math id="m2">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the local alignment score of the two sequences, and a symmetric similarity matrix can be obtained by performing the above alignment process between all pairs of miRNAs. The miRNA sequence similarity network is constructed from this similarity matrix by treating each miRNA as a node and assigning the similarity score to the corresponding edge as its weight. To develop a functional similarity network of miRNAs, we first use the association data between miRNAs and diseases to construct a disease hierarchy with the help of Medical Subject Headings (MeSH). Specifically, each disease <inline-formula id="inf2">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is represented in a directed acyclic graph (DAG) by a subgraph that includes <inline-formula id="inf3">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and all of its higher-level diseases. For each disease <inline-formula id="inf4">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> in the subgraph, its contribution to <inline-formula id="inf5">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> can be expressed as (<xref ref-type="disp-formula" rid="e2">Equation 2</xref>):<disp-formula id="e2">
<mml:math id="m7">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mtext>Depth</mml:mtext>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where <inline-formula id="inf6">
<mml:math id="m8">
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>a</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> is the adjustment parameter, and <inline-formula id="inf7">
<mml:math id="m9">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the hierarchical distance between <inline-formula id="inf8">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf9">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Next, the semantic value of disease <inline-formula id="inf10">
<mml:math id="m12">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is established by aggregating all node contributions within its subgraph (<xref ref-type="disp-formula" rid="e3">Equation 3</xref>):<disp-formula id="e3">
<mml:math id="m13">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>V</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mi>C</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>let <inline-formula id="inf11">
<mml:math id="m14">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denote the set containing disease <inline-formula id="inf12">
<mml:math id="m15">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and all of its ancestor diseases. The semantic similarity between two diseases, <inline-formula id="inf13">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf14">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, denoted as <inline-formula id="inf15">
<mml:math id="m18">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>m</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, can be calculated based on the overlap of their semantic values, which reflects their semantic similarity. For two miRNAs, <inline-formula id="inf16">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf17">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, let their associated disease sets be <inline-formula id="inf18">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf19">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, respectively. Their initial functional similarity can then be defined as the average semantic similarity between diseases in <inline-formula id="inf20">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and those in <inline-formula id="inf21">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> (<xref ref-type="disp-formula" rid="e4">Equation 4</xref>):<disp-formula id="e4">
<mml:math id="m25">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>S</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2229;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2229;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mtext>Sim</mml:mtext>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
<p>The challenge of similarity underestimation arising from disease set sparsity is resolved through linear combination with miRNA GIP kernel similarity, generating robust functional similarity estimates (<xref ref-type="disp-formula" rid="e5">Equation 5</xref>).<disp-formula id="e5">
<mml:math id="m26">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mtext>&#x2003;</mml:mtext>
<mml:mi>F</mml:mi>
<mml:mi>S</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mtext>&#x2003;</mml:mtext>
<mml:mi>G</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>P</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>where <inline-formula id="inf22">
<mml:math id="m27">
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a fusion parameter, and <inline-formula id="inf23">
<mml:math id="m28">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the similarity between the two miRNAs under the GIP kernel. Specifically, <inline-formula id="inf24">
<mml:math id="m29">
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is used to balance the contributions of functional similarity and GIP kernel similarity. We perform a grid search over the values [0.1, 0.3, 0.5, 0.7, 0.9], using multi-label classification performance as the evaluation criterion. The optimal value is found to be <inline-formula id="inf25">
<mml:math id="m30">
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> &#x3d; 0.5, which offers a good trade-off between generalization and robustness. For the threshold T used to binarize the similarity matrix, we adopt the empirical approach of <xref ref-type="bibr" rid="B34">Wang et al. (2010)</xref>, adjusting T to control the sparsity of the resulting adjacency matrix. We ultimately set T to 0.6 to ensure a reasonable balance between sparsity and connectivity in the resulting graph. After calculating the similarity between all miRNA pairs, a similarity matrix is constructed. This matrix is then binarized using the predefined threshold T (<xref ref-type="disp-formula" rid="e6">Equation 6</xref>):<disp-formula id="e6">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>ij</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="{" close="">
<mml:mrow>
<mml:mtable class="cases">
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:mi mathvariant="normal">T</mml:mi>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">r</mml:mi>
<mml:mi mathvariant="normal">w</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">s</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<p>The resulting adjacency matrix <inline-formula id="inf26">
<mml:math id="m32">
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> defines the miRNA functional similarity network, where nodes represent miRNAs and edges reflect their functional similarity.</p>
</sec>
<sec id="s3-2">
<title>3.2 miRNA-mRNA association network</title>
<p>The miRNA&#x2013;mRNA regulatory network in this study is primarily based on data from the authoritative miRTarBase (2020 version) <xref ref-type="bibr" rid="B19">Huang et al. (2020)</xref>, supplemented by a curated dataset of validated interactions compiled by <xref ref-type="bibr" rid="B39">Xu et al. (2022)</xref>. miRTarBase is known for its high-quality data, integrating miRNA&#x2013;target gene interactions supported by both low- and high-throughput experimental evidence, such as reporter gene assays, qRT-PCR, and Western blot. The constructed network contains 8,254 high-confidence regulatory relationships between 1,041 non-coding miRNAs and 2,836 protein-coding genes.</p>
</sec>
<sec id="s3-3">
<title>3.3 miRNA-drug association network</title>
<p>The miRNA&#x2013;drug association network is based on data from ncDR, a drug resistance research database <xref ref-type="bibr" rid="B11">Dai et al. (2017)</xref>, which collects experimentally verified and predicted interactions between non-coding RNAs and drugs. The data are standardized as follows: First, the 1,041 miRNAs involved in previous studies are matched based on miRBase nomenclature. Second, only interactions with clearly annotated drug resistance evidence (including preclinical or cell line experiments) are retained. This results in 3,305 high-confidence miRNA&#x2013;drug interactions involving 130 commonly used clinical drugs, such as cisplatin and gefitinib.</p>
</sec>
<sec id="s3-4">
<title>3.4 miRNA-disease association network</title>
<p>To construct the required network, this study references the dataset from HMDD v3.2 <xref ref-type="bibr" rid="B3">Bai et al. (2023b)</xref>, a widely-used human microRNA disease database. After curation and filtering, 15,547 miRNA&#x2013;disease association pairs are obtained, covering 1,041 miRNAs and 640 human diseases.</p>
</sec>
</sec>
<sec id="s4">
<title>4 Node2vec algorithm</title>
<p>Network modeling has emerged as a pivotal paradigm in biomedical research due to its intuitive representation of complex relationships, particularly in systematic miRNA analysis involving multimodal correlations. This study integrates four critical biological networks: the miRNA sequence similarity network (quantifying functional conservation), the miRNA&#x2013;disease association network (revealing pathological regulation), the miRNA&#x2013;drug interaction network (reflecting therapeutic targeting), and the miRNA&#x2013;mRNA regulatory network (decoding genetic circuitry). To effectively capture topological features from these non-Euclidean spatial data, we employ the node2vec algorithm <xref ref-type="bibr" rid="B13">Grover and Leskovec (2016)</xref>, a graph embedding approach based on adaptive random walk strategies. By tuning search parameters&#x2014;the return parameter p controlling local neighborhood sampling and the in-out parameter q governing global structural exploration&#x2014;this approach generates semantically preserved node sequences, subsequently vectorized through Skip-Gram modeling. Notably, we implement dimension-specific embedding strategies tailored to distinct network characteristics: 64-dimensional representations in sequence similarity networks to resolve fine-grained patterns of conserved functional motifs, <italic>versus</italic> 128-dimensional high-capacity embeddings in the three heterogeneous association networks to capture complex multi-hop interactions. This hierarchical embedding mechanism simultaneously reduces feature redundancy while preserving network-specific information, establishing an interpretable mathematical foundation for subsequent multi-view feature fusion.</p>
</sec>
<sec id="s5">
<title>5 Graph transformer</title>
<p>This study proposes a structure-aware graph neural network, the graph transformer, to learn high-quality node embeddings from graph structures. Unlike traditional GNNs, which struggle with sparse or heterogeneous structures, the graph transformer incorporates multi-head attention and a structure reconstruction loss, enabling better modeling of local and global graph information. First, the input miRNA functional similarity matrix and association matrix are fused at the feature level. After generating the node feature matrix <inline-formula id="inf27">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> at layer <inline-formula id="inf28">
<mml:math id="m34">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, it dynamically rebuilds the attention weights matrix <inline-formula id="inf29">
<mml:math id="m35">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> through dot products of <inline-formula id="inf30">
<mml:math id="m36">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf31">
<mml:math id="m37">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, enabling the joint evolution of topology and features. This gradient-preserving update critically suppresses false-positive edges caused by experimental noise. The model adopts Pre-Layer Normalization (Pre-LN), normalizing features before multi-head attention and feed-forward operations rather than after, which better accommodates high-dimensional biological feature propagation and curbs gradient vanishing in deep training. For extremely sparse data like miRNA-drug networks, binary attention masking embedded in multi-head layers automatically blocks unobserved associations (e.g., unknown miRNA-drug interactions), reducing computational complexity from <inline-formula id="inf32">
<mml:math id="m38">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula id="inf33">
<mml:math id="m39">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>E</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> (where <inline-formula id="inf34">
<mml:math id="m40">
<mml:mrow>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes valid edges) with less GPU memory consumption, while preventing noise from distorting attention weights. These improvements jointly optimize the model, and the output layer further sharpens the precision-recall curve through dot product similarity and L2 normalized feature constraints in the range of [0,1]. During embedding learning, the model stacks L layers of graph attention modules, where each node&#x2019;s representation is updated by aggregating the representations of its neighbors (<xref ref-type="disp-formula" rid="e7">Equation 7</xref>):<disp-formula id="e7">
<mml:math id="m41">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="normal">&#x3c3;</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msup>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="normal">1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>Here, <inline-formula id="inf35">
<mml:math id="m42">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is an activation function, and <inline-formula id="inf36">
<mml:math id="m43">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is a learnable weight matrix. The attention weights <inline-formula id="inf37">
<mml:math id="m44">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> are calculated as (<xref ref-type="disp-formula" rid="e8">Equation 8</xref>):<disp-formula id="e8">
<mml:math id="m45">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>exp</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">L</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">k</mml:mi>
<mml:mi mathvariant="normal">y</mml:mi>
<mml:mi mathvariant="normal">R</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">L</mml:mi>
<mml:mi mathvariant="normal">U</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x22a4;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="normal">1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="normal">1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">k</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>exp</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">L</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">k</mml:mi>
<mml:mi mathvariant="normal">y</mml:mi>
<mml:mi mathvariant="normal">R</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">L</mml:mi>
<mml:mi mathvariant="normal">U</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x22a4;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="normal">1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="normal">1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>where <inline-formula id="inf38">
<mml:math id="m46">
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the attention weight vector, and <inline-formula id="inf39">
<mml:math id="m47">
<mml:mrow>
<mml:mo stretchy="false">&#x2016;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> represents the vector concatenation operation. The graph transformer further introduces a multi-head attention mechanism, using K attention heads in parallel in each layer and finally concatenating or averaging their outputs to form the input of the next layer (<xref ref-type="disp-formula" rid="e9">Equation 9</xref>):<disp-formula id="e9">
<mml:math id="m48">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn mathvariant="normal">1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">K</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="normal">1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">K</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:mi mathvariant="normal">&#x3c3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">k</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">k</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msup>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="normal">1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>To enhance the model&#x2019;s ability to express structural information, the graph transformer also introduces a structural reconstruction loss as the training objective. In the unsupervised setting, the model scores the node pairs of the real edges in the input graph and defines the structural reconstruction similarity as (<xref ref-type="disp-formula" rid="e10">Equation 10</xref>):<disp-formula id="e10">
<mml:math id="m49">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="normal">&#x3c3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x22a4;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="normal">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>where <inline-formula id="inf40">
<mml:math id="m50">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the final embeddings of nodes i and j, and <inline-formula id="inf41">
<mml:math id="m51">
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the sigmoid function. The structural reconstruction loss is formulated as (<xref ref-type="disp-formula" rid="e11">Equation 11</xref>):<disp-formula id="e11">
<mml:math id="m52">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>reconstruct</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2b;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mi>log</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold">h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x22a4;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mi>log</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold">h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x22a4;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>where <inline-formula id="inf42">
<mml:math id="m53">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2b;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represents the set of true edges, and <inline-formula id="inf43">
<mml:math id="m54">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> denotes negative-sampled non-edges to prevent degenerate solutions where all nodes become indistinguishable. This objective effectively preserves high separability of node connectivity patterns in the embedding space. The final node embeddings <inline-formula id="inf44">
<mml:math id="m55">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> undergo L2-normalization for downstream tasks.</p>
</sec>
<sec id="s6">
<title>6 Multi-head attention mechanism</title>
<p>To capture cross-modal dependencies and interactions among various biological features, a Multi-Head Attention (MHA) module is introduced as the core feature interaction component in the fusion model. Based on the transformer encoder, MHA computes attention across multiple subspaces in parallel to enhance local and global correlation modeling. Four input feature types&#x2014;miRNA sequence, drug features, mRNA features, and disease features&#x2014;are first projected to a common 128-dimensional space using fully connected layers with L2 regularization. These are concatenated and reshaped into a 2D sequence format before being passed into the MHA module. The Multi-Head Attention Mechanism is calculated as follows (<xref ref-type="disp-formula" rid="e12">Equations 12</xref>, <xref ref-type="disp-formula" rid="e13">13</xref>):<disp-formula id="e12">
<mml:math id="m56">
<mml:mrow>
<mml:mi mathvariant="bold">Q</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold">H</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold">W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>Q</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold">H</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold">W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:mi mathvariant="bold">V</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold">H</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold">W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>
<disp-formula id="e13">
<mml:math id="m57">
<mml:mrow>
<mml:mtext>Attention</mml:mtext>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="bold">Q</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">V</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>softmax</mml:mtext>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="bold">Q</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold">K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x22a4;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
<mml:mi mathvariant="bold">V</mml:mi>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>The outputs of all heads are concatenated and transformed; the formulas are as follows (<xref ref-type="disp-formula" rid="e14">Equations 14</xref>, <xref ref-type="disp-formula" rid="e15">15</xref>):<disp-formula id="e14">
<mml:math id="m58">
<mml:mrow>
<mml:mi mathvariant="normal">M</mml:mi>
<mml:mi mathvariant="normal">H</mml:mi>
<mml:mi mathvariant="normal">A</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="normal">C</mml:mi>
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mi mathvariant="normal">c</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:msup>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>O</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>
<disp-formula id="e15">
<mml:math id="m59">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="normal">L</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">y</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">r</mml:mi>
<mml:mi mathvariant="normal">N</mml:mi>
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mi mathvariant="normal">r</mml:mi>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="normal">D</mml:mi>
<mml:mi mathvariant="normal">r</mml:mi>
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mi mathvariant="normal">p</mml:mi>
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mi mathvariant="normal">u</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">M</mml:mi>
<mml:mi mathvariant="normal">H</mml:mi>
<mml:mi mathvariant="normal">A</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>
</p>
<p>To further improve the representation capability, the multi-head attention output is passed through a two-layer Feed-Forward Network (FFN); the formulas are as follows (<xref ref-type="disp-formula" rid="e16">Equations 16</xref>, <xref ref-type="disp-formula" rid="e17">17</xref>):<disp-formula id="e16">
<mml:math id="m60">
<mml:mrow>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mi mathvariant="normal">N</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="normal">R</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">L</mml:mi>
<mml:mi mathvariant="normal">U</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>
<disp-formula id="e17">
<mml:math id="m61">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>fusion</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="normal">L</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">y</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">r</mml:mi>
<mml:mi mathvariant="normal">N</mml:mi>
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mi mathvariant="normal">r</mml:mi>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="normal">D</mml:mi>
<mml:mi mathvariant="normal">r</mml:mi>
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mi mathvariant="normal">p</mml:mi>
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mi mathvariant="normal">u</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mi mathvariant="normal">F</mml:mi>
<mml:mi mathvariant="normal">N</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(17)</label>
</disp-formula>
</p>
<p>The final output represents the fused multimodal semantic embedding features, which are used as the input of the subsequent self-supervised learning projection head and the multi-label classification head. This representation not only retains the information of the original modal features but also integrates the high-order correlations between them.</p>
</sec>
<sec id="s7">
<title>7 Prediction of miRNA subcellular localization</title>
<p>During forward propagation, the fused high-dimensional features are processed through the MHA and FFN modules. Finally, the classification head maps the features to predicted subcellular localization probabilities (<xref ref-type="disp-formula" rid="e18">Equation 18</xref>):<disp-formula id="e18">
<mml:math id="m62">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">cls</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(18)</label>
</disp-formula>where <inline-formula id="inf45">
<mml:math id="m63">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">cls</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is a linear mapping and <inline-formula id="inf46">
<mml:math id="m64">
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>&#x3c3;</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> is the Sigmoid activation function that converts the output into a vector of probabilities in [0, 1]. For each class, if the predicted probability exceeds the threshold (0.5), the class is labeled as positive; otherwise, it is labeled as negative, resulting in the final binary classification output.</p>
</sec>
<sec sec-type="results" id="s8">
<title>8 Results</title>
<sec id="s8-1">
<title>8.1 10-Fold cross-validation</title>
<p>In our experiments, we employ 10-fold cross-validation to comprehensively assess the generalization ability of the model. The dataset is randomly shuffled and evenly divided into 10 subsets; in each fold rotation, one subset is assigned to testing and the remaining nine to training, ensuring comprehensive parameter optimization. After training, the model generates predicted probabilities for each class on the test set, which are then mapped to the [0,1] range using the Sigmoid activation function and binarized with a threshold of 0.5. For each fold, we calculate the Area Under the ROC Curve (AUC) and the Area Under the Precision-Recall Curve (AUPR) as evaluation metrics, and record the results for each class. As shown in <xref ref-type="fig" rid="F2">Figures 2</xref>, <xref ref-type="fig" rid="F3">3</xref>, our model achieves an average AUC of 0.9108 and an average AUPR of 0.8102 on the multi-label subcellular localization task, fully demonstrating the model&#x2019;s effectiveness and robustness in capturing multimodal features and their high-order interactions.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>The Model GTMALoc 10-fold cross-validation AUC value.</p>
</caption>
<graphic xlink:href="fgene-16-1623008-g002.tif">
<alt-text content-type="machine-generated">ROC curves display the true positive rate versus the false positive rate for different classes, including cytoplasm, exosome, nucleolus, nucleus, extracellular vesicle, microvesicle, and mitochondrion. The AUC values range from 0.8843 for mitochondrion to 0.9260 for cytoplasm, indicating varying classification performance.</alt-text>
</graphic>
</fig>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>The Model GTMALoc 10-fold cross-validation AUPR value.</p>
</caption>
<graphic xlink:href="fgene-16-1623008-g003.tif">
<alt-text content-type="machine-generated">Precision-recall curves for seven classes: Cytoplasm (AUPR 0.9099), Exosome (0.9902), Nucleolus (0.5029), Nucleus (0.9359), Extracellular Vesicle (0.5289), Microvesicle (0.9902), Mitochondrion (0.8139). The curves illustrate the precision-recall trade-off for each class.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s8-2">
<title>8.2 Comparative experiments</title>
<p>To comprehensively evaluate the performance of the GTMALoc model, we use both 5-fold and 10-fold cross-validation strategies and systematically compare it with four existing methods (MiRLoc, MirLocPredictor, DAmiRLocGNet, and PMiSLocMF). The evaluation metrics include AUC and AUPR to thoroughly assess the model&#x2019;s effectiveness.</p>
<p>As shown in <xref ref-type="table" rid="T1">Table 1</xref>, GTMALoc achieves an average AUC score of 0.9094 under 5-fold cross-validation, outperforming the other methods across most subcellular localization categories. It performs particularly well in structurally complex or sparsely connected categories such as cytoplasm (0.9240), extracellular vesicle (0.9115), and microvesicle (0.9113), demonstrating its strength in integrating high-dimensional heterogeneous information and modeling complex relationships. <xref ref-type="table" rid="T2">Table 2</xref> presents the comparison based on AUPR, which primarily reflects the model&#x2019;s robustness in class-imbalanced scenarios. GTMALoc also achieves the highest average AUPR of 0.8044, showing excellent performance in critical functional regions such as exosome (0.9900), nucleus (0.9248), and microvesicle (0.9900). Although the score slightly decreases in the nucleolus (0.5142), where signals are sparse, the overall performance remains superior.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>AUC Performance Comparison of miRNA Subcellular Localization Models Based on 5-Fold Cross-Validation.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Subcellular localization</th>
<th align="center">MiRLoc</th>
<th align="center">MirLocPredictor</th>
<th align="center">DAmiRLocGNet</th>
<th align="center">PMiSLocMF</th>
<th align="center">GTMALoc</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Cytoplasm</td>
<td align="center">0.8356</td>
<td align="center">0.5740</td>
<td align="center">0.8601</td>
<td align="center">0.8901</td>
<td align="center">0.9240</td>
</tr>
<tr>
<td align="left">Exosome</td>
<td align="center">0.7391</td>
<td align="center">0.5839</td>
<td align="center">0.7043</td>
<td align="center">0.9503</td>
<td align="center">0.9251</td>
</tr>
<tr>
<td align="left">Nucleolus</td>
<td align="center">0.9080</td>
<td align="center">0.5289</td>
<td align="center">0.9280</td>
<td align="center">0.9254</td>
<td align="center">0.9271</td>
</tr>
<tr>
<td align="left">Nucleus</td>
<td align="center">0.7766</td>
<td align="center">0.6755</td>
<td align="center">0.7955</td>
<td align="center">0.8745</td>
<td align="center">0.8854</td>
</tr>
<tr>
<td align="left">Extracellular vesicle</td>
<td align="center">0.8001</td>
<td align="center">0.6330</td>
<td align="center">0.8317</td>
<td align="center">0.8634</td>
<td align="center">0.9115</td>
</tr>
<tr>
<td align="left">Microvesicle</td>
<td align="center">0.5093</td>
<td align="center">0.5967</td>
<td align="center">0.6729</td>
<td align="center">0.9369</td>
<td align="center">0.9113</td>
</tr>
<tr>
<td align="left">Mitochondrion</td>
<td align="center">0.7691</td>
<td align="center">0.6742</td>
<td align="center">0.8321</td>
<td align="center">0.8689</td>
<td align="center">0.8815</td>
</tr>
<tr>
<td align="left">Average AUC</td>
<td align="center">0.7625</td>
<td align="center">0.6094</td>
<td align="center">0.8035</td>
<td align="center">0.9013</td>
<td align="center">0.9094</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>AUPR Performance Comparison of miRNA Subcellular Localization Models Based on 5-Fold Cross-Validation.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Subcellular localization</th>
<th align="center">MiRLoc</th>
<th align="center">MirLocPredictor</th>
<th align="center">DAmiRLocGNet</th>
<th align="center">PMiSLocMF</th>
<th align="center">GTMALoc</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Cytoplasm</td>
<td align="center">0.7239</td>
<td align="center">0.8472</td>
<td align="center">0.7621</td>
<td align="center">0.8149</td>
<td align="center">0.8957</td>
</tr>
<tr>
<td align="left">Exosome</td>
<td align="center">0.9822</td>
<td align="center">0.8219</td>
<td align="center">0.9233</td>
<td align="center">0.9897</td>
<td align="center">0.9900</td>
</tr>
<tr>
<td align="left">Nucleolus</td>
<td align="center">0.4141</td>
<td align="center">0.4901</td>
<td align="center">0.5732</td>
<td align="center">0.5199</td>
<td align="center">0.5142</td>
</tr>
<tr>
<td align="left">Nucleus</td>
<td align="center">0.8111</td>
<td align="center">0.4313</td>
<td align="center">0.7945</td>
<td align="center">0.8869</td>
<td align="center">0.9248</td>
</tr>
<tr>
<td align="left">Extracellular vesicle</td>
<td align="center">0.2902</td>
<td align="center">0.3464</td>
<td align="center">0.4600</td>
<td align="center">0.4682</td>
<td align="center">0.5153</td>
</tr>
<tr>
<td align="left">Microvesicle</td>
<td align="center">0.9201</td>
<td align="center">0.2443</td>
<td align="center">0.8872</td>
<td align="center">0.9854</td>
<td align="center">0.9900</td>
</tr>
<tr>
<td align="left">Mitochondrion</td>
<td align="center">0.5189</td>
<td align="center">0.3111</td>
<td align="center">0.6867</td>
<td align="center">0.7275</td>
<td align="center">0.8009</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Furthermore, as indicated in <xref ref-type="table" rid="T3">Table 3</xref>, GTMALoc&#x2019;s average AUC increases to 0.9108 under 10-fold cross-validation, further confirming the model&#x2019;s stability and generalization across different data splits. Its outstanding performance in categories such as nucleolus, cytoplasm, and exosome highlights its ability to accurately identify miRNAs localized in these regions. This advantage is largely attributed to the multi-head attention mechanism, which effectively captures complex sequence patterns and graph-structured information. As shown in <xref ref-type="table" rid="T4">Table 4</xref>, although GTMALoc&#x2019;s AUPR scores for some sub-tasks are slightly lower than those of PMiSLocMF, the overall average AUPR reaches 0.8102, demonstrating strong resilience to data imbalance. We observe significant performance differences across localization categories: exosome and microvesicle achieve near-perfect AUPR scores, indicating successful recognition of key regional features, while performance in nucleolus and extracellular vesicle is relatively lower, likely due to insufficient positive samples and data sparsity. This suggests that future work should focus on improving data quality or adopting sample augmentation strategies to enhance performance in low-signal categories. Overall, the experimental results demonstrate that GTMALoc consistently exhibits strong predictive power and generalization ability under various cross-validation strategies, confirming its feasibility and practicality as a reliable tool for miRNA subcellular localization prediction.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>AUC Performance Comparison of miRNA Subcellular Localization Models Based on 10-Fold Cross-Validation.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Subcellular localization</th>
<th align="center">MiRLoc</th>
<th align="center">MirLocPredictor</th>
<th align="center">DAmiRLocGNet</th>
<th align="center">PMiSLocMF</th>
<th align="center">GTMALoc</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Cytoplasm</td>
<td align="center">0.8366</td>
<td align="center">0.5741</td>
<td align="center">0.8606</td>
<td align="center">0.8909</td>
<td align="center">
<bold>0.9260</bold>
</td>
</tr>
<tr>
<td align="left">Exosome</td>
<td align="center">0.7395</td>
<td align="center">0.5842</td>
<td align="center">0.7051</td>
<td align="center">
<bold>0.9513</bold>
</td>
<td align="center">0.9177</td>
</tr>
<tr>
<td align="left">Nucleolus</td>
<td align="center">0.9085</td>
<td align="center">0.5286</td>
<td align="center">
<bold>0.9289</bold>
</td>
<td align="center">0.9267</td>
<td align="center">0.9258</td>
</tr>
<tr>
<td align="left">Nucleus</td>
<td align="center">0.7765</td>
<td align="center">0.6752</td>
<td align="center">0.7960</td>
<td align="center">0.8764</td>
<td align="center">
<bold>0.8916</bold>
</td>
</tr>
<tr>
<td align="left">Extracellular Vesicle</td>
<td align="center">0.8003</td>
<td align="center">0.6335</td>
<td align="center">0.8350</td>
<td align="center">0.8574</td>
<td align="center">
<bold>0.9193</bold>
</td>
</tr>
<tr>
<td align="left">Microvesicle</td>
<td align="center">0.5099</td>
<td align="center">0.5973</td>
<td align="center">0.6757</td>
<td align="center">
<bold>0.9502</bold>
</td>
<td align="center">0.9110</td>
</tr>
<tr>
<td align="left">Mitochondrion</td>
<td align="center">0.7694</td>
<td align="center">0.6758</td>
<td align="center">0.8332</td>
<td align="center">0.8702</td>
<td align="center">
<bold>0.8843</bold>
</td>
</tr>
<tr>
<td align="left">Average AUC</td>
<td align="center">0.7630</td>
<td align="center">0.6098</td>
<td align="center">0.8049</td>
<td align="center">0.9033</td>
<td align="center">
<bold>0.9108</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values indicate the best result.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>AUPR Performance Comparison of miRNA Subcellular Localization Models Based on 10-Fold Cross-Validation.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Subcellular localization</th>
<th align="center">MiRLoc</th>
<th align="center">MirLocPredictor</th>
<th align="center">DAmiRLocGNet</th>
<th align="center">PMiSLocMF</th>
<th align="center">GTMALoc</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Cytoplasm</td>
<td align="center">0.7258</td>
<td align="center">0.8391</td>
<td align="center">0.7636</td>
<td align="center">0.8192</td>
<td align="center">
<bold>0.9099</bold>
</td>
</tr>
<tr>
<td align="left">Exosome</td>
<td align="center">0.9892</td>
<td align="center">0.8248</td>
<td align="center">0.9248</td>
<td align="center">
<bold>0.9905</bold>
</td>
<td align="center">0.9902</td>
</tr>
<tr>
<td align="left">Nucleolus</td>
<td align="center">0.4148</td>
<td align="center">0.4925</td>
<td align="center">
<bold>0.5739</bold>
</td>
<td align="center">0.5298</td>
<td align="center">0.5029</td>
</tr>
<tr>
<td align="left">Nucleus</td>
<td align="center">0.8102</td>
<td align="center">0.4349</td>
<td align="center">0.7961</td>
<td align="center">0.8763</td>
<td align="center">
<bold>0.9359</bold>
</td>
</tr>
<tr>
<td align="left">Extracellular Vesicle</td>
<td align="center">0.2916</td>
<td align="center">0.3434</td>
<td align="center">0.4619</td>
<td align="center">0.4695</td>
<td align="center">
<bold>0.5289</bold>
</td>
</tr>
<tr>
<td align="left">Microvesicle</td>
<td align="center">0.9203</td>
<td align="center">0.2469</td>
<td align="center">0.8883</td>
<td align="center">0.9866</td>
<td align="center">
<bold>0.9902</bold>
</td>
</tr>
<tr>
<td align="left">Mitochondrion</td>
<td align="center">0.5277</td>
<td align="center">0.3113</td>
<td align="center">0.6882</td>
<td align="center">0.7294</td>
<td align="center">
<bold>0.8139</bold>
</td>
</tr>
<tr>
<td align="left">Average AUPR</td>
<td align="center">0.6689</td>
<td align="center">0.4990</td>
<td align="center">0.7281</td>
<td align="center">0.7716</td>
<td align="center">
<bold>0.8102</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values indicate the best result.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s8-3">
<title>8.3 Ablation study</title>
<p>To validate the contribution of each submodule within the overall architecture, we conduct detailed ablation studies by sequentially removing key components of the model and observing the resulting AUC performance on the multi-label subcellular localization task. As shown in <xref ref-type="fig" rid="F4">Figure 4</xref>, we design five ablation settings: removing the miRNA-disease association network, removing the miRNA-drug interaction network, removing the miRNA-mRNA regulatory network, removing the graph transformer module, and removing the multi-head attention mechanism. All other modules remain unchanged across experiments to ensure a consistent model structure and fair evaluation. Each module contributes positively to the model&#x2019;s overall performance, especially the graph transformer and multi-head attention modules, which play crucial roles in capturing high-order cross-modal interactions and both local and global structural features.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>The Model GTMALoc ablation experiment.</p>
</caption>
<graphic xlink:href="fgene-16-1623008-g004.tif">
<alt-text content-type="machine-generated">Bar chart titled &#x22;Ablation Study of GTMALoc Model&#x22; showing AUC scores. Bars represent different components: &#x22;w/o miRNA-Disease&#x22; (0.9101), &#x22;w/o miRNA-Drug&#x22; (0.9104), &#x22;w/o miRNA-mRNA&#x22; (0.9105), &#x22;w/o GraphTransformer&#x22; (0.9095), &#x22;w/o MultiHeadAttn&#x22; (0.9098), and &#x22;GTMALoc&#x22; (0.9108).</alt-text>
</graphic>
</fig>
</sec>
<sec id="s8-4">
<title>8.4 Parameter study</title>
<p>To investigate the effect of the number of attention heads on model performance, we systematically evaluate different configurations (2, 4, and 8 heads) on the validation set, as shown in <xref ref-type="fig" rid="F5">Figure 5</xref>. A grid search strategy fixes other hyperparameters while varying the number of attention heads to observe sensitivity in the AUC metric. Results show that the model achieves peak performance (AUC &#x3d; 0.9108) when the number of heads is set to 4, outperforming the 2-head and 8-head configurations by 0.0005 and 0.0006, respectively. This can be attributed to two main factors: (1) a moderate number of heads helps capture complementary interaction patterns in parallel subspaces, enhancing the model&#x2019;s ability to fuse features across networks; (2) exceeding the optimal number of heads leads to redundancy in attention weights and an increased risk of local overfitting. Therefore, we adopt the 4-head configuration in the final architecture, balancing computational efficiency and predictive accuracy.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>The Model GTMALoc parameter experiment.</p>
</caption>
<graphic xlink:href="fgene-16-1623008-g005.tif">
<alt-text content-type="machine-generated">Bar chart displaying AUC scores for different numbers of attention heads. The scores are 0.9103 for 2 heads, 0.9108 for 4 heads, and 0.9102 for 8 heads, with little variation.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s9">
<title>9 Case studies</title>
<p>To further demonstrate the practical utility of GTMALoc in predicting miRNA subcellular localization, we conduct case studies across seven subcellular categories: cytoplasm, exosomes, nucleolus, nucleus, extracellular vesicles, microvesicles, and mitochondrion. For each compartment, we select the top five miRNAs with the highest predicted probabilities generated by GTMALoc. We then manually verify these predictions against experimental evidence reported in the scientific literature. In total, 35 miRNA&#x2013;localization associations are examined. As shown in <xref ref-type="table" rid="T5">Table 5</xref>, 30 of these are supported by published studies, while only five lack current experimental validation.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Case studies of miRNA subcellular localizations.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Rank</th>
<th align="center">miRNA</th>
<th align="center">Localization</th>
<th align="center">Evidence</th>
<th align="center">Rank</th>
<th align="center">miRNA</th>
<th align="center">Localization</th>
<th align="center">Evidence</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">1</td>
<td align="left">miR-122</td>
<td align="left">Cytoplasm</td>
<td align="left">PMID:34073601</td>
<td align="center">4</td>
<td align="left">miR-10a</td>
<td align="left">Nucleus</td>
<td align="left">PMID:30405557</td>
</tr>
<tr>
<td align="center">2</td>
<td align="left">miR-16</td>
<td align="left">Cytoplasm</td>
<td align="left">PMID:26304540</td>
<td align="center">5</td>
<td align="left">miR-26a</td>
<td align="left">Nucleus</td>
<td align="left">PMID:26304540</td>
</tr>
<tr>
<td align="center">3</td>
<td align="left">miR-34a</td>
<td align="left">Cytoplasm</td>
<td align="left">Unconfirmed</td>
<td align="center">1</td>
<td align="left">miR-142-3p</td>
<td align="left">Extracellular vesicle</td>
<td align="left">PMID:36277256</td>
</tr>
<tr>
<td align="center">4</td>
<td align="left">miR-146b-5p</td>
<td align="left">Cytoplasm</td>
<td align="left">PMID:37108595</td>
<td align="center">2</td>
<td align="left">miR-92a</td>
<td align="left">Extracellular vesicle</td>
<td align="left">PMID:22506055</td>
</tr>
<tr>
<td align="center">5</td>
<td align="left">miR-21</td>
<td align="left">Cytoplasm</td>
<td align="left">PMID:30405557</td>
<td align="center">3</td>
<td align="left">miR-221</td>
<td align="left">Extracellular vesicle</td>
<td align="left">PMID:28304367</td>
</tr>
<tr>
<td align="center">1</td>
<td align="left">miR-21</td>
<td align="left">Exosome</td>
<td align="left">PMID:29515311</td>
<td align="center">4</td>
<td align="left">miR-48</td>
<td align="left">Extracellular vesicle</td>
<td align="left">Unconfirmed</td>
</tr>
<tr>
<td align="center">2</td>
<td align="left">miR-126</td>
<td align="left">Exosome</td>
<td align="left">PMID:22506055</td>
<td align="center">5</td>
<td align="left">miR-155</td>
<td align="left">Extracellular vesicle</td>
<td align="left">PMID:22424232</td>
</tr>
<tr>
<td align="center">3</td>
<td align="left">miR-155</td>
<td align="left">Exosome</td>
<td align="left">PMID:22424232</td>
<td align="center">1</td>
<td align="left">miR-126</td>
<td align="left">Microvesicle</td>
<td align="left">PMID:34881308</td>
</tr>
<tr>
<td align="center">4</td>
<td align="left">miR-16</td>
<td align="left">Exosome</td>
<td align="left">PMID:22506055</td>
<td align="center">2</td>
<td align="left">miR-143</td>
<td align="left">Microvesicle</td>
<td align="left">PMID:31626610</td>
</tr>
<tr>
<td align="center">5</td>
<td align="left">miR-224</td>
<td align="left">Exosome</td>
<td align="left">PMID:30765428</td>
<td align="center">3</td>
<td align="left">miR-223</td>
<td align="left">Microvesicle</td>
<td align="left">PMID:31626610</td>
</tr>
<tr>
<td align="center">1</td>
<td align="left">miR-206</td>
<td align="left">Nucleolus</td>
<td align="left">PMID:19723800</td>
<td align="center">4</td>
<td align="left">miR-199a</td>
<td align="left">Microvesicle</td>
<td align="left">PMID:34881308</td>
</tr>
<tr>
<td align="center">2</td>
<td align="left">miR-340-5p</td>
<td align="left">Nucleolus</td>
<td align="left">PMID:19723800</td>
<td align="center">5</td>
<td align="left">miR-21</td>
<td align="left">Microvesicle</td>
<td align="left">PMID:31626610</td>
</tr>
<tr>
<td align="center">3</td>
<td align="left">miR-149</td>
<td align="left">Nucleolus</td>
<td align="left">PMID:31732639</td>
<td align="center">1</td>
<td align="left">miR-1</td>
<td align="left">Mitochondrion</td>
<td align="left">PMID:38205681</td>
</tr>
<tr>
<td align="center">4</td>
<td align="left">miR-21</td>
<td align="left">Nucleolus</td>
<td align="left">PMID:26674922</td>
<td align="center">2</td>
<td align="left">miR-365</td>
<td align="left">Mitochondrion</td>
<td align="left">PMID:19941672</td>
</tr>
<tr>
<td align="center">5</td>
<td align="left">miR-1</td>
<td align="left">Nucleolus</td>
<td align="left">Unconfirmed</td>
<td align="center">3</td>
<td align="left">miR-302a</td>
<td align="left">Mitochondrion</td>
<td align="left">PMID:19941672</td>
</tr>
<tr>
<td align="center">1</td>
<td align="left">miR-29b</td>
<td align="left">Nucleus</td>
<td align="left">PMID:26304540</td>
<td align="center">4</td>
<td align="left">miR-37</td>
<td align="left">Mitochondrion</td>
<td align="left">Unconfirmed</td>
</tr>
<tr>
<td align="center">2</td>
<td align="left">miR-320</td>
<td align="left">Nucleus</td>
<td align="left">PMID:26674922</td>
<td align="center">5</td>
<td align="left">miR-7</td>
<td align="left">Mitochondrion</td>
<td align="left">PMID:38205681</td>
</tr>
<tr>
<td align="center">3</td>
<td align="left">miR-9</td>
<td align="left">Nucleus</td>
<td align="left">Unconfirmed</td>
<td align="left"/>
<td align="left"/>
<td align="left"/>
<td align="left"/>
</tr>
</tbody>
</table>
</table-wrap>
<p>Taking miR-122 and miR-21 as representative examples, we analyze the alignment between GTMALoc&#x2019;s predictions and reported biological findings. miR-122 is a liver-specific miRNA that is highly enriched in hepatocytes, and its cytoplasmic localization is well supported by experimental evidence <xref ref-type="bibr" rid="B40">Zhang et al. (2021)</xref>. Previous studies indicate that miR-122 plays a crucial role in liver homeostasis by regulating lipid metabolism, cholesterol biosynthesis, and HCV replication through mRNA binding <xref ref-type="bibr" rid="B29">Ren et al. (2008)</xref>. GTMALoc assigns a high confidence score of 0.98 for its cytoplasmic localization and captures its interactions with liver metabolism-related mRNA nodes, highlighting the model&#x2019;s capacity to extract biologically meaningful features from the molecular network. In contrast, miR-21 is known for its multi-localization behavior and is highly expressed in various cancer types. It has been shown to be secreted via exosomes, contributing to immune modulation and tumor microenvironment remodeling <xref ref-type="bibr" rid="B25">Krishnamurthy et al. (2018)</xref>, and also localizes in the nucleolus, where it may influence non-coding RNA processing <xref ref-type="bibr" rid="B5">Beckett et al. (2015)</xref>. GTMALoc successfully predicts both localizations with high confidence and focuses attention on miR-21&#x2019;s connections to tumor-associated signaling pathways, consistent with its known roles in cell proliferation, anti-apoptosis, and inflammatory response. These case studies suggest that GTMALoc not only achieves accurate subcellular localization predictions but also provides biologically interpretable outputs, particularly for multi-localized miRNAs, offering valuable insights for downstream functional analysis and subcellular mechanism exploration.</p>
</sec>
<sec sec-type="conclusion" id="s10">
<title>10 Conclusion</title>
<p>In this study, we propose a computational model, GTMALoc, for predicting miRNA subcellular localization. GTMALoc combines graph transformers with a multi-head attention mechanism to fuse heterogeneous biological information from multiple sources. Specifically, the model effectively integrates miRNA sequence features, interaction network structures, and functional properties. Through graph-based modeling and dynamic attention weighting, GTMALoc learns more discriminative high-dimensional feature representations, significantly improving the accuracy of localization prediction. We conduct a comprehensive evaluation of GTMALoc on public datasets. The results show that GTMALoc outperforms existing methods on multiple performance metrics, especially in handling sparse graph structures and high-dimensional feature spaces. Ablation studies confirm the key contributions of each feature modality and attention component to the model&#x2019;s overall performance. Additionally, through representative case studies, we validate the biological interpretability of GTMALoc. The predicted subcellular localizations are not only consistent with known miRNA functions reported in the literature but also reveal potential regulatory modes that have not been fully explored. Given that miRNA localization is closely related to its regulatory roles in various cellular contexts, accurate localization prediction provides valuable insights into miRNA-mediated mechanisms under physiological and pathological conditions.</p>
<p>Although GTMALoc performs well in various experiments, it still faces some limitations. We integrate multiple heterogeneous features, such as sequence data, functional similarity, and molecular interaction networks; however, biological data are often noisy and incomplete. For example, the functional annotations of many miRNAs remain incomplete, and some interaction networks may contain missing data or experimental biases, which can adversely affect the accuracy of feature learning. Additionally, differences among data sources in species, experimental conditions, or time points introduce biases and impair the model&#x2019;s generalizability. In future work, we plan to further refine the model architecture to improve its interpretability, adaptability, and applicability in real-world biomedical research scenarios.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s11">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. The datasets are openly available in the public domain: RNALocate v2.0 at <ext-link ext-link-type="uri" xlink:href="http://www.rnalocate.org/">http://www.rnalocate.org/</ext-link> or <ext-link ext-link-type="uri" xlink:href="http://www.rna-society.org/rnalocate/">http://www.rna-society.org/rnalocate/</ext-link>. The code and data that support the findings of this study are available at <ext-link ext-link-type="uri" xlink:href="https://github.com/27167199/GTMALoc">https://github.com/27167199/GTMALoc</ext-link>.</p>
</sec>
<sec sec-type="author-contributions" id="s12">
<title>Author contributions</title>
<p>XH: Conceptualization, Methodology, Writing &#x2013; original draft. JJ: Writing &#x2013; original draft, Software, Methodology. LS: Writing &#x2013; original draft. CY: Supervision, Project administration, Writing &#x2013; review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s13">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. The authors declare that financial support was provided by the National Natural Science Foundation of China (Grant Nos. 62473149 and 61962050), the Natural Science Foundation of Hunan Province, China (Grant No. 2022JJ30428), and the Excellent Youth Funding Program of the Hunan Provincial Education Department (Grant No. 22B0372).</p>
</sec>
<sec sec-type="COI-statement" id="s14">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s15">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
</sec>
<sec sec-type="disclaimer" id="s16">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Asim</surname>
<given-names>M. N.</given-names>
</name>
<name>
<surname>Malik</surname>
<given-names>M. I.</given-names>
</name>
<name>
<surname>Zehe</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Trygg</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Dengel</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ahmed</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Mirlocpredictor: a convnet-based multi-label microrna subcellular localization predictor by incorporating k-mer positional information</article-title>. <source>Genes</source> <volume>11</volume>, <fpage>1475</fpage>. <pub-id pub-id-type="doi">10.3390/genes11121475</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bai</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>Damirlocgnet: mirna subcellular localization prediction by combining mirna&#x2013;disease associations and graph convolutional networks</article-title>. <source>Briefings Bioinforma.</source> <volume>24</volume>, <fpage>bbad212</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbad212</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bai</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2023b</year>). <article-title>Damirlocgnet: mirna subcellular localization prediction by combining mirna&#x2013;disease associations and graph convolutional networks</article-title>. <source>Briefings Bioinforma.</source> <volume>24</volume>, <fpage>bbad212</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbad212</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bartel</surname>
<given-names>D. P.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Micrornas: target recognition and regulatory functions</article-title>. <source>Cell</source> <volume>136</volume>, <fpage>215</fpage>&#x2013;<lpage>233</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2009.01.002</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Beckett</surname>
<given-names>E. L.</given-names>
</name>
<name>
<surname>Martin</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>J. H.</given-names>
</name>
<name>
<surname>King</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Niblett</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Boyd</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Folate status, folate-related genes and serum mir-21 expression: implications for mir-21 as a biomarker</article-title>. <source>BBA Clin.</source> <volume>4</volume>, <fpage>45</fpage>&#x2013;<lpage>51</lpage>. <pub-id pub-id-type="doi">10.1016/j.bbacli.2015.06.006</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Catalanotto</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Cogoni</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zardo</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Microrna in control of gene expression: an overview of nuclear functions</article-title>. <source>Int. J. Mol. Sci.</source> <volume>17</volume>, <fpage>1712</fpage>. <pub-id pub-id-type="doi">10.3390/ijms17101712</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2019a</year>). <article-title>Prediction and interpretation of mirna-disease associations based on mirna target genes using canonical correlation analysis</article-title>. <source>BMC Bioinforma.</source> <volume>20</volume>, <fpage>404</fpage>&#x2013;<lpage>408</lpage>. <pub-id pub-id-type="doi">10.1186/s12859-019-2998-8</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Gu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Pmislocmf: predicting mirna subcellular localizations by incorporating multi-source features of mirnas</article-title>. <source>Briefings Bioinforma.</source> <volume>25</volume>, <fpage>bbae386</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbae386</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhe</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lan</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2019b</year>). <article-title>Identifying mirna-disease association based on integrating mirna topological similarity and functional similarity</article-title>. <source>Quant. Biol.</source> <volume>7</volume>, <fpage>202</fpage>&#x2013;<lpage>209</lpage>. <pub-id pub-id-type="doi">10.1007/s40484-019-0176-7</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cui</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Dou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Ni</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Rnalocate v2.0: an updated resource for rna subcellular localization with increased coverage and annotation</article-title>. <source>Nucleic Acids Res.</source> <volume>50</volume>, <fpage>D333</fpage>&#x2013;<lpage>D339</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkab825</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dai</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>An</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>ncdr: a comprehensive resource of non-coding rnas involved in drug resistance</article-title>. <source>Bioinformatics</source> <volume>33</volume>, <fpage>4010</fpage>&#x2013;<lpage>4011</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btx523</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dugger</surname>
<given-names>B. N.</given-names>
</name>
<name>
<surname>Dickson</surname>
<given-names>D. W.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Pathology of neurodegenerative diseases</article-title>. <source>Cold Spring Harb. Perspect. Biol.</source> <volume>9</volume>, <fpage>a028035</fpage>. <pub-id pub-id-type="doi">10.1101/cshperspect.a028035</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Grover</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Leskovec</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>node2vec: scalable feature learning for networks</article-title>,&#x201d; in <source>Proceedings of the 22nd ACM SIGKDD international conference on Knowledge discovery and data mining</source>, <fpage>855</fpage>&#x2013;<lpage>864</lpage>.</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guan</surname>
<given-names>Y.-J.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>C.-Q.</given-names>
</name>
<name>
<surname>Qiao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>L.-P.</given-names>
</name>
<name>
<surname>You</surname>
<given-names>Z.-H.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>Z.-H.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Mfidma: a multiple information integration model for the prediction of drug&#x2013;mirna associations</article-title>. <source>Biology</source> <volume>12</volume>, <fpage>41</fpage>. <pub-id pub-id-type="doi">10.3390/biology12010041</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gurtan</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Sharp</surname>
<given-names>P. A.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>The role of mirnas in regulating gene expression networks</article-title>. <source>J. Mol. Biol.</source> <volume>425</volume>, <fpage>3582</fpage>&#x2013;<lpage>3600</lpage>. <pub-id pub-id-type="doi">10.1016/j.jmb.2013.03.007</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Holley</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Topkara</surname>
<given-names>V. K.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>An introduction to small non-coding rnas: mirna and snorna</article-title>. <source>Cardiovasc. Drugs Ther.</source> <volume>25</volume>, <fpage>151</fpage>&#x2013;<lpage>159</lpage>. <pub-id pub-id-type="doi">10.1007/s10557-011-6290-z</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Hombach</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kretz</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Non-coding rnas: classification, biology and functioning</article-title>,&#x201d; in <source>Non-coding RNAs in colorectal cancer</source>, <fpage>3</fpage>&#x2013;<lpage>17</lpage>.</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hsu</surname>
<given-names>S.-D.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>F.-M.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>W.-Y.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>W.-C.</given-names>
</name>
<name>
<surname>Chan</surname>
<given-names>W.-L.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>mirtarbase: a database curates experimentally validated microrna&#x2013;target interactions</article-title>. <source>Nucleic Acids Res.</source> <volume>39</volume>, <fpage>D163</fpage>&#x2013;<lpage>D169</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkq1107</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>H.-Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y.-C.-D.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>K.-Y.</given-names>
</name>
<name>
<surname>Shrestha</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hong</surname>
<given-names>H.-C.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Mirtarbase 2020: updates to the experimentally validated microrna&#x2013;target interaction database</article-title>. <source>Nucleic Acids Res.</source> <volume>48</volume>, <fpage>D148</fpage>&#x2013;<lpage>D154</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkz896</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>T.-H.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Rothschild</surname>
<given-names>M. F.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Z.-L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>S.-H.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Mirfinder: an improved approach and software implementation for genome-wide fast microrna precursor scans</article-title>. <source>BMC Bioinforma.</source> <volume>8</volume>, <fpage>341</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-8-341</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Hao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Juan</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Teng</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2010</year>). <article-title>Prioritization of disease micrornas through a human phenome-micrornaome network</article-title>. <source>BMC Syst. Biol.</source> <volume>4</volume>, <fpage>S2</fpage>&#x2013;<lpage>S9</lpage>. <pub-id pub-id-type="doi">10.1186/1752-0509-4-S1-S2</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jie</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Subcellular localization of mirnas and implications in cellular homeostasis</article-title>. <source>Genes</source> <volume>12</volume>, <fpage>856</fpage>. <pub-id pub-id-type="doi">10.3390/genes12060856</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kabekkodu</surname>
<given-names>S. P.</given-names>
</name>
<name>
<surname>Shukla</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Varghese</surname>
<given-names>V. K.</given-names>
</name>
<name>
<surname>D&#x2019;Souza</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chakrabarty</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Satyamoorthy</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Clustered mirnas and their role in biological functions and diseases</article-title>. <source>Biol. Rev.</source> <volume>93</volume>, <fpage>1955</fpage>&#x2013;<lpage>1986</lpage>. <pub-id pub-id-type="doi">10.1111/brv.12428</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kozomara</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Birgaoanu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Griffiths-Jones</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>mirbase: from microrna sequences to function</article-title>. <source>Nucleic Acids Res.</source> <volume>47</volume>, <fpage>D155</fpage>&#x2013;<lpage>D162</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gky1141</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krishnamurthy</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pavani</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Kurup</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Palanisamy</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Jagadeesh</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sekar</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Cystinuria in a 13-month-old girl with absence of mutations in the slc3a1 and slc7a9 genes</article-title>. <source>Indian J. Nephrol.</source> <volume>28</volume>, <fpage>84</fpage>&#x2013;<lpage>85</lpage>. <pub-id pub-id-type="doi">10.4103/ijn.IJN_20_17</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lei</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>The role of micrornas in neurodegenerative diseases: a review</article-title>. <source>Cell Biol. Toxicol.</source> <volume>39</volume>, <fpage>53</fpage>&#x2013;<lpage>83</lpage>. <pub-id pub-id-type="doi">10.1007/s10565-022-09761-x</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Qiu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Tu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Geng</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Hmdd v2.0: a database for experimentally supported human microrna and disease associations</article-title>. <source>Nucleic Acids Res.</source> <volume>42</volume>, <fpage>D1070</fpage>&#x2013;<lpage>D1074</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkt1023</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Miska</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2007</year>). &#x201c;<article-title>Microrna expression profiles classify human cancers</article-title>,&#x201d; in <source>Cytometry part B-clinical cytometry</source> (<publisher-loc>HOBOKEN, NJ 07030 USA</publisher-loc>: <publisher-name>WILEY-LISS DIV JOHN WILEY and SONS INC</publisher-name>), <volume>72</volume>, <fpage>126</fpage>.</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ren</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Vincenz</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Kerppola</surname>
<given-names>T. K.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Changes in the distributions and dynamics of polycomb repressive complexes during embryonic stem cell differentiation</article-title>. <source>Mol. Cell. Biol.</source> <volume>28</volume>, <fpage>2884</fpage>&#x2013;<lpage>2895</lpage>. <pub-id pub-id-type="doi">10.1128/MCB.00949-07</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Small</surname>
<given-names>E. M.</given-names>
</name>
<name>
<surname>Olson</surname>
<given-names>E. N.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Pervasive roles of micrornas in cardiovascular biology</article-title>. <source>Nature</source> <volume>469</volume>, <fpage>336</fpage>&#x2013;<lpage>342</lpage>. <pub-id pub-id-type="doi">10.1038/nature09783</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Smith</surname>
<given-names>T. F.</given-names>
</name>
<name>
<surname>Waterman</surname>
<given-names>M. S.</given-names>
</name>
</person-group> (<year>1981</year>). <article-title>Identification of common molecular subsequences</article-title>. <source>J. Mol. Biol.</source> <volume>147</volume>, <fpage>195</fpage>&#x2013;<lpage>197</lpage>. <pub-id pub-id-type="doi">10.1016/0022-2836(81)90087-5</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Yi</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Predicting citywide crowd flows in irregular regions using multi-view graph convolutional networks</article-title>. <source>IEEE Trans. Knowl. Data Eng.</source> <volume>34</volume>, <fpage>2348</fpage>&#x2013;<lpage>2359</lpage>. <pub-id pub-id-type="doi">10.1109/tkde.2020.3008774</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thomson</surname>
<given-names>D. W.</given-names>
</name>
<name>
<surname>Dinger</surname>
<given-names>M. E.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Endogenous microrna sponges: evidence and controversy</article-title>. <source>Nat. Rev. Genet.</source> <volume>17</volume>, <fpage>272</fpage>&#x2013;<lpage>283</lpage>. <pub-id pub-id-type="doi">10.1038/nrg.2016.20</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Inferring the human microrna functional similarity and functional network based on microrna-associated diseases</article-title>. <source>Bioinformatics</source> <volume>26</volume>, <fpage>1644</fpage>&#x2013;<lpage>1650</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btq241</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>X.-F.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>R.-C.</given-names>
</name>
<name>
<surname>You</surname>
<given-names>Z.-H.</given-names>
</name>
<name>
<surname>Sheng</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2024a</year>). <article-title>Multi-view learning framework for predicting unknown types of cancer markers via directed graph neural networks fitting regulatory networks</article-title>. <source>Briefings Bioinforma.</source> <volume>25</volume>, <fpage>bbae546</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbae546</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>X.-F.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>R.-C.</given-names>
</name>
<name>
<surname>You</surname>
<given-names>Z.-H.</given-names>
</name>
<name>
<surname>Sheng</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2024b</year>). <article-title>A multichannel graph neural network based on multisimilarity modality hypergraph contrastive learning for predicting unknown types of cancer biomarkers</article-title>. <source>Briefings Bioinforma.</source> <volume>25</volume>, <fpage>bbae575</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbae575</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>X.-F.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>C.-Q.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>L.-P.</given-names>
</name>
<name>
<surname>You</surname>
<given-names>Z.-H.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>W.-Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.-C.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>KGDCMI: a new approach for predicting circRNA&#x2013;miRNA interactions from multi-source information extraction and deep learning</article-title>. <source>Front. Genet.</source> <volume>13</volume>, <fpage>958096</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2022.958096</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Cong</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>DeepMirTar: a deep-learning approach for predicting human miRNA targets</article-title>. <source>Bioinformatics</source> <volume>34</volume>, <fpage>3781</fpage>&#x2013;<lpage>3787</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty424</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Pian</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>MiRLoc: predicting miRNA subcellular localization by incorporating miRNA&#x2013;mRNA interactions and mRNA subcellular localization</article-title>. <source>Briefings Bioinforma.</source> <volume>23</volume>, <fpage>bbac044</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbac044</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Ran</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Amevor</surname>
<given-names>F. K.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>miR-21-5p regulates the proliferation and differentiation of skeletal muscle satellite cells by targeting KLF3 in chicken</article-title>. <source>Genes</source> <volume>12</volume>, <fpage>814</fpage>. <pub-id pub-id-type="doi">10.3390/genes12060814</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>You</surname>
<given-names>Z.-H.</given-names>
</name>
<name>
<surname>Nie</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Predicting miRNA-disease associations based on neighbor selection graph attention networks</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinforma.</source> <volume>20</volume>, <fpage>1298</fpage>&#x2013;<lpage>1307</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2022.3204726</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>