<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1369811</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2024.1369811</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Prediction of miRNAs and diseases association based on sparse autoencoder and MLP</article-title>
<alt-title alt-title-type="left-running-head">Sun et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2024.1369811">10.3389/fgene.2024.1369811</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Sun</surname>
<given-names>Si-Lin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2633850/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhou</surname>
<given-names>Bing-Wei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Sheng-Zheng</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xiu</surname>
<given-names>Yu-Han</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Bilal</surname>
<given-names>Anas</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2180939/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Long</surname>
<given-names>Hai-Xia</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/570151/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Information Science Technology</institution>, <institution>Hainan Normal University</institution>, <addr-line>Haikou</addr-line>, <addr-line>Hainan</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Key Laboratory of Data Science and Smart Education</institution>, <institution>Ministry of Education</institution>, <institution>Hainan Normal University</institution>, <addr-line>Haikou</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/498015/overview">Chaoyang Zhang</ext-link>, University of Southern Mississippi, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2346841/overview">Advait Balaji</ext-link>, Occidental Petroleum Corporation, United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/854422/overview">Dengju Yao</ext-link>, Harbin University of Science and Technology, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Hai-Xia Long, <email>myresearch_hainnu@163.com</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>30</day>
<month>05</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1369811</elocation-id>
<history>
<date date-type="received">
<day>13</day>
<month>01</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>07</day>
<month>05</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Sun, Zhou, Liu, Xiu, Bilal and Long.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Sun, Zhou, Liu, Xiu, Bilal and Long</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>
<bold>Introduction:</bold> MicroRNAs (miRNAs) are small, non-coding RNA molecules that have multiple important regulatory roles within cells. As research on miRNAs deepens, a growing number of studies show that the abnormal expression of miRNAs is closely related to various diseases. The relationship between miRNAs and diseases is crucial for discovering the pathogenesis of diseases and exploring new treatment methods.</p>
<p>
<bold>Methods:</bold> Therefore, we propose a new sparse autoencoder and MLP method (SPALP) to predict the association between miRNAs and diseases. In this study, we adopt advanced deep learning technologies, including sparse autoencoder and multi-layer perceptron (MLP), to improve the accuracy of predicting miRNA-disease associations. Firstly, the SPALP model uses a sparse autoencoder to perform feature learning and extract the initial features of miRNAs and diseases separately, obtaining the latent features of miRNAs and diseases. Then, the latent features combine miRNAs functional similarity data with diseases semantic similarity data to construct comprehensive miRNAs-diseases datasets. Subsequently, the MLP model can predict the unknown association among miRNAs and diseases.</p>
<p>
<bold>Result:</bold> To verify the performance of our model, we set up several comparative experiments. The experimental results show that, compared with traditional methods and other deep learning prediction methods, our method has significantly improved the accuracy of predicting miRNAs-disease associations, with 94.61% accuracy and 0.9859 AUC value. Finally, we conducted a case study of the SPALP model. We predicted the top 30 miRNAs that might be related to five elderly diseases (Lupus Erythematosus, Acute Myeloid Leukemia, Cardiovascular, Stroke, and Diabetes Mellitus) and validated that 27, 29, 29, 30, and 30 of the top 30, respectively, are indeed associated.</p>
<p>
<bold>Discussion:</bold> The SPALP approach introduced in this study is adept at forecasting the links between miRNAs and diseases, addressing the complexities of analyzing extensive bioinformatics datasets and enriching the understanding of the contribution of miRNAs to disease progression.</p>
</abstract>
<kwd-group>
<kwd>miRNAs</kwd>
<kwd>deep learning</kwd>
<kwd>sparse autoencoder</kwd>
<kwd>multi-layer perceptron</kwd>
<kwd>elderly diseases</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Genomics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Highlights</title>
<p>
<list list-type="simple">
<list-item>
<p>&#x2022; Developing effective computational methods to predict the unknown miRNAs-diseases association is an urgent task.</p>
</list-item>
<list-item>
<p>&#x2022; A SPALP method was proposed to predict the miRNAs-diseases association.</p>
</list-item>
<list-item>
<p>&#x2022; This paper mainly relies on sparse autoencoders and MLP (Multi-layer Perceptron) to achieve the best results.</p>
</list-item>
<list-item>
<p>&#x2022; This paper conducted a series of comparative experiments to adopt appropriate parameters for SPALP model.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s2" sec-type="intro">
<title>1 Introduction</title>
<p>A microRNA (miRNA) is a non-coding, single-stranded RNA molecule with a length of approximately 22 nucleotides encoded by endogenous genes (<xref ref-type="bibr" rid="B1">A Brief et al., 2009</xref>; <xref ref-type="bibr" rid="B44">Zhang et al., 2022a</xref>). It participates in post-transcriptional gene expression regulation in animals and plants. In the 1990s, Lee et al. discovered a 22&#xa0;nt small non-coding RNA (named lin-4) in nematodes through genetic screening (<xref ref-type="bibr" rid="B18">Lee et al., 1993</xref>). MiRNAs mainly bind with the 3&#x2032; untranslated region of target genes to suppress or reduce the expression level of these genes (<xref ref-type="bibr" rid="B4">Bartel, 2004</xref>). MiRNAs are involved in a series of important processes in life, including early development, cell proliferation, apoptosis, cell death, fat metabolism, and cell differentiation (<xref ref-type="bibr" rid="B37">Xu et al., 2004</xref>). Abnormal expression of miRNAs has been widely found to be closely related to the occurrence and development of various diseases (<xref ref-type="bibr" rid="B27">Sayed and Abdellatif, 2011</xref>; <xref ref-type="bibr" rid="B28">Tang et al., 2018</xref>; <xref ref-type="bibr" rid="B33">Wang et al., 2023a</xref>).</p>
<p>Subsequent studies have shown that miRNAs play a complex and essential role in the pathogenesis of various diseases. Increasing evidence demonstrates the intricate relationship between miRNAs and multiple diseases, including cancers (<xref ref-type="bibr" rid="B23">Lynam-Lennon et al., 2009</xref>). MiRNAs serve dual roles in cancer: they can act as oncogenes (Oncomirs) (<xref ref-type="bibr" rid="B10">Esquela-Kerscher, 2006</xref>), promoting tumor growth by inhibiting tumor suppressor gene translation, or act as tumor suppressors, negating this effect by inhibiting the miRNAs translation of oncogenes (<xref ref-type="bibr" rid="B5">Chakrabortty et al., 2023</xref>). Besides cancer, miRNAs are also related to cardiovascular, neurological, and infectious diseases. Scientists are actively exploring the association between miRNAs and diseases (<xref ref-type="bibr" rid="B25">Nemeth et al., 2023</xref>).</p>
<p>In the early days, traditional biological experiments were the primary means for scientists to explore the association between miRNAs and diseases. However, as research progressed and single-cell RNA sequencing technology advanced, more miRNAs were discovered, and their associations with diseases became increasingly complex. The intricate interaction networks between miRNAs and target genes (<xref ref-type="bibr" rid="B24">Mendes et al., 2009</xref>), miRNAs and proteins (<xref ref-type="bibr" rid="B3">Baek et al., 2008</xref>), and miRNAs and epigenetics (<xref ref-type="bibr" rid="B9">Chuang and Jones, 2007</xref>) make accurately predicting the association between specific miRNAs and diseases a complex and challenging task (<xref ref-type="bibr" rid="B14">Jin et al., 2022</xref>). Traditional biological experiments are time-consuming and costly; furthermore, they often have a low success rate. Relying solely on these experiments to explore miRNAs-diseases associations is no longer sufficient.</p>
<p>With the flourishing development of the computer field, machine learning has been widely applied in various domains (<xref ref-type="bibr" rid="B40">Zeng et al., 2022a</xref>; <xref ref-type="bibr" rid="B7">Chen et al., 2023a</xref>; <xref ref-type="bibr" rid="B35">Wang et al., 2023</xref>; <xref ref-type="bibr" rid="B36">Xu et al., 2023</xref>; <xref ref-type="bibr" rid="B38">Yan et al., 2023</xref>) due to its ability to compute continuously exploding amounts of data at low costs (<xref ref-type="bibr" rid="B15">Jordan and Mitchell, 2015</xref>; <xref ref-type="bibr" rid="B53">Zou et al., 2019</xref>; <xref ref-type="bibr" rid="B20">Li et al., 2021</xref>; <xref ref-type="bibr" rid="B12">He et al., 2023</xref>). Jiang et al. used support vector machines (SVM) (<xref ref-type="bibr" rid="B42">Zhang et al., 2022b</xref>) to predict associations between human diseases and miRNAs (<xref ref-type="bibr" rid="B13">Jiang et al., 2013</xref>). Chen et al. proposed a decision-tree-based ensemble method for miRNA-disease association prediction (<xref ref-type="bibr" rid="B8">Chen et al., 2019</xref>). Zhao et al. used multifactorial random forest (RF) statistical analysis to construct and test miRNA features identified for Alzheimer&#x2019;s disease (<xref ref-type="bibr" rid="B45">Zhao et al., 2020</xref>). William Kang et al. proposed random forests to predict the association between miRNAs and cancers (<xref ref-type="bibr" rid="B16">Kang et al., 2022</xref>). However, these machine learning-based predictions&#x2019; accuracy rates for miRNAs and disease association are relatively low. Traditional machine learning algorithms are not highly precise and have not reached the desired level of accuracy.</p>
<p>As technology has evolved, deep learning (<xref ref-type="bibr" rid="B17">LeCun et al., 2015</xref>; <xref ref-type="bibr" rid="B29">Tang et al., 2021</xref>; <xref ref-type="bibr" rid="B39">Zeng et al., 2022b</xref>; <xref ref-type="bibr" rid="B34">Wang et al., 2023b</xref>), with its better predictive performance than machine learning, has been applied in various industries. Liu et al. used autoencoders to obtain low-dimensional feature representations and random forests to predict the association between miRNAs and diseases (<xref ref-type="bibr" rid="B22">Liu et al., 2022</xref>). Using regression models, Zhou et al. learned feature representations from miRNA and disease similarity networks. They input the integrated miRNAs and disease feature representations into deep autoencoders, predicting new miRNA and disease association through reconstruction error (<xref ref-type="bibr" rid="B48">Zhou et al., 2021</xref>). Zhang et al. predicted miRNA-disease associations using node-level attention encoders (<xref ref-type="bibr" rid="B41">Zhang et al., 2022c</xref>). By integrating latent features and similarities, Liu and others used stacked autoencoders and XGBoost to infer unknown miRNA-disease associations (<xref ref-type="bibr" rid="B21">Liu et al., 2021</xref>).</p>
<p>This paper proposes a new deep learning-based method, SPALP. It uses sparse autoencoders to extract latent features of miRNAs and diseases, combining miRNA latent features with miRNA similarity matrices into M-features and disease latent features with disease similarity matrices into D-features. M-features and D-features are then combined for feature reconstruction. Finally, a multi-layer perceptron is used to predict unknown miRNA-disease associations. This method achieved an average AUC value of 0.9854 and an average accuracy rate of 95.12% on HMDD V2.0 (<ext-link ext-link-type="uri" xlink:href="http://cmbi.bjmu.edu.cn/hmdd">http://cmbi.bjmu.edu.cn/hmdd</ext-link>). The model was then applied biologically, predicting the top 30 miRNAs possibly associated with five elderly diseases: Lupus Erythematosus, Acute Myeloid Leukemia, Cardiovascular, Stroke, and Diabetes Mellitus. Upon validation with RNADisease V4.0 (<xref ref-type="bibr" rid="B6">Chen et al., 2023b</xref>), 27, 29, 29, 30, and 30 of these miRNAs, respectively, were found to be associated with the corresponding disease. The SPALP method proposed in this paper can effectively predict the association between miRNAs and diseases, significantly assisting downstream analysis in bioinformatics.</p>
</sec>
<sec id="s3" sec-type="materials|methods">
<title>2 Materials and methods</title>
<sec id="s3-1">
<title>2.1 Benchmark datasets</title>
<p>Constructing benchmark data is a sufficient and necessary condition for building a robust and reliable prediction model (<xref ref-type="bibr" rid="B19">Li and Liu, 2023</xref>; <xref ref-type="bibr" rid="B43">Zhang et al., 2023</xref>). We collected known association information between miRNAs and diseases, miRNAs identification name corresponding matrices, and miRNAs-diseases association adjacency matrices. We constructed miRNAs functional similarity matrices and diseases semantic similarity data. We generated latent features of miRNAs and diseases based on the miRNAs-diseases association matrix.</p>
<p>In this paper, we experimented with miRNAs-diseases association provided by HMDD v2.0 (<ext-link ext-link-type="uri" xlink:href="http://cmbi.bjmu.edu.cn/hmdd">http://cmbi.bjmu.edu.cn/hmdd</ext-link>), which includes 495 types of miRNAs and 383 kinds of diseases. We constructed an adjacency matrix of miRNAs-diseases interaction, MD, to facilitate the experiment and better represent the relationship between miRNAs and diseases. Each row in this matrix represents a type of miRNAs, and each column represents a type of diseases. If the <italic>ith</italic> kind of miRNAs and the <italic>jth</italic> type of diseases have a known association in the MD matrix, the <italic>MD(i, j)</italic> is set to 1; if there is no association between that miRNAs and diseases, it is set to 0. This method was used to construct the miRNAs-diseases association adjacency matrix MD.</p>
<p>In HMDD v2.0, there are 5,430 known miRNA-disease association pairs, which serve as positive samples. We performed k-means clustering on unknown samples and randomly extracted a corresponding number of samples from each cluster as negative samples (<xref ref-type="bibr" rid="B49">Zhou et al., 2020a</xref>). We used downsampling to balance the positive and negative samples.</p>
</sec>
<sec id="s3-2">
<title>2.2 SPALP model</title>
<p>The SPALP model mainly consists of the following steps.<list list-type="simple">
<list-item>
<p>(i) Based on previous research, construct the miRNAs functional and diseases semantic similarity matrices. Decompose the known miRNAs-diseases association matrix into the miRNAs and diseases feature matrices. The miRNAs feature matrix is the miRNAs-diseases association matrix, and the diseases feature matrix is the transpose of the miRNAs-diseases association matrix.</p>
</list-item>
<list-item>
<p>(ii) Input the miRNAs feature matrix into a sparse autoencoder to obtain the latent feature matrix. Similarly, input the diseases feature matrix into a sparse autoencoder to get the latent feature matrix.</p>
</list-item>
<list-item>
<p>(iii) Combine the miRNAs latent feature matrix with the functional similarity matrix to form the M-feature matrix. Combine the diseases latent feature matrix with the semantic similarity matrix to create the D-feature matrix. Then, combine the M-feature matrix and the D-feature matrix to get the M-D-feature matrix.</p>
</list-item>
<list-item>
<p>(iv) Input the M-D-feature matrix into a Multi-layer Perceptron (MLP) for training.</p>
</list-item>
<list-item>
<p>(v) Use the MLP to predict unknown association between miRNAs and diseases. Output the probability value of miRNAs associated with a certain disease, sort them in descending order according to the value, remove the known miRNAs associated with the disease in HMDDv2.0, and finally output the predicted miRNAs.</p>
</list-item>
</list>
</p>
<p>These steps are illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>The workflow of the SPALP model.</p>
</caption>
<graphic xlink:href="fgene-15-1369811-g001.tif"/>
</fig>
</sec>
<sec id="s3-3">
<title>2.3 MiRNA functional similarity</title>
<p>The concept of miRNA functional similarity originates from the research conducted by Wang et al. (<xref ref-type="bibr" rid="B31">Wang et al., 2010</xref>). This concept is based on the observation that if a certain miRNA is associated with a specific disease, other similar miRNAs are also likely to be associated with that disease. Based on this idea, we constructed the miRNA functional similarity matrix, in which each element expresses the functional similarity score between two miRNAs.</p>
</sec>
<sec id="s3-4">
<title>2.4 Disease semantic similarity</title>
<p>Based on the approach by Wang et al. (<xref ref-type="bibr" rid="B31">Wang et al., 2010</xref>) and the MeSH database, a Directed Acyclic Graph (DAG) can be constructed, where the vertexes of the DAG represent diseases, and the edges of the DAG represent relationships between the vertexes. Only one type of relationship connects child vertexes to their parent vertexes. A given disease A can be represented as DAG(A) &#x003D; (T, E), where T is the set containing A and all its ancestor nodes, and E is the collection of corresponding edges. We define the contribution of disease t to the semantic value of disease A as Eq. <xref ref-type="disp-formula" rid="e1">1</xref>:<disp-formula id="e1">
<mml:math id="m1">
<mml:mrow>
<mml:mrow>
<mml:mfenced close="" open="{">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>A</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x003D;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mtext>&#x2003;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>f</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>t</mml:mi>
<mml:mo>&#x003D;</mml:mo>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>A</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x003D;</mml:mo>
<mml:mi mathvariant="normal">max</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mrow>
<mml:mfenced close="}" open="{">
<mml:mrow>
<mml:mo>&#x2206;</mml:mo>
<mml:mo>&#x2217;</mml:mo>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>A</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:msup>
<mml:mi>t</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:msup>
<mml:mi>t</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>children&#x2009;of&#x2009;</mml:mtext>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mtext>&#x2003;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mi>f</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>t</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>Here &#x2206; is the semantic contribution decay factor. Wang et al. set its value at 0.5 in their study on disease semantic similarity. The contribution of disease A to itself is 1, and the contributions of other diseases to A decrease with increasing distance. They define the semantic value DV(A) of disease A as Eq. <xref ref-type="disp-formula" rid="e2">2</xref>:<disp-formula id="e2">
<mml:math id="m2">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x003D;</mml:mo>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>A</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>A</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>The semantic similarity S(A, B) between disease A and disease B is determined using Eq. <xref ref-type="disp-formula" rid="e3">3</xref>:<disp-formula id="e3">
<mml:math id="m3">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x003D;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>A</mml:mi>
</mml:msub>
<mml:mo>&#x2229;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>B</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>A</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x002B;</mml:mo>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>B</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>D</mml:mi>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
</sec>
<sec id="s3-5">
<title>2.5 MiRNA and disease feature reconstruction</title>
<p>From the adjacency matrix MD of miRNAs-diseases association, we obtain the feature matrix related to miRNAs and the feature matrix related to diseases. The dimension of the <inline-formula id="inf1">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> matrix is 495 &#xd7; 383, and the dimension of the <inline-formula id="inf2">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> matrix is 383 &#xd7; 495 as shown in Eqs <xref ref-type="disp-formula" rid="e4">4</xref> and <xref ref-type="disp-formula" rid="e5">5</xref>.<disp-formula id="e4">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x003D;</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
<disp-formula id="e5">
<mml:math id="m7">
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x003D;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mi mathvariant="normal">T</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
<p>The two feature matrices defined in Eqs <xref ref-type="disp-formula" rid="e4">4</xref> and <xref ref-type="disp-formula" rid="e5">5</xref> are input into a sparse autoencoder, from which we obtain the latent features of miRNAs (M) and diseases (D). The dimension of the M matrix is 495 &#xd7; 128, and the dimension of the D matrix is 383 &#xd7; 128.</p>
<p>Based on the clustering results, the miRNA indices and disease indices are extracted and combined into an index matrix. Then, using the indices from the index matrix, the features of miRNAs and diseases can be retrieved. The latent features of miRNAs (M) are combined with the miRNA functional similarity matrix according to miRNA indices to form the M-feature as shown in Eq. <xref ref-type="disp-formula" rid="e6">6</xref>.<disp-formula id="e6">
<mml:math id="m8">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#x003D;</mml:mo>
<mml:mrow>
<mml:mfenced close="}" open="{">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<p>The latent features of diseases (D) are combined with the disease semantic similarity matrix according to disease indices to form the D-feature as shown in Eq. <xref ref-type="disp-formula" rid="e7">7</xref>.<disp-formula id="e7">
<mml:math id="m9">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#x003D;</mml:mo>
<mml:mrow>
<mml:mfenced close="}" open="{">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
</p>
<p>The M-feature and D-feature matrices are combined to create the final M-D feature matrix used for model processing as shown in Eq. <xref ref-type="disp-formula" rid="e8">8</xref>.<disp-formula id="e8">
<mml:math id="m10">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>D</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#x003D;</mml:mo>
<mml:mrow>
<mml:mfenced close="}" open="{">
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>M</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>D</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
</p>
<p>This process allows for a comprehensive representation of miRNA and disease characteristics, incorporating inherent features and relational similarities to enhance the model&#x2019;s predictive accuracy.</p>
</sec>
<sec id="s3-6">
<title>2.6 Sparse autoencoder</title>
<p>For a sparse autoencoder, the objective function consists of the reconstruction error and the sparsity penalty term. The reconstruction error part trains the network by minimizing the error between the input and output. Its formula is as follows:<disp-formula id="e9">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mi>J</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mtext>econstruction</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">b</mml:mi>
<mml:mo>;</mml:mo>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x003D;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:msup>
<mml:mrow>
<mml:mfenced close="" open="&#x2016;">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mfenced close="&#x2016;" open="">
<mml:mrow>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
</p>
<p>Where <inline-formula id="inf3">
<mml:math id="m12">
<mml:mrow>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf4">
<mml:math id="m13">
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> are the network weights and biases, <inline-formula id="inf5">
<mml:math id="m14">
<mml:mrow>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the <italic>i</italic>th sample in the training datasets, and <inline-formula id="inf6">
<mml:math id="m15">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the output of the network.</p>
<p>The sparsity penalty term can be implemented through a sparsity constraint, which is formulated as follows:<disp-formula id="e10">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mi>J</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mtext>parse</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x003D;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x003D;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>s</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi mathvariant="normal">L</mml:mi>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mrow>
<mml:mfenced close="&#x2016;" open="">
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>&#x3c1;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>
</p>
<p>In this formula, <inline-formula id="inf7">
<mml:math id="m17">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the number of neurons in the hidden layer, <inline-formula id="inf8">
<mml:math id="m18">
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the output of the hidden layer, <inline-formula id="inf9">
<mml:math id="m19">
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the desired average activation of the neurons, and <inline-formula id="inf10">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>&#x3c1;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the actual average activation computed. <inline-formula id="inf11">
<mml:math id="m21">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi mathvariant="normal">L</mml:mi>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mrow>
<mml:mfenced close="&#x2016;" open="">
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>&#x3c1;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the Kullback-Leibler divergence and is calculated using the following formula:<disp-formula id="e11">
<mml:math id="m22">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mrow>
<mml:mfenced close="&#x2016;" open="">
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>&#x3c1;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x003D;</mml:mo>
<mml:mi>&#x3c1;</mml:mi>
<mml:mi mathvariant="italic">log</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>&#x3c1;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x002B;</mml:mo>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3c1;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi mathvariant="italic">log</mml:mi>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="normal">&#x3c1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>&#x3c1;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>
</p>
<p>A sparse autoencoder uses a neural network to learn features and perform feature extraction. Including the sparsity penalty ensures that the learned representations are robust and that the network does not overfit the training data. This approach is particularly beneficial for capturing the essential characteristics of the data in a compressed form, which is crucial for effective feature representation in complex datasets like those involving miRNAs and diseases.</p>
</sec>
<sec id="s3-7">
<title>2.7 Multi-layer perceptron</title>
<p>A Multi-layer Perceptron (MLP) is a feedforward neural network consisting of an input layer, one or more hidden layers, and an output layer; it learns the mapping from input to output for pattern recognition and classification tasks.</p>
<p>Assuming there are <inline-formula id="inf12">
<mml:math id="m23">
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> samples with <inline-formula id="inf13">
<mml:math id="m24">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> features, the input layer <inline-formula id="inf14">
<mml:math id="m25">
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> can be represented as <inline-formula id="inf15">
<mml:math id="m26">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. If the MLP has only one hidden layer with <inline-formula id="inf16">
<mml:math id="m27">
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> neurons, then the weights and biases of the hidden layer can be denoted as <inline-formula id="inf17">
<mml:math id="m28">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf18">
<mml:math id="m29">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, respectively. If there are <inline-formula id="inf19">
<mml:math id="m30">
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> output labels, the weights and biases of the output layer are <inline-formula id="inf20">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>o</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf21">
<mml:math id="m32">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>o</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. The outputs of the hidden layer can be computed by the formula <xref ref-type="disp-formula" rid="e12">(12)</xref>. The output layer can be calculated using the formula <xref ref-type="disp-formula" rid="e13">(13)</xref>.<disp-formula id="e12">
<mml:math id="m33">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#x003D;</mml:mo>
<mml:mi>X</mml:mi>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
<mml:mo>&#x002B;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>
<disp-formula id="e13">
<mml:math id="m34">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mo>&#x003D;</mml:mo>
<mml:mi>H</mml:mi>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>o</mml:mi>
</mml:msub>
<mml:mo>&#x002B;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>o</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>
</p>
<p>We typically use the Rectified Linear Unit (ReLU) activation function.<disp-formula id="e14">
<mml:math id="m35">
<mml:mrow>
<mml:mtext>ReLU</mml:mtext>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="normal">y</mml:mi>
<mml:mo>&#x003D;</mml:mo>
<mml:mi>max</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>
</p>
<p>For the <inline-formula id="inf22">
<mml:math id="m36">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>th layer (<inline-formula id="inf23">
<mml:math id="m37">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> &#x003D; 1, 2, &#x2026;, <inline-formula id="inf24">
<mml:math id="m38">
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>), the output is <inline-formula id="inf25">
<mml:math id="m39">
<mml:mrow>
<mml:msup>
<mml:mi>z</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> before the activation function and the output is <inline-formula id="inf26">
<mml:math id="m40">
<mml:mrow>
<mml:msup>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> after the activation function. Then, the output of the previous layer after activation becomes the input for the current layer, and the output before activation of the current layer is:<disp-formula id="e15">
<mml:math id="m41">
<mml:mrow>
<mml:msup>
<mml:mi>z</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
<mml:mo>&#x003D;</mml:mo>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
<mml:msup>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>&#x002B;</mml:mo>
<mml:msup>
<mml:mi>b</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>
<disp-formula id="e16">
<mml:math id="m42">
<mml:mrow>
<mml:msup>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
<mml:mo>&#x003D;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced close=")" open="(">
<mml:mrow>
<mml:msup>
<mml:mi>z</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>
</p>
<p>Computing the output values through the weights and biases of each layer is commonly known as forward propagation. We use a process called back propagation to calculate the error and update the model. In back propagation, we work backward from the output layer to the input layer to obtain the gradient formulas for each layer&#x2019;s weights <inline-formula id="inf27">
<mml:math id="m43">
<mml:mrow>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and biases <inline-formula id="inf28">
<mml:math id="m44">
<mml:mrow>
<mml:msup>
<mml:mi>b</mml:mi>
<mml:mi>l</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>This structure allows the MLP to effectively capture and model complex relationships in the data, making it a powerful tool for classification and regression in various fields, including bioinformatics and medical research.</p>
</sec>
<sec id="s3-8">
<title>2.8 Evaluation metrics</title>
<p>In our experiments, Accuracy, Precision, Recall, F1-score, True Positive Rate (TPR), and False Positive Rate (FPR) are used as evaluation metrics to assess the performance of the SPALP model. These metrics are computed from the True Positive (TP), False Positive (FP), True Negative (TN), and False Negative (FN) counts of the two-class confusion matrix (<xref ref-type="bibr" rid="B2">Ai et al., 2023</xref>; <xref ref-type="bibr" rid="B52">Zhu et al., 2023a</xref>; <xref ref-type="bibr" rid="B51">Zhu et al., 2023b</xref>; <xref ref-type="bibr" rid="B32">Wang et al., 2023c</xref>; <xref ref-type="bibr" rid="B26">Qian et al., 2023</xref>; <xref ref-type="bibr" rid="B54">Zou et al., 2023</xref>). In order to display the performance of the model more intuitively, the Receiver Operating Characteristic (ROC) curve can be plotted from TPR and FPR, and the Precision-Recall (PR) curve can be plotted from Precision and Recall. The area under the ROC curve is represented by AUC.<disp-formula id="e17">
<mml:math id="m45">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>y</mml:mi>
<mml:mo>&#x003D;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(17)</label>
</disp-formula>
<disp-formula id="e18">
<mml:math id="m46">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x003D;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo>&#x003D;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(18)</label>
</disp-formula>
<disp-formula id="e19">
<mml:math id="m47">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>&#x003D;</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo>&#x003D;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(19)</label>
</disp-formula>
<disp-formula id="e20">
<mml:math id="m48">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#x003D;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(20)</label>
</disp-formula>
<disp-formula id="e21">
<mml:math id="m49">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>R</mml:mi>
<mml:mo>&#x003D;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(21)</label>
</disp-formula>
<disp-formula id="e22">
<mml:math id="m50">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>R</mml:mi>
<mml:mo>&#x003D;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x002B;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(22)</label>
</disp-formula>
</p>
</sec>
</sec>
<sec id="s4" sec-type="results|discussion">
<title>3 Results and discussion</title>
<p>The experiments are implemented using the Python programming language. The hardware environment is as follows: 12th Gen Intel (R) Core (TM) i7-12700F 2.10&#xa0;GHz CPU, NVIDIA GeForce RTX 4090 GPU, 16&#xa0;GB RAM and Windows 10 operating system. The parameter settings of the SPALP model are shown in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>The parameter settings of SPALP model.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">SPALP</th>
<th align="center">Parameter settings</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Sparse autoencoder</td>
<td align="center">Learning rate &#x003D; 0.001, optimizer: Adam, activation function: sigmoid, loss &#x003D; reconstruction error loss &#x002B; sparse regularization loss</td>
</tr>
<tr>
<td align="center">MLP</td>
<td align="center">Optimizer: Adam, activation function: ReLU, maximum number of iterations: 300</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The experimental setup is as follows. The SPALP model consists of a sparse encoder and a multi-layer perceptron. Thus, the latent feature dimensions generated by the sparse encoder, different data combinations, and various classifiers can all impact the results. To explore the optimal parameters and the effectiveness of the model, we set up the following experiments:<list list-type="simple">
<list-item>
<p>(i) Comparative analysis of different latent feature dimensions produced by the sparse encoder.</p>
</list-item>
<list-item>
<p>(ii) Comparative analysis of the effects of different data combinations.</p>
</list-item>
<list-item>
<p>(iii) Comparative analysis of the effects of different classifiers.</p>
</list-item>
<list-item>
<p>(iv) Comparative analysis of the performance of different prediction models.</p>
</list-item>
<list-item>
<p>(v) Case study for biological validation of the SPALP model.</p>
</list-item>
</list>
</p>
<sec id="s4-1">
<title>3.1 Analysis of latent feature dimensions produced by the sparse autoencoder</title>
<p>To study the impact of latent feature dimensions on the SPALP model, the sparse autoencoder is used to generate miRNA and disease latent features with 8, 16, 32, 64, 128, 256, and 512 dimensions. We first plot the loss function curves for the miRNA and disease latent features of each dimension obtained through the sparse autoencoder, as shown in <xref ref-type="fig" rid="F2">Figure 2</xref>. The loss is calculated by the sparse autoencoder and represents the error between the original data and the output of the decoder. <xref ref-type="fig" rid="F2">Figure 2</xref> shows that when the dimension is set to 128, the loss function reliably converges to its minimum value.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Convergence curves of loss function with different dimensions for <bold>(A)</bold> miRNAs and <bold>(B)</bold> diseases.</p>
</caption>
<graphic xlink:href="fgene-15-1369811-g002.tif"/>
</fig>
<p>By comparing these two loss function graphs, we found that the loss values of miRNAs latent features and diseases latent features continuously decrease from 8 dimensions to 64 dimensions, indicating that the larger the dimension of latent features before 64 dimensions, the better performance can be obtained. However, the loss values of latent features from 64 to 512 dimensions are essentially the same.</p>
<p>To further examine how the dimension of the latent features affects the capability of the SPALP model, we also plot ROC curves and PR curves for comparison, with the results shown in <xref ref-type="fig" rid="F3">Figure 3</xref>. <xref ref-type="fig" rid="F3">Figure 3</xref> demonstrates that the ROC and PR curves are best when the dimension is 128, because the areas under the ROC and PR curves are the largest.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Comparisons of <bold>(A)</bold> ROC curves and <bold>(B)</bold> PR curves with different dimensions.</p>
</caption>
<graphic xlink:href="fgene-15-1369811-g003.tif"/>
</fig>
<p>Additionally, to more clearly observe the evaluation metrics for 8, 16, 32, 64, 128, 256, and 512 dimensions and to explore the optimal dimension, the evaluation results are shown in <xref ref-type="table" rid="T2">Table 2</xref>. When the dimension is 128, the SPALP model can get optimal prediction results. Furthermore, two phenomena can be observed. Firstly, when the latent feature dimension size is below 128, there is a gradual improvement based on various evaluation metrics from 8 to 128 dimensions. This indicates that when the dimension is below 128, the lower the dimension, the less comprehensive the feature representation will be. Secondly, if the dimension size exceeds 128, the performance of the SPALP model progressively worsens with increasing dimension size. This decline in performance may be due to redundancy in the data features, as excessive features can lead to overfitting or noise in the model. Therefore, we selected 128 dimensions as the optimal latent feature dimension for the SPALP model.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Comparison of different latent feature dimensions produced by the sparse encoder.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Dimensionality</th>
<th align="center">Accuracy</th>
<th align="center">Precision</th>
<th align="center">Recall</th>
<th align="center">F1-score</th>
<th align="center">AUC</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">8</td>
<td align="center">0.8359</td>
<td align="center">0.8377</td>
<td align="center">0.8330</td>
<td align="center">0.8353</td>
<td align="center">0.9093</td>
</tr>
<tr>
<td align="center">16</td>
<td align="center">0.8530</td>
<td align="center">0.8288</td>
<td align="center">0.8944</td>
<td align="center">0.8604</td>
<td align="center">0.9260</td>
</tr>
<tr>
<td align="center">32</td>
<td align="center">0.8908</td>
<td align="center">0.8862</td>
<td align="center">0.8953</td>
<td align="center">0.8907</td>
<td align="center">0.9517</td>
</tr>
<tr>
<td align="center">64</td>
<td align="center">0.9341</td>
<td align="center">0.9437</td>
<td align="center">0.9257</td>
<td align="center">0.9346</td>
<td align="center">0.9798</td>
</tr>
<tr>
<td align="center">
<bold>128</bold>
</td>
<td align="center">
<bold>0.9461</bold>
</td>
<td align="center">
<bold>0.9494</bold>
</td>
<td align="center">0.9415</td>
<td align="center">
<bold>0.9455</bold>
</td>
<td align="center">
<bold>0.9859</bold>
</td>
</tr>
<tr>
<td align="center">256</td>
<td align="center">0.9397</td>
<td align="center">0.9397</td>
<td align="center">
<bold>0.9428</bold>
</td>
<td align="center">0.9441</td>
<td align="center">0.9832</td>
</tr>
<tr>
<td align="center">512</td>
<td align="center">0.9382</td>
<td align="center">0.9353</td>
<td align="center">0.9405</td>
<td align="center">0.9403</td>
<td align="center">0.9802</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values represent the optimal value of the current column.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s4-2">
<title>3.2 Analysis of the effectiveness of latent features</title>
<p>To explore the effectiveness of our model in predicting miRNAs and diseases association, four groups of feature experiments are designed as follows.</p>
<p>The first group used only similarity data, i.e., miRNAs functional similarity data and diseases semantic similarity data. The second group combined similarity data with unprocessed data. The third group used only latent features produced by the sparse encoder, i.e., miRNAs and diseases latent features. The fourth group is SPALP, which used both similarity data and latent features processed by the sparse encoder.</p>
<p>By comparing these four groups, we investigate the effectiveness of our model in the combined prediction of miRNAs and diseases association. We plotted ROC and Precision-Recall (PR) curves for the above combinations, as shown in <xref ref-type="fig" rid="F4">Figure 4</xref>. Additionally, we compiled statistics for the different combinations, including Accuracy, Precision, Recall, F1-score, and AUC values, as shown in <xref ref-type="table" rid="T3">Table 3</xref>.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Comparisons of <bold>(A)</bold> ROC curves and <bold>(B)</bold> PR curves with different reconstruction features.</p>
</caption>
<graphic xlink:href="fgene-15-1369811-g004.tif"/>
</fig>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Comparison of different data combinations.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center"/>
<th align="center">Accuracy</th>
<th align="center">Precision</th>
<th align="center">Recall</th>
<th align="center">F1-score</th>
<th align="center">AUC</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Only Similarity</td>
<td align="center">0.8203</td>
<td align="center">0.8259</td>
<td align="center">0.8138</td>
<td align="center">0.8198</td>
<td align="center">0.8991</td>
</tr>
<tr>
<td align="center">Similarity and initial data</td>
<td align="center">0.8954</td>
<td align="center">0.9025</td>
<td align="center">0.8902</td>
<td align="center">0.8963</td>
<td align="center">0.9591</td>
</tr>
<tr>
<td align="center">Only Latent Feature</td>
<td align="center">0.9382</td>
<td align="center">0.9442</td>
<td align="center">0.9320</td>
<td align="center">0.9381</td>
<td align="center">0.9811</td>
</tr>
<tr>
<td align="center">
<bold>SPALP</bold>
</td>
<td align="center">
<bold>0.9461</bold>
</td>
<td align="center">
<bold>0.9494</bold>
</td>
<td align="center">
<bold>0.9415</bold>
</td>
<td align="center">
<bold>0.9455</bold>
</td>
<td align="center">
<bold>0.9859</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values represent the optimal value of the current column.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>From the figures and the table, the accuracy of these four experiments is 0.8203, 0.8954, 0.9382, and 0.9461, respectively, and the AUC value is 0.8991, 0.9591, 0.9811, and 0.9859, respectively. The comparison indicates that predictions using only similarity data give the worst results. The performance improves when similarity data are combined with unprocessed feature matrices. The best results are achieved using SPALP, which combines latent features with similarity data.</p>
</sec>
<sec id="s4-3">
<title>3.3 Comparison of different classifiers</title>
<p>Several commonly used and effective classifiers are compared with MLP, including K-Nearest Neighbors (KNN), Decision Tree, Random Forest, Logistic Regression, and XGBoost. ROC and PR curves are plotted based on their performance in the experiments, as shown in <xref ref-type="fig" rid="F5">Figure 5</xref>. The larger the area under the ROC curve, the better the prediction effect. For the PR curve, the larger the area enclosed by the curve and the larger the equilibrium point (Recall &#x003D; Precision), the better the performance. <xref ref-type="fig" rid="F5">Figure 5</xref> demonstrates that MLP reaches the optimal performance, making it the best classifier.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Comparisons of <bold>(A)</bold> ROC curves and <bold>(B)</bold> PR curves with different classifiers.</p>
</caption>
<graphic xlink:href="fgene-15-1369811-g005.tif"/>
</fig>
<p>
<xref ref-type="table" rid="T4">Table 4</xref> shows the comparison among the six classifiers (Decision Tree, KNN, Logistic Regression, Random Forest, XGBoost, MLP) by accuracy, precision, recall, F1-score and AUC values. Although the recall value obtained by the MLP classifier is slightly lower than that of XGBoost, MLP is the best classifier overall.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Comparison of different classifiers.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center"/>
<th align="center">Accuracy</th>
<th align="center">Precision</th>
<th align="center">Recall</th>
<th align="center">F1-score</th>
<th align="center">AUC</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Decision Tree</td>
<td align="center">0.8341</td>
<td align="center">0.8415</td>
<td align="center">0.8277</td>
<td align="center">0.8346</td>
<td align="center">0.8997</td>
</tr>
<tr>
<td align="center">KNN</td>
<td align="center">0.8171</td>
<td align="center">0.8289</td>
<td align="center">0.8040</td>
<td align="center">0.8163</td>
<td align="center">0.9028</td>
</tr>
<tr>
<td align="center">Logistic Regression</td>
<td align="center">0.8424</td>
<td align="center">0.8422</td>
<td align="center">0.8469</td>
<td align="center">0.8445</td>
<td align="center">0.9197</td>
</tr>
<tr>
<td align="center">Random Forest</td>
<td align="center">0.8747</td>
<td align="center">0.8673</td>
<td align="center">0.8879</td>
<td align="center">0.8775</td>
<td align="center">0.9475</td>
</tr>
<tr>
<td align="center">XGBoost</td>
<td align="center">0.9276</td>
<td align="center">0.9130</td>
<td align="center">
<bold>0.9471</bold>
</td>
<td align="center">0.9298</td>
<td align="center">0.9808</td>
</tr>
<tr>
<td align="center">
<bold>MLP</bold>
</td>
<td align="center">
<bold>0.9461</bold>
</td>
<td align="center">
<bold>0.9494</bold>
</td>
<td align="center">0.9415</td>
<td align="center">
<bold>0.9455</bold>
</td>
<td align="center">
<bold>0.9859</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values represent the optimal value of the current column.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Evaluation criteria for classifier performance mainly include Accuracy, F1-score, and the AUC value of the ROC curve. MLP had the highest Accuracy, F1-score, and AUC values in this experiment, indicating the most significant classification effect.</p>
</sec>
<sec id="s4-4">
<title>3.4 Comparison with other computational methods</title>
<p>To further evaluate the performance of our model on prediction task, we compared the SPALP model with other methods (SMALF (<xref ref-type="bibr" rid="B21">Liu et al., 2021</xref>), GBDT-LR (<xref ref-type="bibr" rid="B50">Zhou et al., 2020b</xref>), ABMDA (<xref ref-type="bibr" rid="B46">Zhao et al., 2019</xref>), HGANMDA (<xref ref-type="bibr" rid="B47">Zhengwei et al., 2022</xref>), SAEMDA (<xref ref-type="bibr" rid="B30">Wang et al., 2022</xref>), ELMDA (<xref ref-type="bibr" rid="B11">Gu and Li, 2023</xref>)).</p>
<p>SMALF uses stacked autoencoders for latent feature extraction and XGBoost for classification. GBDT-LR initially integrates miRNAs similarity and disease similarity to represent miRNAs-diseases relationship, then applies GBDT to extract new features, and finally, the logistic regression algorithm is used to predict miRNAs-diseases association. ABMDA utilizes a boosting algorithm integrated with many decision trees to mine miRNAs-diseases association and accurately calculate miRNAs-diseases similarity. HGANMDA uses a hierarchical attention network to learn the importance of different neighboring nodes and meta paths, and uses bilinear decoders to predict the association of miRNA diseases. SAEMDA uses stacked autoencoders to train and predict miRNA disease associations, while ELMDA extracts structural features of miRNA disease pairs and uses multi classifier voting to predict disease-related miRNAs.</p>
<p>In this section, we designed a comparative experiment to compare the above six models with the SPALP model. The experimental results are shown in <xref ref-type="fig" rid="F6">Figure 6</xref>. Using the data provided in HMDDv2.0, the experimental results showed that the SPALP model had the highest AUC value among these seven models, indicating that the SPALP model has good predictive ability for miRNA disease associations.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Comparison of different methods.</p>
</caption>
<graphic xlink:href="fgene-15-1369811-g006.tif"/>
</fig>
</sec>
<sec id="s4-5">
<title>3.5 Case study</title>
<p>To further validate the performance of SPALP, five different diseases are selected as case studies for predicting miRNA-disease associations in our experiments: Acute Myeloid Leukemia, Lupus Erythematosus, Cardiovascular disease, Stroke, and Diabetes Mellitus (Type 2). All of them are common diseases in the elderly population.</p>
<p>The SPALP model can predict unknown miRNA-disease associations by integrating known miRNA-disease associations and similarity information. First, the SPALP model is trained on the known miRNA-disease associations in the HMDD v2.0 database. The associations between all miRNAs and a given disease are used as the test set. Then, the trained model is used to calculate, for each of the aforementioned diseases, the association score of every miRNA, which is a continuous value. Finally, the miRNAs are arranged in descending order of predicted score (probability value). After removing the miRNAs already known to be associated with these diseases in HMDD v2.0, the miRNAs predicted by the SPALP model to be associated with a given disease are output. The RNADisease V4.0 database can be obtained from <ext-link ext-link-type="uri" xlink:href="http://www.rnadisease.org/">http://www.rnadisease.org/</ext-link> and is used to validate the top 30 miRNAs.</p>
<p>The validation results are shown in <xref ref-type="table" rid="T5">Tables 5</xref>&#x2013;<xref ref-type="table" rid="T9">9</xref>. After validation in RNADisease V4.0, 27 out of the top 30 miRNAs predicted by the SPALP model to be related to Lupus Erythematosus passed the validation, as shown in <xref ref-type="table" rid="T5">Table 5</xref>. In <xref ref-type="table" rid="T6">Table 6</xref>, except for hsa-mir-205, which was not found among the miRNAs related to Acute Myeloid Leukemia in the database, all other miRNAs predicted by the SPALP model were found. However, we found the miRNA hsa-mir-205 among the miRNAs related to Leukemia in the database, which suggests that hsa-mir-205 is associated with Leukemia. Among the top 30 predicted miRNAs for Cardiovascular disease, 29 can be validated through RNADisease V4.0. More interestingly, for Stroke and Diabetes Mellitus (Type 2), all 30 miRNAs have been verified in the RNADisease V4.0 database. These results indicate that the SPALP model has a strong ability to predict the association between unknown miRNAs and diseases.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>The top 30 miRNAs may be associated with Lupus Erythematosus.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Rank</th>
<th align="center">MiRNAs</th>
<th align="center">Evidence</th>
<th align="center">Rank</th>
<th align="center">MiRNAs</th>
<th align="center">Evidence</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">1</td>
<td align="center">hsa-mir-17</td>
<td align="center">RNADisease V4.0</td>
<td align="center">16</td>
<td align="center">hsa-mir-192</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">hsa-mir-19b</td>
<td align="center">RNADisease V4.0</td>
<td align="center">17</td>
<td align="center">hsa-mir-93</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">hsa-mir-429</td>
<td align="center">RNADisease V4.0</td>
<td align="center">18</td>
<td align="center">
<bold>hsa-mir-373</bold>
</td>
<td align="center">
<bold>unconfirmed</bold>
</td>
</tr>
<tr>
<td align="center">4</td>
<td align="center">hsa-mir-146a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">19</td>
<td align="center">hsa-mir-21</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">hsa-mir-101</td>
<td align="center">RNADisease V4.0</td>
<td align="center">20</td>
<td align="center">hsa-mir-92a</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">6</td>
<td align="center">hsa-mir-18a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">21</td>
<td align="center">hsa-mir-30a</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">7</td>
<td align="center">hsa-mir-141</td>
<td align="center">RNADisease V4.0</td>
<td align="center">22</td>
<td align="center">
<bold>hsa-mir-106b</bold>
</td>
<td align="center">
<bold>unconfirmed</bold>
</td>
</tr>
<tr>
<td align="center">8</td>
<td align="center">hsa-mir-125b</td>
<td align="center">RNADisease V4.0</td>
<td align="center">23</td>
<td align="center">hsa-mir-145</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">9</td>
<td align="center">hsa-mir-205</td>
<td align="center">RNADisease V4.0</td>
<td align="center">24</td>
<td align="center">hsa-mir-19a</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">10</td>
<td align="center">hsa-mir-126</td>
<td align="center">RNADisease V4.0</td>
<td align="center">25</td>
<td align="center">hsa-mir-29a</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">11</td>
<td align="center">hsa-mir-200a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">26</td>
<td align="center">hsa-mir-18b</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">12</td>
<td align="center">hsa-mir-142</td>
<td align="center">RNADisease V4.0</td>
<td align="center">27</td>
<td align="center">
<bold>hsa-mir-130a</bold>
</td>
<td align="center">
<bold>unconfirmed</bold>
</td>
</tr>
<tr>
<td align="center">13</td>
<td align="center">hsa-mir-29c</td>
<td align="center">RNADisease V4.0</td>
<td align="center">28</td>
<td align="center">hsa-mir-7</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">14</td>
<td align="center">hsa-mir-224</td>
<td align="center">RNADisease V4.0</td>
<td align="center">29</td>
<td align="center">hsa-mir-9</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">15</td>
<td align="center">hsa-mir-29b</td>
<td align="center">RNADisease V4.0</td>
<td align="center">30</td>
<td align="center">hsa-mir-302b</td>
<td align="center">RNADisease V4.0</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold entries indicate predicted miRNAs that were not confirmed in RNADisease V4.0.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>The top 30 miRNAs may be associated with Acute Myeloid Leukemia.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Rank</th>
<th align="center">MiRNAs</th>
<th align="center">Evidence</th>
<th align="center">Rank</th>
<th align="center">MiRNAs</th>
<th align="center">Evidence</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">1</td>
<td align="center">hsa-mir-17</td>
<td align="center">RNADisease V4.0</td>
<td align="center">16</td>
<td align="center">hsa-mir-1</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">hsa-mir-18a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">17</td>
<td align="center">hsa-mir-195</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">hsa-mir-19b</td>
<td align="center">RNADisease V4.0</td>
<td align="center">18</td>
<td align="center">hsa-mir-21</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">4</td>
<td align="center">hsa-mir-19a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">19</td>
<td align="center">hsa-mir-124</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">hsa-mir-125b</td>
<td align="center">RNADisease V4.0</td>
<td align="center">20</td>
<td align="center">hsa-mir-32</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">6</td>
<td align="center">hsa-mir-20a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">21</td>
<td align="center">hsa-mir-148a</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">7</td>
<td align="center">hsa-mir-92a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">22</td>
<td align="center">hsa-mir-218</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">8</td>
<td align="center">hsa-mir-130a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">23</td>
<td align="center">hsa-mir-199b</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">9</td>
<td align="center">hsa-mir-23a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">24</td>
<td align="center">hsa-mir-133a</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">10</td>
<td align="center">hsa-mir-142</td>
<td align="center">RNADisease V4.0</td>
<td align="center">25</td>
<td align="center">hsa-mir-181a</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">11</td>
<td align="center">hsa-mir-373</td>
<td align="center">RNADisease V4.0</td>
<td align="center">26</td>
<td align="center">hsa-mir-363</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">12</td>
<td align="center">hsa-mir-203</td>
<td align="center">RNADisease V4.0</td>
<td align="center">27</td>
<td align="center">hsa-mir-30b</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">13</td>
<td align="center">hsa-mir-125a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">28</td>
<td align="center">hsa-mir-432</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">14</td>
<td align="center">hsa-mir-130b</td>
<td align="center">RNADisease V4.0</td>
<td align="center">29</td>
<td align="center">hsa-mir-193b</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">15</td>
<td align="center">
<bold>hsa-mir-205</bold>
</td>
<td align="center">
<bold>unconfirmed</bold>
</td>
<td align="center">30</td>
<td align="center">hsa-mir-224</td>
<td align="center">RNADisease V4.0</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold entries indicate predicted miRNAs that were not confirmed in RNADisease V4.0.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T7" position="float">
<label>TABLE 7</label>
<caption>
<p>The top 30 miRNAs may be associated with Cardiovascular disease.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Rank</th>
<th align="center">MiRNAs</th>
<th align="center">Evidence</th>
<th align="center">Rank</th>
<th align="center">MiRNAs</th>
<th align="center">Evidence</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">1</td>
<td align="center">hsa-mir-20a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">16</td>
<td align="center">hsa-mir-125a</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">hsa-mir-17</td>
<td align="center">RNADisease V4.0</td>
<td align="center">17</td>
<td align="center">hsa-mir-23a</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">hsa-mir-18a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">18</td>
<td align="center">hsa-mir-30b</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">4</td>
<td align="center">hsa-mir-34a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">19</td>
<td align="center">hsa-mir-148a</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">hsa-mir-19a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">20</td>
<td align="center">hsa-mir-143</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">6</td>
<td align="center">hsa-mir-155</td>
<td align="center">RNADisease V4.0</td>
<td align="center">21</td>
<td align="center">hsa-mir-125b</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">7</td>
<td align="center">hsa-mir-92a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">22</td>
<td align="center">hsa-mir-10b</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">8</td>
<td align="center">hsa-mir-21</td>
<td align="center">RNADisease V4.0</td>
<td align="center">23</td>
<td align="center">hsa-mir-335</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">9</td>
<td align="center">hsa-mir-27a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">24</td>
<td align="center">hsa-mir-195</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">10</td>
<td align="center">hsa-mir-205</td>
<td align="center">RNADisease V4.0</td>
<td align="center">25</td>
<td align="center">hsa-mir-99b</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">11</td>
<td align="center">hsa-mir-145</td>
<td align="center">RNADisease V4.0</td>
<td align="center">26</td>
<td align="center">hsa-mir-9</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">12</td>
<td align="center">hsa-mir-24</td>
<td align="center">RNADisease V4.0</td>
<td align="center">27</td>
<td align="center">hsa-mir-26b</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">13</td>
<td align="center">hsa-mir-126</td>
<td align="center">RNADisease V4.0</td>
<td align="center">28</td>
<td align="center">
<bold>hsa-mir-196b</bold>
</td>
<td align="center">
<bold>unconfirmed</bold>
</td>
</tr>
<tr>
<td align="center">14</td>
<td align="center">hsa-mir-31</td>
<td align="center">RNADisease V4.0</td>
<td align="center">29</td>
<td align="center">hsa-mir-210</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">15</td>
<td align="center">hsa-mir-93</td>
<td align="center">RNADisease V4.0</td>
<td align="center">30</td>
<td align="center">hsa-mir-127</td>
<td align="center">RNADisease V4.0</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold entries indicate predicted miRNAs that were not confirmed in RNADisease V4.0.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T8" position="float">
<label>TABLE 8</label>
<caption>
<p>The top 30 miRNAs may be associated with Stroke.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Rank</th>
<th align="center">MiRNAs</th>
<th align="center">Evidence</th>
<th align="center">Rank</th>
<th align="center">MiRNAs</th>
<th align="center">Evidence</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">1</td>
<td align="center">hsa-mir-124</td>
<td align="center">RNADisease V4.0</td>
<td align="center">16</td>
<td align="center">hsa-mir-122</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">hsa-mir-34a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">17</td>
<td align="center">hsa-let-7c</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">hsa-mir-1</td>
<td align="center">RNADisease V4.0</td>
<td align="center">18</td>
<td align="center">hsa-mir-9</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">4</td>
<td align="center">hsa-mir-155</td>
<td align="center">RNADisease V4.0</td>
<td align="center">19</td>
<td align="center">hsa-mir-298</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">hsa-mir-146a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">20</td>
<td align="center">hsa-mir-17</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">6</td>
<td align="center">hsa-mir-181a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">21</td>
<td align="center">hsa-mir-34c</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">7</td>
<td align="center">hsa-mir-362</td>
<td align="center">RNADisease V4.0</td>
<td align="center">22</td>
<td align="center">hsa-mir-126</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">8</td>
<td align="center">hsa-mir-497</td>
<td align="center">RNADisease V4.0</td>
<td align="center">23</td>
<td align="center">hsa-mir-125a</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">9</td>
<td align="center">hsa-let-7f</td>
<td align="center">RNADisease V4.0</td>
<td align="center">24</td>
<td align="center">hsa-mir-18b</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">10</td>
<td align="center">hsa-mir-145</td>
<td align="center">RNADisease V4.0</td>
<td align="center">25</td>
<td align="center">hsa-mir-338</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">11</td>
<td align="center">hsa-mir-20a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">26</td>
<td align="center">hsa-mir-26a</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">12</td>
<td align="center">hsa-let-7i</td>
<td align="center">RNADisease V4.0</td>
<td align="center">27</td>
<td align="center">hsa-mir-494</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">13</td>
<td align="center">hsa-mir-148a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">28</td>
<td align="center">hsa-mir-199b</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">14</td>
<td align="center">hsa-mir-210</td>
<td align="center">RNADisease V4.0</td>
<td align="center">29</td>
<td align="center">hsa-mir-23a</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">15</td>
<td align="center">hsa-mir-199a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">30</td>
<td align="center">hsa-mir-222</td>
<td align="center">RNADisease V4.0</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T9" position="float">
<label>TABLE 9</label>
<caption>
<p>The top 30 miRNAs may be associated with Diabetes Mellitus (Type 2).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Rank</th>
<th align="center">MiRNAs</th>
<th align="center">Evidence</th>
<th align="center">Rank</th>
<th align="center">MiRNAs</th>
<th align="center">Evidence</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">1</td>
<td align="center">hsa-mir-21</td>
<td align="center">RNADisease V4.0</td>
<td align="center">16</td>
<td align="center">hsa-mir-128</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">hsa-mir-223</td>
<td align="center">RNADisease V4.0</td>
<td align="center">17</td>
<td align="center">hsa-mir-146b</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">hsa-mir-146a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">18</td>
<td align="center">hsa-mir-24</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">4</td>
<td align="center">hsa-mir-15a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">19</td>
<td align="center">hsa-mir-320a</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">hsa-mir-17</td>
<td align="center">RNADisease V4.0</td>
<td align="center">20</td>
<td align="center">hsa-mir-122</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">6</td>
<td align="center">hsa-mir-34a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">21</td>
<td align="center">hsa-mir-483</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">7</td>
<td align="center">hsa-mir-29b</td>
<td align="center">RNADisease V4.0</td>
<td align="center">22</td>
<td align="center">hsa-mir-191</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">8</td>
<td align="center">hsa-mir-143</td>
<td align="center">RNADisease V4.0</td>
<td align="center">23</td>
<td align="center">hsa-mir-197</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">9</td>
<td align="center">hsa-mir-103a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">24</td>
<td align="center">hsa-mir-221</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">10</td>
<td align="center">hsa-mir-486</td>
<td align="center">RNADisease V4.0</td>
<td align="center">25</td>
<td align="center">hsa-mir-144</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">11</td>
<td align="center">hsa-mir-20b</td>
<td align="center">RNADisease V4.0</td>
<td align="center">26</td>
<td align="center">hsa-mir-140</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">12</td>
<td align="center">hsa-mir-107</td>
<td align="center">RNADisease V4.0</td>
<td align="center">27</td>
<td align="center">hsa-mir-183</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">13</td>
<td align="center">hsa-mir-20a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">28</td>
<td align="center">hsa-mir-182</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">14</td>
<td align="center">hsa-mir-106b</td>
<td align="center">RNADisease V4.0</td>
<td align="center">29</td>
<td align="center">hsa-mir-106a</td>
<td align="center">RNADisease V4.0</td>
</tr>
<tr>
<td align="center">15</td>
<td align="center">hsa-mir-29a</td>
<td align="center">RNADisease V4.0</td>
<td align="center">30</td>
<td align="center">hsa-mir-153</td>
<td align="center">RNADisease V4.0</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s5" sec-type="conclusion">
<title>4 Conclusion</title>
<p>Along with the deepening of research on miRNAs, more and more evidence suggests that it plays a crucial role in the pathogenesis and progression of various diseases. Studying the association between miRNAs and diseases helps to understand disease mechanisms and provides new targets and strategies for early diagnosis, treatment, and prevention. By analyzing miRNAs expression profiles, scientists can identify miRNAs associated with disease states, providing clues for developing clinically potential biomarkers and treatment methods.</p>
<p>This study integrates deep learning techniques and provides a powerful model, SPALP. Because the number of known associations in the miRNA-disease association dataset only accounts for 0.0286% of the dataset, sparse autoencoders are very suitable for processing such data, effectively capturing key information in the data and extracting effective information. This model uses a sparse autoencoder to generate latent features of miRNAs and diseases. By combining miRNA and disease similarity data with latent features to reconstruct features, and using MLP for training, unknown associations between miRNAs and diseases can be predicted. We conducted biological verification on Lupus Erythematosus, Acute Myeloid Leukemia, Cardiovascular disease, Stroke, and Diabetes Mellitus (Type 2), and output the top 30 miRNAs that may be related to each disease, of which 27, 29, 29, 30, and 30, respectively, passed the verification, indicating that SPALP is a model with good performance. We hope to accelerate research on the association between miRNAs and diseases. Our approach provides new insights into the development of precision medicine and personalized treatment, aiming to provide more accurate guidance for disease diagnosis and treatment strategies in clinical practice.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/Supplementary Material, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s7">
<title>Author contributions</title>
<p>S-LS: Conceptualization, Data curation, Formal Analysis, Investigation, Methodology, Writing&#x2013;original draft. B-WZ: Conceptualization, Data curation, Formal Analysis, Writing&#x2013;original draft. S-ZL: Formal Analysis, Investigation, Visualization, Writing&#x2013;original draft. Y-HX: Conceptualization, Methodology, Writing&#x2013;original draft. AB: Writing&#x2013;review and editing. H-XL: Funding acquisition, Methodology, Resources, Supervision, Writing&#x2013;review and editing.</p>
</sec>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. This research was funded by the National Natural Science Foundation of China (No. 62262019), the Hainan Provincial Natural Science Foundation of China (Nos 823RC488, 623RC481, and 620RC603), Foreign Young Talents Program of the State Bureau of Foreign Experts Ministry of Science and Technology China (No. QN2023034001).</p>
</sec>
<ack>
<p>Thanks to GenericDiagramming Platform for assisting.</p>
</ack>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ai</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Low rank matrix factorization algorithm based on multi-graph regularization for detecting drug-disease association</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinforma.</source> <volume>20</volume> (<issue>5</issue>), <fpage>3033</fpage>&#x2013;<lpage>3043</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2023.3274587</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baek</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Vill&#xe9;n</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Shin</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Camargo</surname>
<given-names>F. D.</given-names>
</name>
<name>
<surname>Gygi</surname>
<given-names>S. P.</given-names>
</name>
<name>
<surname>Bartel</surname>
<given-names>D. P.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>The impact of microRNAs on protein output</article-title>. <source>Nature</source> <volume>455</volume> (<issue>7209</issue>), <fpage>64</fpage>&#x2013;<lpage>71</lpage>. <pub-id pub-id-type="doi">10.1038/nature07242</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bartel</surname>
<given-names>D. P.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>MicroRNAs: genomics, biogenesis, mechanism, and function</article-title>. <source>Cell.</source> <volume>116</volume> (<issue>2</issue>), <fpage>281</fpage>&#x2013;<lpage>297</lpage>. <pub-id pub-id-type="doi">10.1016/s0092-8674(04)00045-5</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Chakrabortty</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Patton</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Smith</surname>
<given-names>B. F.</given-names>
</name>
<name>
<surname>Agarwal</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2023</year>) <source>miRNAs: potential as biomarkers and therapeutic targets for cancer</source>.</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2023b</year>). <article-title>RNADisease v4.0: an updated resource of RNA-associated diseases, providing RNA-disease analysis, enrichment and prediction</article-title>. <source>Nucleic Acids Res.</source> <volume>51</volume> (<issue>D1</issue>), <fpage>D1397</fpage>&#x2013;<lpage>D1404</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkac814</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>Potent antibiotic design via guided search from antibacterial activity evaluations</article-title>. <source>Bioinformatics</source> <volume>39</volume> (<issue>2</issue>), <fpage>btad059</fpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btad059</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>C.-C.</given-names>
</name>
<name>
<surname>Yin</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Ensemble of decision tree reveals potential miRNA-disease associations</article-title>. <source>PLoS Comput. Biol.</source> <volume>15</volume> (<issue>7</issue>), <fpage>e1007209</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1007209</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chuang</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>Jones</surname>
<given-names>P. A.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Epigenetics and microRNAs</article-title>. <source>Pediatr. Res.</source> <volume>61</volume> (<issue>7</issue>), <fpage>24R</fpage>&#x2013;<lpage>29R</lpage>. <pub-id pub-id-type="doi">10.1203/pdr.0b013e3180457684</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Esquela-Kerscher</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Slack</surname>
<given-names>F. J.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Oncomirs&#x2014;microRNAs with a role in cancer</article-title>. <source>Nat. Rev. Cancer</source> <volume>6</volume> (<issue>4</issue>), <fpage>259</fpage>&#x2013;<lpage>269</lpage>. <pub-id pub-id-type="doi">10.1038/nrc1840</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Prediction of disease-related miRNAs by voting with multiple classifiers</article-title>. <source>BMC Bioinforma.</source> <volume>24</volume> (<issue>1</issue>), <fpage>177</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-023-05308-x</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Tetsuya</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>MRMD3.0: a Python tool and webserver for dimensionality reduction and data visualization via an ensemble strategy</article-title>. <source>J. Mol. Biol.</source> <volume>435</volume>, <fpage>168116</fpage>. <pub-id pub-id-type="doi">10.1016/j.jmb.2023.168116</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Predicting human microRNA-disease associations based on support vector machine</article-title>. <source>Int. J. Data Min. Bioinform.</source> <volume>8</volume> (<issue>3</issue>), <fpage>282</fpage>&#x2013;<lpage>293</lpage>. <pub-id pub-id-type="doi">10.1504/ijdmb.2013.056078</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jin</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Zeng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Pang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>iDNA-ABF: multi-scale deep biological language learning model for the interpretable prediction of DNA methylations</article-title>. <source>Genome Biol.</source> <volume>23</volume> (<issue>1</issue>), <fpage>219</fpage>&#x2013;<lpage>223</lpage>. <pub-id pub-id-type="doi">10.1186/s13059-022-02780-1</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jordan</surname>
<given-names>M. I.</given-names>
</name>
<name>
<surname>Mitchell</surname>
<given-names>T. M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Machine learning: trends, perspectives, and prospects</article-title>. <source>Science</source> <volume>349</volume> (<issue>6245</issue>), <fpage>255</fpage>&#x2013;<lpage>260</lpage>. <pub-id pub-id-type="doi">10.1126/science.aaa8415</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Kouznetsova</surname>
<given-names>V. L.</given-names>
</name>
<name>
<surname>Tsigelny</surname>
<given-names>I. F.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>miRNA in machine-learning-based diagnostics of cancers</article-title>. <source>Cancer Screen. Prev.</source> <volume>1</volume> (<issue>1</issue>), <fpage>32</fpage>&#x2013;<lpage>38</lpage>. <pub-id pub-id-type="doi">10.14218/csp.2021.00001</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>LeCun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bengio</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Deep learning</article-title>. <source>Nature</source> <volume>521</volume> (<issue>7553</issue>), <fpage>436</fpage>&#x2013;<lpage>444</lpage>. <pub-id pub-id-type="doi">10.1038/nature14539</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>R. C.</given-names>
</name>
<name>
<surname>Feinbaum</surname>
<given-names>R. L.</given-names>
</name>
<name>
<surname>Ambros</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>1993</year>). <article-title>The <italic>C. elegans</italic> heterochronic gene lin-4 encodes small RNAs with antisense complementarity to lin-14</article-title>. <source>Cell.</source> <volume>75</volume> (<issue>5</issue>), <fpage>843</fpage>&#x2013;<lpage>854</lpage>. <pub-id pub-id-type="doi">10.1016/0092-8674(93)90529-y</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>BioSeq-Diabolo: biological sequence similarity analysis using Diabolo</article-title>. <source>PLOS Comput. Biol.</source> <volume>19</volume> (<issue>6</issue>), <fpage>e1011214</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1011214</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Pang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>BioSeq-BLM: a platform for analyzing DNA, RNA, and protein sequences based on biological language models</article-title>. <source>Nucleic Acids Res.</source> <volume>49</volume> (<issue>22</issue>), <fpage>e129</fpage>. <pub-id pub-id-type="doi">10.1093/nar/gkab829</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Nie</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>SMALF: miRNA-disease associations prediction based on stacked autoencoder and XGBoost</article-title>. <source>BMC Bioinforma.</source> <volume>22</volume> (<issue>1</issue>), <fpage>219</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-021-04135-2</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>Q.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Identification of miRNA&#x2013;disease associations via deep forest ensemble learning based on autoencoder</article-title>. <source>Brief. Bioinform.</source> <volume>23</volume> (<issue>3</issue>), <fpage>bbac104</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbac104</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lynam-Lennon</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Maher</surname>
<given-names>S. G.</given-names>
</name>
<name>
<surname>Reynolds</surname>
<given-names>J. V.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>The roles of microRNA in cancer and apoptosis</article-title>. <source>Biol. Rev. Camb. Philos. Soc.</source> <volume>84</volume> (<issue>1</issue>), <fpage>55</fpage>&#x2013;<lpage>71</lpage>. <pub-id pub-id-type="doi">10.1111/j.1469-185X.2008.00061.x</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mendes</surname>
<given-names>N. D.</given-names>
</name>
<name>
<surname>Freitas</surname>
<given-names>A. T.</given-names>
</name>
<name>
<surname>Sagot</surname>
<given-names>M.-F.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Current tools for the identification of miRNA genes and their targets</article-title>. <source>Nucleic Acids Res.</source> <volume>37</volume> (<issue>8</issue>), <fpage>2419</fpage>&#x2013;<lpage>2433</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkp145</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Nemeth</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Bayraktar</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Ferracin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Calin</surname>
<given-names>G. A.</given-names>
</name>
</person-group> (<year>2023</year>) <source>Non-coding RNAs in disease: from mechanisms to therapeutics</source>, <fpage>1</fpage>&#x2013;<lpage>22</lpage>.</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qian</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Multi-view kernel sparse representation for identification of membrane protein types</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinform.</source> <volume>20</volume> (<issue>2</issue>), <fpage>1234</fpage>&#x2013;<lpage>1245</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2022.3191325</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sayed</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Abdellatif</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>MicroRNAs in development and disease</article-title>. <source>Physiol. Rev.</source> <volume>91</volume> (<issue>3</issue>), <fpage>827</fpage>&#x2013;<lpage>887</lpage>. <pub-id pub-id-type="doi">10.1152/physrev.00006.2010</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Teschendorff</surname>
<given-names>A. E.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Tumor origin detection with tissue-specific miRNA and DNA methylation markers</article-title>. <source>Bioinformatics</source> <volume>34</volume> (<issue>3</issue>), <fpage>398</fpage>&#x2013;<lpage>406</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btx622</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Pang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>IDP-Seq2Seq: identification of intrinsically disordered regions based on sequence to sequence learning</article-title>. <source>Bioinformatics</source> <volume>36</volume> (<issue>21</issue>), <fpage>5177</fpage>&#x2013;<lpage>5186</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa667</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>C.-C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>T.-H.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Prediction of potential miRNA&#x2013;disease associations based on stacked autoencoder</article-title>. <source>Brief. Bioinform.</source> <volume>23</volume> (<issue>2</issue>), <fpage>bbac021</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbac021</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Inferring the human microRNA functional similarity and functional network based on microRNA-associated diseases</article-title>. <source>Bioinformatics</source> <volume>26</volume> (<issue>13</issue>), <fpage>1644</fpage>&#x2013;<lpage>1650</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btq241</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Tiwari</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Muhammad</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2023c</year>). <article-title>A deep multiple kernel learning-based higher-order fuzzy inference system for identifying DNA N4-methylcytosine sites</article-title>. <source>Inf. Sci.</source> <volume>630</volume>, <fpage>40</fpage>&#x2013;<lpage>52</lpage>. <pub-id pub-id-type="doi">10.1016/j.ins.2023.01.149</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yin</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2023a</year>). <article-title>DeepBIO: an automated and interpretable deep-learning platform for high-throughput biological sequence prediction, functional annotation and visualization analysis</article-title>. <source>Nucleic Acids Res.</source> <volume>51</volume> (<issue>7</issue>), <fpage>3017</fpage>&#x2013;<lpage>3029</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkad055</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2023b</year>). <article-title>Deep learning methods for bioinformatics and biomedicine</article-title>. <source>Methods San Diego, Calif.</source> <volume>216</volume>, <fpage>1</fpage>&#x2013;<lpage>2</lpage>. <pub-id pub-id-type="doi">10.1016/j.ymeth.2023.06.003</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2023</year>) <source>SBSM-pro: support bio-sequence machine for proteins</source>.</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zeng</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Graph embedding and Gaussian mixture variational autoencoder network for end-to-end analysis of single-cell RNA sequencing data</article-title>. <source>Cell. Rep. Methods</source> <volume>3</volume>, <fpage>100382</fpage>. <pub-id pub-id-type="doi">10.1016/j.crmeth.2022.100382</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hay</surname>
<given-names>B. A.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>MicroRNAs and the regulation of cell death</article-title>. <source>Trends Genet.</source> <volume>20</volume> (<issue>12</issue>), <fpage>617</fpage>&#x2013;<lpage>624</lpage>. <pub-id pub-id-type="doi">10.1016/j.tig.2004.09.010</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yan</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Lv</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>sAMPpred-GAT: prediction of antimicrobial peptide by graph attention network and predicted peptide structure</article-title>. <source>Bioinformatics</source> <volume>39</volume> (<issue>1</issue>), <fpage>btac715</fpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btac715</pub-id>
</citation>
</ref>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>A brief review on the mechanisms of miRNA regulation</article-title>. <source>Genomics Proteomics Bioinforma.</source> <volume>7</volume> (<issue>04</issue>), <fpage>147</fpage>&#x2013;<lpage>154</lpage>. <pub-id pub-id-type="doi">10.1016/S1672-0229(08)60044-3</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zeng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Kang</surname>
<given-names>S.-g.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lightstone</surname>
<given-names>F. C.</given-names>
</name>
<etal/>
</person-group> (<year>2022b</year>). <article-title>Deep generative molecular design reshapes drug discovery</article-title>. <source>Cell. Rep. Med.</source> <volume>4</volume>, <fpage>100794</fpage>. <pub-id pub-id-type="doi">10.1016/j.xcrm.2022.100794</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zeng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xiang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Nussinov</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2022a</year>). <article-title>Accurate prediction of molecular properties and drug targets using a self-supervised image representation learning framework</article-title>. <source>Nat. Mach. Intell.</source> <volume>4</volume> (<issue>11</issue>), <fpage>1004</fpage>&#x2013;<lpage>1016</lpage>. <pub-id pub-id-type="doi">10.1038/s42256-022-00557-6</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Gu</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2022c</year>). <article-title>Predicting miRNA-disease associations via node-level attention graph auto-encoder</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinform.</source> <volume>20</volume> (<issue>2</issue>), <fpage>1308</fpage>&#x2013;<lpage>1318</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2022.3170843</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>H. Y.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Ju</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>C. G.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2022b</year>). <article-title>Distance-based support vector machine to predict DNA N6-methyladenine modification</article-title>. <source>Curr. Bioinforma.</source> <volume>17</volume> (<issue>5</issue>), <fpage>473</fpage>&#x2013;<lpage>482</lpage>. <pub-id pub-id-type="doi">10.2174/1574893617666220404145517</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Gu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>P450Rdb: a manually curated database of reactions catalyzed by cytochrome P450 enzymes</article-title>. <source>J. Adv. Res</source>. <pub-id pub-id-type="doi">10.1016/j.jare.2023.10.012</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Z. Y.</given-names>
</name>
<name>
<surname>Ning</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ye</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y. H.</given-names>
</name>
<name>
<surname>Futamura</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sakurai</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2022a</year>). <article-title>iLoc-miRNA: extracellular/intracellular miRNA prediction using deep BiLSTM with attention mechanism</article-title>. <source>Brief. Bioinform.</source> <volume>23</volume>, <fpage>bbac395</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbac395</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Kang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Svetnik</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Warden</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Wilcock</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Smith</surname>
<given-names>A. D.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>A machine learning approach to identify a circulating MicroRNA signature for Alzheimer disease</article-title>. <source>J. Appl. Lab. Med.</source> <volume>5</volume> (<issue>1</issue>), <fpage>15</fpage>&#x2013;<lpage>28</lpage>. <pub-id pub-id-type="doi">10.1373/jalm.2019.029595</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yin</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Adaptive boosting-based computational model for predicting potential miRNA-disease associations</article-title>. <source>Bioinformatics</source> <volume>35</volume> (<issue>22</issue>), <fpage>4730</fpage>&#x2013;<lpage>4738</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btz297</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>You</surname>
<given-names>Z.-H.</given-names>
</name>
<name>
<surname>Nie</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Hierarchical graph attention network for miRNA-disease association prediction</article-title>. <source>Mol. Ther.</source> <volume>30</volume> (<issue>4</issue>), <fpage>1775</fpage>&#x2013;<lpage>1786</lpage>. <pub-id pub-id-type="doi">10.1016/j.ymthe.2022.01.041</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Yin</surname>
<given-names>M.-M.</given-names>
</name>
<name>
<surname>Jiao</surname>
<given-names>C.-N.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>J.-X.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>C.-H.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.-X.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>) <source>Predicting miRNA-disease associations through deep autoencoder with multiple kernel learning</source>.</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Azim</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2020a</year>). <article-title>Predicting potential miRNA-disease associations by combining gradient boosting decision tree with logistic regression</article-title>. <source>Comput. Biol. Chem.</source> <volume>85</volume>, <fpage>107200</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiolchem.2020.107200</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Azim</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2020b</year>). <article-title>Predicting potential miRNA-disease associations by combining gradient boosting decision tree with logistic regression</article-title>. <source>Comput. Biol. Chem.</source> <volume>85</volume>, <fpage>107200</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiolchem.2020.107200</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Hao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2023b</year>). <article-title>Identifying disease-related microbes based on multi-scale variational graph autoencoder embedding Wasserstein distance</article-title>. <source>BMC Biol.</source> <volume>21</volume> (<issue>1</issue>), <fpage>294</fpage>. <pub-id pub-id-type="doi">10.1186/s12915-023-01796-8</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>C. B.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Liao</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>A first computational frame for recognizing heparin-binding protein</article-title>. <source>Diagn. (Basel)</source> <volume>13</volume> (<issue>14</issue>), <fpage>2465</fpage>. <pub-id pub-id-type="doi">10.3390/diagnostics13142465</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zou</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Xing</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Gene2vec: gene subsequence embedding for prediction of mammalian N-6-methyladenosine sites from mRNA</article-title>. <source>RNA</source> <volume>25</volume> (<issue>2</issue>), <fpage>205</fpage>&#x2013;<lpage>218</lpage>. <pub-id pub-id-type="doi">10.1261/rna.069112.118</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zou</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Accurately identifying hemagglutinin using sequence information and machine learning methods</article-title>. <source>Front. Med. (Lausanne)</source> <volume>10</volume>, <fpage>1281880</fpage>. <pub-id pub-id-type="doi">10.3389/fmed.2023.1281880</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>