<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2026.1749205</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Computational understanding of non-coding RNA pairwise interactions</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Nicolini</surname> <given-names>Marco</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/3243093"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Stacchietti</surname> <given-names>Federico</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/3330972"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Casiraghi</surname> <given-names>Elena</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/1393524"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Valentini</surname> <given-names>Giorgio</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/2526175"/>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>AnacletoLab, Dipartimento di Informatica, Universit&#x000E0; degli Studi di Milano</institution>, <city>Milan</city>, <country country="it">Italy</country></aff>
<aff id="aff2"><label>2</label><institution>European Lab for Learning and Intelligent Systems (ELLIS)</institution>, <city>Milan</city>, <country country="it">Italy</country></aff>
<aff id="aff3"><label>3</label><institution>Environmental Genomics and Systems Biology Division, Lawrence Berkeley National Laboratory</institution>, <city>Berkeley, CA</city>, <country country="us">United States</country></aff>
<aff id="aff4"><label>4</label><institution>Department of Computer Science, Aalto University</institution>, <city>Espoo</city>, <country country="fi">Finland</country></aff>
<author-notes>
<corresp id="c001"><label>&#x0002A;</label>Correspondence: Giorgio Valentini, <email xlink:href="mailto:valentini@di.unimi.it">valentini@di.unimi.it</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-18">
<day>18</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>9</volume>
<elocation-id>1749205</elocation-id>
<history>
<date date-type="received">
<day>18</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>13</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>28</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2026 Nicolini, Stacchietti, Casiraghi and Valentini.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Nicolini, Stacchietti, Casiraghi and Valentini</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-18">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Non-coding RNAs (ncRNAs) govern a vast network of regulatory interactions within the cells, yet their pairwise relationships remain largely uncharted due to the complexity of RNA structure and the limits of current experimental methods. We present <italic>CUPID</italic> (Computational Understanding of Pairwise Interactions in ncRNA Data), a deep learning framework that predicts ncRNA-ncRNA interactions directly from primary sequence information. <italic>CUPID</italic> uses embeddings from a pre-trained RNA language model combined with a feed-forward classifier to identify patterns linked to molecular pairing. This approach avoids reliance on thermodynamic models or manual feature design and, unlike previously proposed models, is able to generalize across different types of ncRNAs, including long non-coding, circular, micro-, and small nuclear RNAs. By learning the hidden rules that govern RNA recognition, <italic>CUPID</italic> provides a scalable tool for exploring ncRNA interaction networks and advancing our understanding of RNA-based regulation.</p></abstract>
<kwd-group>
<kwd>ncRNA-ncRNA interaction</kwd>
<kwd>deep learning</kwd>
<kwd>fine-tuning</kwd>
<kwd>artificial intelligence</kwd>
<kwd>machine learning</kwd>
<kwd>non-coding RNA</kwd>
<kwd>large language models</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by the National Center for Gene Therapy and Drugs Based on RNA Technology-MUR (project no. CN 00000041) funded by the NextGeneration EU Program and by the National Plan for NRRP Complementary Investments (PNC) in the call for the funding of research initiatives for technologies and innovative trajectories in the health-project no. PNC0000003-AdvaNced Technologies for Human-centrEd Medicine (project acronym: ANTHEM). Computational resources at CINECA for this work have been funded by UNITECH INDACO, which is an HPC project at the State University of Milan.</funding-statement>
</funding-group>
<counts>
<fig-count count="8"/>
<table-count count="1"/>
<equation-count count="12"/>
<ref-count count="35"/>
<page-count count="14"/>
<word-count count="7246"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Medicine and Public Health</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Understanding RNA-RNA interactions is critical for deciphering the regulatory circuits that orchestrate gene expression, RNA processing, and signal transduction. Non-coding RNAs (ncRNAs), despite lacking protein-coding potential, play pivotal roles in these processes (<xref ref-type="bibr" rid="B2">Ali et al., 2021</xref>). However, experimental mapping of ncRNA interactions remains challenging due to the limitations of existing experimental and computational techniques (<xref ref-type="bibr" rid="B13">Lorenzi et al., 2021</xref>).</p>
<p>Methods such as Minimum Free Energy (MFE) calculations and accessibility-based models have been usually applied to predict RNA-RNA interactions. Tools like IntaRNA (<xref ref-type="bibr" rid="B14">Mann et al., 2017</xref>) estimate the interaction energy as &#x00394;<italic>G</italic><sub>total</sub> &#x0003D; &#x00394;<italic>G</italic><sub>duplex</sub>&#x0002B;&#x00394;<italic>G</italic><sub>accessibility</sub>, where the first term quantifies the energy released upon hybridization, and the second accounts for the cost of rendering binding regions accessible. Benchmark studies have demonstrated that accessibility-based algorithms can effectively differentiate native interactions from background noise (<xref ref-type="bibr" rid="B27">Umu and Gardner, 2017</xref>), yet these approaches rely on predefined parameters and simplified energy models. In parallel, experimental techniques such as RNA Antisense Purification (RAP-RNA) offer validation but remain limited by their high cost and labor intensity (<xref ref-type="bibr" rid="B8">Engreitz et al., 2014</xref>).</p>
<p>Advances in machine learning and graph-based modeling for biological data, including recent work on explainability and diffusion-based attention mechanisms, have motivated a surge of learning-driven approaches for predicting interactions across diverse molecular systems (<xref ref-type="bibr" rid="B11">Gliozzo et al., 2025</xref>; <xref ref-type="bibr" rid="B5">Cetin and Sefer, 2025</xref>; <xref ref-type="bibr" rid="B20">Sefer, 2025</xref>).</p>
<p>Machine learning methods, such as convolutional neural networks, deep forests and graph neural networks (<xref ref-type="bibr" rid="B3">Alipanahi et al., 2015</xref>; <xref ref-type="bibr" rid="B25">Tian et al., 2021</xref>; <xref ref-type="bibr" rid="B31">Wei et al., 2022</xref>) have been applied to RNA-protein interaction prediction, while graph-based approaches embed heterogeneous networks of ncRNAs and diseases using multigraph contrastive learning (<xref ref-type="bibr" rid="B23">Sun et al., 2025</xref>) or apply random-walk based graph representation learning techniques to predict non-coding RNA interactions (<xref ref-type="bibr" rid="B26">Torgano et al., 2025</xref>).</p>
<p>While effective, these methods often rely on predefined feature extraction, graph structures, or supervised training, limiting their adaptability to novel ncRNA sequences.</p>
<p>In contrast, LLMs can directly learn from large corpora of proteins or RNA data (<xref ref-type="bibr" rid="B28">Valentini et al., 2023</xref>; <xref ref-type="bibr" rid="B35">Zhao et al., 2024</xref>; <xref ref-type="bibr" rid="B22">Shen et al., 2024</xref>; <xref ref-type="bibr" rid="B16">Nicolini et al., 2025a</xref>), capturing intricate interaction motifs beyond predefined energy models or graph-based constraints. Unlike thermodynamic models, which impose simplifying assumptions, LLMs infer interaction likelihoods from latent structural patterns, offering a flexible, data-driven approach. In particular, transformer-based foundation models can generate biologically meaningful representations directly from raw sequences, by exploiting large RNA sequence corpora (<xref ref-type="bibr" rid="B19">Sapoval et al., 2022</xref>; <xref ref-type="bibr" rid="B6">Chen et al., 2022</xref>; <xref ref-type="bibr" rid="B33">Yu et al., 2024</xref>). More in general several deep learning methods have been proposed to predict specific ncRNA interactions, using rna2vec pre-training and deep feature mining (<xref ref-type="bibr" rid="B34">Yu et al., 2022</xref>) or conditional random fields and graph convolutional networks (<xref ref-type="bibr" rid="B30">Wang et al., 2022</xref>), heterogeneous graph neural networks (<xref ref-type="bibr" rid="B12">Li et al., 2025</xref>) and convolutional neural networks combined with a Transformer Encoder (<xref ref-type="bibr" rid="B32">Yang et al., 2025</xref>) for the prediction of miRNA-lncRNA interactions.</p>
<p>We also recently proposed a deep neural network trained on embedded representations of a subset of ncRNAs obtained from the RNA-FM language model (<xref ref-type="bibr" rid="B22">Shen et al., 2024</xref>), achieving state-of-the-art results for predicting miRNA interactions with other ncRNA molecules (<xref ref-type="bibr" rid="B17">Nicolini et al., 2025b</xref>). However, our proposed model, like other models recently proposed in the literature (<xref ref-type="bibr" rid="B12">Li et al., 2025</xref>; <xref ref-type="bibr" rid="B32">Yang et al., 2025</xref>), is only able to predict specific ncRNA interactions (e.g., interactions with miRNAs). Furthermore, due to limitations on the maximum allowed sequence length of the underlying RNA-FM transformer, it can only process sequences shorter than approximately 1,000 nucleotides, thus limiting the model&#x00027;s application to relatively long ncRNAs (e.g., lncRNAs).</p>
<p>To overcome these limitations, we propose a novel Transformer-based deep learning model that, differently from previous models proposed in the literature, is able to predict a large range of ncRNA interactions, including long non-coding RNA (lncRNA), circular RNA (circRNA), microRNA (miRNA), small nuclear RNA (snRNA), small nucleolar RNA (snoRNA), Small Cajal body-specific RNAs (scaRNAs), small cytoplasmic RNAs (scRNA), and other types of ncRNAs. Moreover, by adopting GenerRNA (<xref ref-type="bibr" rid="B35">Zhao et al., 2024</xref>) to encode RNA sequences, our model can process full-length ncRNA sequences (up to 4,096 nucleotides) without truncation, thus significantly enlarging the set of ncRNAs that can be processed by the model.</p>
<p>We hypothesize that LLM-based contextual embeddings provide a rich representation for ncRNA interaction prediction, circumventing the limitations of manual feature engineering or predefined structural graphs. We reasoned that GenerRNA (<xref ref-type="bibr" rid="B35">Zhao et al., 2024</xref>), pretrained on a large corpus of ncRNA sequences using a masked language modeling objective, can capture long-range interactions of ncRNA molecules, thus facilitating downstream tasks such as ncRNA interaction prediction.</p>
<p>Our <italic>CUPID</italic> model (Computational Understanding of Pairwise Interactions in ncRNA Data), predicts ncRNA interactions using only sequence information. <italic>CUPID</italic> extracts embeddings from a pre-trained ncRNA language model and feeds a dense feed-forward neural network (FFNN) to automatically learn intricate sequence interaction features. This design circumvents the need for explicit thermodynamic parameterization and manually engineered features, offering a scalable and efficient alternative for uncovering novel regulatory interactions (<xref ref-type="bibr" rid="B9">Fabbri et al., 2019</xref>).</p></sec>
<sec id="s2">
<label>2</label>
<title>Methods</title>
<sec>
<label>2.1</label>
<title>Dataset</title>
<p>Our dataset comprises a subset of multispecies ncRNA interaction pairs from RNA-KG (<xref ref-type="bibr" rid="B4">Cavalleri et al., 2024</xref>).<xref ref-type="fn" rid="fn0003"><sup>1</sup></xref></p>
<p>The RNA-KG integrates physical and functional interactions between different types of ncRNAs, and their relationships with other biomolecules (genes and proteins) and chemicals, as well as with biomedical concepts coded in the Gene Ontology (<xref ref-type="bibr" rid="B1">Aleksander et al., 2023</xref>), the Human Phenotype Ontology (<xref ref-type="bibr" rid="B10">Gargano et al., 2024</xref>), Mondo (<xref ref-type="bibr" rid="B29">Vasilevsky et al., 2025</xref>), and other bio-medical ontologies related to the &#x0201C;RNA world.&#x0201D;</p>
<p>In particular, we extracted RNA&#x02013;RNA edges from RNA-KG by selecting only relations annotated as <monospace>interacts-with</monospace>. In RNA-KG, <monospace>interacts-with</monospace> denotes experimentally supported <italic>physical</italic> RNA&#x02013;RNA interactions, and we therefore excluded other relation types encoding functional associations (e.g., regulatory links, co-expression, or disease associations). The <monospace>interacts-with</monospace> edges integrated in RNA-KG originate from multiple underlying curated interaction databases. <xref ref-type="fig" rid="F1">Figure 1</xref> presents an overview of the main RNA entities and their relationships available in the RNA-KG. Readers may refer to the RNA-KG reference (<xref ref-type="bibr" rid="B4">Cavalleri et al., 2024</xref>) for the complete list of contributing sources and evidence provenance.</p>
<fig position="float" id="F1">
<label>Figure 1</label>
<caption><p>Simplified representation of the RNA-KG meta-graph, focused on ncRNAs and their interactions.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1749205-g0001.tif">
<alt-text content-type="machine-generated">Simplified RNA-KG meta-graph centered on ncRNAs and their interactions. Nodes represent RNA classes, genes, proteins, ontological terms, and related entities, color-coded by category (coding RNA, lncRNA, sncRNA, viral RNA, ontology, others). Directed edges denote labeled molecular relationships (e.g., interacts with, represses expression of, molecularly interacts with).
</alt-text>
</graphic>
</fig>
<p>We filtered the dataset to retain only sequences that fit within the GenerRNA (<xref ref-type="bibr" rid="B35">Zhao et al., 2024</xref>)&#x00027;s token limit (approximately 4,096 nucleotides), since Byte Pair Encoding (BPE) compresses raw nucleotide sequences, allowing longer sequences to fit within the model&#x00027;s constraints. After applying this length filter, the dataset contains:</p>
<list list-type="bullet">
<list-item><p>101,088 interaction pairs (down from an initial 130,310 pairs).</p></list-item>
<list-item><p>11,212 unique sequences (selected from 19,624 potential sequences) belonging to 9 different RNA molecule types: long non-coding RNA (lncRNA), circular RNA (circRNA), microRNA (miRNA), small nuclear RNA (snRNA), small nucleolar RNA (snoRNA), Small Cajal body-specific RNAs (scaRNAs), small cytoplasmic RNAs (scRNA), not (better) classified non-coding RNA molecules (ncRNA), and pseudo RNA.<xref ref-type="fn" rid="fn0004"><sup>2</sup></xref></p></list-item>
</list>
<p>In the following, we denote the set of length-filtered molecules as</p>
<disp-formula id="E1"><mml:math id="M1"><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>S</mml:mi></mml:mstyle></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x02003;</mml:mtext><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mo>&#x02026;</mml:mo><mml:mo>,</mml:mo><mml:mo>|</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>S</mml:mi></mml:mstyle></mml:mrow><mml:mo>|</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>
<p>where the type of each molecule <inline-formula><mml:math id="M2"><mml:mrow><mml:mi>s</mml:mi><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>S</mml:mi></mml:mstyle></mml:mrow></mml:mrow></mml:math></inline-formula> is given by &#x003D5;(<italic>s</italic>), i.e., <inline-formula><mml:math id="M3"><mml:mi>&#x003D5;</mml:mi><mml:mo>:</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>S</mml:mi></mml:mstyle></mml:mrow><mml:mo>&#x02192;</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>T</mml:mi></mml:mstyle></mml:mrow></mml:math></inline-formula> represents a mapping of an ncRNA sequence <inline-formula><mml:math id="M4"><mml:mi>s</mml:mi><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>S</mml:mi></mml:mstyle></mml:mrow></mml:math></inline-formula> to its ncRNA type <inline-formula><mml:math id="M5"><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>T</mml:mi></mml:mstyle></mml:mrow></mml:math></inline-formula>, e.g., miRNA, lncRNA or any other ncRNA type.</p>
<p>The identity of an interaction pair is solely determined by its constituent molecules, regardless of order; that is,</p>
<disp-formula id="E2"><mml:math id="M6"><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula>
<p>The type of an interaction (<italic>s</italic><sub><italic>i</italic></sub>, <italic>s</italic><sub><italic>j</italic></sub>) with <italic>s</italic><sub><italic>i</italic></sub>&#x02260;<italic>s</italic><sub><italic>j</italic></sub> and <inline-formula><mml:math id="M7"><mml:mrow><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>S</mml:mi></mml:mstyle></mml:mrow></mml:mrow></mml:math></inline-formula> is determined by the types of the ncRNAs <italic>s</italic><sub><italic>i</italic></sub> and <italic>s</italic><sub><italic>j</italic></sub> themselves, regardless of their order:</p>
<disp-formula id="E3"><mml:math id="M8"><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003D5;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mi>&#x003D5;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003D5;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mi>&#x003D5;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></disp-formula>
<p>For instance, possible types of ncRNA interactions are miRNA-lncRNA or miRNA-circRNA. Assuming that interacting ncRNA pairs of different types exhibit distinct specificities that the model should learn, we reasoned that types with negligible sample sizes might introduce noise rather than valuable information. Therefore, the set of interaction pairs used in this work is obtained by further filtering the dataset of interacting pairs to remove interacting pair types represented by fewer than 100 samples, resulting in 10,644 unique sequences composing 99,841 interacting pairs. <xref ref-type="fig" rid="F2">Figure 2</xref> shows the distribution of the different types of ncRNA interactions.</p>
<fig position="float" id="F2">
<label>Figure 2</label>
<caption><p>Distribution of ncRNA interactions pairs in the filtered interaction set. Rows: first (left) molecule type; Columns: right molecule type.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1749205-g0002.tif">
<alt-text content-type="machine-generated">Distribution of ncRNA interaction pairs in the filtered dataset. Cell color intensity reflects couple counts (e.g., lncRNA&#x02013;miRNA 54,773; miRNA&#x02013;pseudo 16,853; snoRNA&#x02013;snoRNA 4,322), with dashes indicating no observed interactions.</alt-text>
</graphic>
</fig>
</sec>
<sec>
<label>2.2</label>
<title>Data augmentation</title>
<p>To address the issues due to the limited cardinality of the available training data, especially for specific types of ncRNA interactions (e.g., snRNA-miRNA or miRNA-circRNA), we employ a data augmentation strategy that effectively increases the dataset size by a factor of 4. For each original training instance represented as a pair of interacting ncRNA (<italic>s</italic><sub><italic>i</italic></sub>, <italic>s</italic><sub><italic>j</italic></sub>) we generate three additional augmented instances:</p>
<list list-type="order">
<list-item><p>Molecule Order Reversal: Swap the order of the molecules: (<italic>s</italic><sub><italic>j</italic></sub>, <italic>s</italic><sub><italic>i</italic></sub>).</p></list-item>
<list-item><p>Sequence Flipping: Reverse the nucleotide order in both molecules (denoted by the superscript <italic>F</italic>): <inline-formula><mml:math id="M9"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>F</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>F</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>.</p></list-item>
<list-item><p>Combined Augmentation: Reverse both the molecule order and the nucleotide sequences: <inline-formula><mml:math id="M10"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>F</mml:mi></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>F</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>.</p></list-item>
</list>
<p>Thus, if the original dataset contains <italic>N</italic> instances, the augmented dataset becomes: <italic>N</italic><sub>aug</sub> &#x0003D; 4<italic>N</italic> (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure S1</xref>). This augmentation introduces invariance to both the order and orientation of sequences, thereby enabling the model to better capture the underlying biological patterns and improving its robustness against input variability.</p>
<p>In order to avoid leakage between training and test sets, data augmentation is performed after splitting the dataset.</p>
</sec>
<sec>
<label>2.3</label>
<title>Negative examples generation</title>
<p>In our dataset, only positive non-coding RNA-RNA interactions are explicitly provided, and they occur with varying frequencies.</p>
<p>To effectively train <italic>CUPID</italic>, we generated negative examples for each interaction pair type by matching the frequency distribution of the positive interactions. Specifically, negative examples were generated under the assumption that any pair of ncRNA sequences drawn from the set of unique sequences that is not observed as a positive interaction constitutes a possible negative instance.</p>
<p>Let <inline-formula><mml:math id="M11"><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>S</mml:mi></mml:mstyle></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x02026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula> be the set of unique ncRNA sequences present in the dataset. Denote by</p>
<disp-formula id="E4"><mml:math id="M12"><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>P</mml:mi></mml:mstyle></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02223;</mml:mo><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>S</mml:mi></mml:mstyle></mml:mrow><mml:mspace width="2.77695pt" class="tmspace"/><mml:mtext class="textrm" mathvariant="normal">interact</mml:mtext></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></disp-formula>
<p>the set of all positive ncRNA-ncRNA interactions. Then, the set of all possible ncRNA pairs is given by <inline-formula><mml:math id="M13"><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>S</mml:mi></mml:mstyle></mml:mrow><mml:mo>&#x000D7;</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>S</mml:mi></mml:mstyle></mml:mrow></mml:math></inline-formula> (excluding self-interactions).</p>
<p>The set of <italic>potential negatives</italic> is defined as:</p>
<disp-formula id="E5"><mml:math id="M14"><mml:mrow><mml:msub><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>N</mml:mi></mml:mstyle></mml:mrow></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">potential</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>S</mml:mi></mml:mstyle></mml:mrow><mml:mo>&#x000D7;</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>S</mml:mi></mml:mstyle></mml:mrow><mml:mo>&#x02223;</mml:mo><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02260;</mml:mo><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>\</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>P</mml:mi></mml:mstyle></mml:mrow><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula>
<p><italic>Negative sampling procedure</italic> To generate the negative samples for each interacting pair type, we corrupt its tuples. In other words, given a positive pair (<italic>s</italic><sub><italic>i</italic></sub>, <italic>s</italic><sub><italic>j</italic></sub>) with type (&#x003D5;(<italic>s</italic><sub><italic>i</italic></sub>), &#x003D5;(<italic>s</italic><sub><italic>j</italic></sub>)), we keep the first molecule <italic>s</italic><sub><italic>i</italic></sub> fixed and sample <inline-formula><mml:math id="M15"><mml:mrow><mml:msup><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>S</mml:mi></mml:mstyle></mml:mrow></mml:mrow></mml:math></inline-formula> such that:</p>
<disp-formula id="E6"><mml:math id="M16"><mml:mrow><mml:msup><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup><mml:mo>&#x02260;</mml:mo><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x02003;</mml:mtext><mml:mi>&#x003D5;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>&#x003D5;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x02003;</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02209;</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>P</mml:mi></mml:mstyle></mml:mrow></mml:mrow></mml:math></disp-formula>
<p>In this way we avoid generating negatives between ncRNA types that never interact (e.g., scaRNA and lncRNA).</p>
<p>Because we generate negatives for each positive pair (<italic>s</italic><sub><italic>i</italic></sub>, <italic>s</italic><sub><italic>j</italic></sub>) by corrupting the right molecule while keeping the same type pair (&#x003D5;(<italic>s</italic><sub><italic>i</italic></sub>), &#x003D5;(<italic>s</italic><sub><italic>j</italic></sub>)), the negative set preserves the interaction <italic>type-pair</italic> distribution of the positives in expectation (and approximately in practice, up to rejection of candidates already present as positives or previously sampled negatives).</p>
<p>For each positive edge, we selected <italic>n</italic> negative edges, in order to control the imbalance between positive and negative edges in the testing phase (we set <italic>n</italic> &#x0003D; 20 in our experiments).</p>
<p><italic>Negative sampling algorithm</italic> The negative sampling algorithm is detailed in <xref ref-type="other" rid="algorithm_1">Algorithm 1</xref>. In our implementation, we set <italic>n</italic> &#x0003D; 20. Note that, since the condition at line 5 of the algorithm cannot always be guaranteed, the actual number of negatives generated for a positive pair may be smaller, i.e., <italic>n</italic> &#x02264; 20.</p>
<statement content-type="algorithm" id="algorithm_1">
<label>Algorithm 1</label>
<p>Negative sampling algorithm.
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1749205-g0009.tif"/>
</p>
</statement>
</sec>
<sec>
<label>2.4</label>
<title>Model architecture</title>
<sec>
<label>2.4.1</label>
<title>The overall <italic>CUPID</italic> architecture</title>
<p>Our model follows a two-stage pipeline, as illustrated in <xref ref-type="fig" rid="F3">Figure 3</xref>. It first extracts ncRNA sequence embeddings using a pre-trained ncRNA Language Model (GenerRNA; <xref ref-type="bibr" rid="B35">Zhao et al., 2024</xref>) and then processes these embeddings through a Feed-Forward Neural Network (FFNN) to predict interaction probabilities.</p>
<fig position="float" id="F3">
<label>Figure 3</label>
<caption><p>High-level <italic>CUPID</italic> architecture schema.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1749205-g0003.tif">
<alt-text content-type="machine-generated">High-level CUPID architecture. Two ncRNA sequences are independently encoded by the GenerRNA module; resulting embeddings are combined through a feed-forward network to predict interaction probability.</alt-text>
</graphic>
</fig>
<p>The GenerRNA architecture mimics the GPT-2-medium model (<xref ref-type="bibr" rid="B18">Radford et al., 2019</xref>), and is composed of 24 stacked transformer-decoder layers, each incorporating a self-attention mechanism that models pairwise interactions among all positions in its input sequence. GenerRNA uses a context window of 1,024 tokens, corresponding to input sequences with a length of approximately 4,096 nucleotides coded through byte pair encoding (<xref ref-type="bibr" rid="B21">Sennrich et al., 2016</xref>). Note that this maximum length permits the encoding of large RNA molecules. This decoder-only Transformer architecture operates in an autoregressive manner, predicting the subsequent token given the previous ones. Both the input and output of the model are represented as tokens, which are encoded and decoded by a trained tokenizer using byte pair encoding. A special token (EOS) is used to delimit sequences, indicating the start and end of each sequence.</p>
<p>Each transformer block is fed with an input of size <italic>L</italic>&#x000D7;<italic>H</italic>, thus allowing it to process RNA sequences having up to <italic>L</italic> tokens, each one represented through an <italic>H</italic>-dimensional real vector, with <italic>L</italic> &#x0003D; <italic>H</italic> &#x0003D; 1,024, and outputs a latent representation with the same dimensionality for each input token. For each input sequence, the block employs a multi-head self-attention mechanism with 16 attention heads. This is followed by an &#x0201C;Add &#x00026; Norm&#x0201D; sub-block, which applies residual addition and layer normalization. Subsequently, a feed-forward sub-layer expands the hidden states from 1,024 to 4,096 dimensions, applies a non-linear activation (ReLU), and then projects them back to 1,024 dimensions. Another &#x0201C;Add &#x00026; Norm&#x0201D; sub-block is applied after the feed-forward network, and finally, the block produces an output matrix <bold>X</bold>&#x02208;&#x0211D;<sup><italic>L</italic>&#x000D7;<italic>H</italic></sup>. A schematic diagram of this block is reported in <xref ref-type="fig" rid="F4">Figure 4</xref>.</p>
<fig position="float" id="F4">
<label>Figure 4</label>
<caption><p>High-level architecture of a GenerRNA block.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1749205-g0004.tif">
<alt-text content-type="machine-generated">High-level architecture of a GenerRNA block. Input token embeddings pass through multi-head self-attention (16 heads), followed by add &#x00026; normalization, a feed-forward network with ReLU activation, and final normalization, producing output embeddings.</alt-text>
</graphic>
</fig>
</sec>
<sec>
<label>2.4.2</label>
<title>Pooling techniques</title>
<p>The <italic>i</italic><sup><italic>th</italic></sup> row of matrix <bold>X</bold> is a latent representation <inline-formula><mml:math id="M27"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mn>1024</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> of the <italic>i</italic><sup><italic>th</italic></sup> token. To obtain a fixed-length embedding for the entire sequence, we tested two types of pooling over the sequence (i.e., across the <italic>L</italic> tokens), as well as their concatenation:</p>
<list list-type="bullet">
<list-item><p>Average (<italic>Avg</italic>) Pooling: obtained as the mean of the embeddings of all the tokens: <inline-formula><mml:math id="M28"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>e</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">avg</mml:mtext></mml:mstyle></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:mfrac><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>x</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>.</mml:mo></mml:mrow></mml:math></inline-formula></p></list-item>
<list-item><p>Maximum (<italic>Max</italic>) Pooling: Compute the element-wise maximum over all token embeddings: <inline-formula><mml:math id="M29"><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>e</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">max</mml:mtext></mml:mstyle></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:munder><mml:mrow><mml:mi>c</mml:mi><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>&#x02208;</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x02026;</mml:mo><mml:mi>L</mml:mi></mml:mrow></mml:munder><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x02026;</mml:mo><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>H</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, where <italic>cmax</italic> is the columnwise <italic>max</italic> operator, and <italic>x</italic><sub><italic>ij</italic></sub> are the elements of the <bold>X</bold> embedding matrix.</p></list-item>
<list-item><p>Concatenation of [<italic>Avg, Max</italic>]: Combine both pooled representations into a single embedding vector: <inline-formula><mml:math id="M30"><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>e</mml:mtext></mml:mstyle><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>e</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">avg</mml:mtext></mml:mstyle></mml:mrow></mml:msub><mml:mo>;</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>e</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textrm" mathvariant="normal">max</mml:mtext></mml:mstyle></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mn>048</mml:mn></mml:mrow></mml:msup><mml:mo>.</mml:mo></mml:mrow></mml:math></inline-formula></p></list-item>
</list>
<p>These pooling strategies are schematically depicted in <xref ref-type="fig" rid="F5">Figure 5</xref>.</p>
<fig position="float" id="F5">
<label>Figure 5</label>
<caption><p>Pooling embedding strategies.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1749205-g0005.tif">
<alt-text content-type="machine-generated">Pooling embedding strategies. An input sequence embedding in &#x0211D;^(L&#x000D7;H) undergoes average and max pooling, generating an embedding vector for average and another embedding vector for max, which are concatenated into a vector in &#x211D;^(2H) for downstream prediction.</alt-text>
</graphic>
</fig>
<p>The embedded representation of a candidate interacting ncRNA pair is obtained by concatenating the embeddings of the two interacting molecules.</p></sec>
<sec>
<label>2.4.3</label>
<title>The classification unit</title>
<p>To predict the interaction we used the pooled embeddings of the RNA sequences as input to a dense Feed Forward Neural Network (FFNN), having the following architecture:</p>
<list list-type="bullet">
<list-item><p><bold>Input layer dimension:</bold> 1,024 for Avg and Max-pooling embedding strategies, 2,048 when the embedding of the input molecule is obtained by concatenating the embeddings obtained by Avg and Max pooling,</p></list-item>
<list-item><p><bold>Hidden layers:</bold> 4 hidden layers with 1,024 neurons each and ReLU activation function,</p></list-item>
<list-item><p><bold>Output layer:</bold> 1 neuron with sigmoid activation function.</p></list-item>
</list>
<p>To train the network we applied the following hyper-parameters: <italic>Learning Rate:</italic> &#x003B7; &#x0003D; 5 &#x000D7; 10<sup>&#x02212;4</sup> with a linear warm-up phase of 4 epochs, followed by cosine decay. <italic>Epochs:</italic> 50 epochs with early stopping (patience of 10 epochs). The model with the best validation loss is selected (e.g., if the lowest validation loss is observed at epoch 35, then early stopping is triggered at epoch 45, and the model from epoch 35 is used). <italic>Batch Size:</italic> 512. <italic>Dropout Rate:</italic> 0.2. <italic>Optimizer:</italic> Adam. <italic>Loss Function:</italic> Binary Cross-Entropy.</p>
<p>Training and validation loss curves were monitored over epochs to assess model convergence and to avoid potential overfitting by early stopping.</p></sec>
<sec>
<label>2.4.4</label>
<title>Mini-batch balancing</title>
<p>Due to the imbalance in our dataset we adopted a training strategy designed to prevent the model from learning predominantly from the negatives. To address this, we constructed mini-batches that contain a controlled mix of positive and negative examples. Recall that our training set is composed of the set of positive interaction pairs, <inline-formula><mml:math id="M31"><mml:mrow><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>P</mml:mi></mml:mstyle></mml:mrow><mml:mo>,</mml:mo><mml:mo>|</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>P</mml:mi></mml:mstyle></mml:mrow><mml:mo>|</mml:mo><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x0002B;</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, and the set of negative interaction pairs <inline-formula><mml:math id="M32"><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>N</mml:mi></mml:mstyle></mml:mrow></mml:math></inline-formula>, with <inline-formula><mml:math id="M33"><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:mstyle mathvariant="script"><mml:mi>N</mml:mi></mml:mstyle></mml:mrow><mml:mo>|</mml:mo><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mo>-</mml:mo></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>n</mml:mi><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x0002B;</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, as detailed in Section 2.3. Each mini-batch <italic>B</italic> of size <italic>m</italic> is formed by randomly selecting <italic>m</italic><sub><italic>p</italic></sub> positive examples (using a uniform distribution with replacement) and <italic>m</italic><sub><italic>n</italic></sub> negative examples (using a uniform distribution without replacement). The ratio of negatives within each mini-batch is defined by</p>
<disp-formula id="E7"><mml:math id="M34"><mml:mrow><mml:mi>r</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mtext>&#x02003;with&#x02003;</mml:mtext><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>m</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>m</mml:mi><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula>
<p>Here, <italic>r</italic> can vary between 0 and 1. A value of <italic>r</italic> &#x0003D; 0.7 implies that 70% of the mini-batch consists of negatives. The choice to sample positives with replacement is driven by their limited number, ensuring sufficient representation even in large batches, whereas sampling negatives without replacement allows for a broader coverage of these more abundant examples.</p>
</sec>
</sec>
<sec>
<label>2.5</label>
<title>Experimental evaluation</title>
<sec>
<label>2.5.1</label>
<title>Data preparation and splitting</title>
<p>In all our experiments the negative examples were sampled according to the relative frequency of the interacting pair types, following the procedure described in Section 2.3, using a negative:positive ratio equal to 20:1.</p>
<p>The dataset was partitioned into stratified training and test sets (train:test ratio = 90:10). The training set was further split into a stratified set for training (80% of interaction pairs) and the remaining 20% for validation. The validation set was used for early stopping and for tuning the classification threshold via maximization of the Matthews correlation coefficient (MCC; <xref ref-type="bibr" rid="B15">Matthews, 1975</xref>) on the validation data.</p></sec>
<sec>
<label>2.5.2</label>
<title>Evaluation metrics</title>
<p>To comprehensively assess the model&#x00027;s performance, we computed a range of evaluation metrics, encompassing both threshold-dependent and threshold-independent measures. Specifically, we first evaluated standard binary classification metrics, including accuracy, balanced accuracy (to account for class imbalance), precision, recall, F1 score, AUROC (Area Under the Receiver Operating Characteristic Curve), and AUPRC (Area Under the Precision-Recall Curve). In addition to these overall metrics, we conducted a stratified analysis based on interacting pair types, computing the aforementioned measures separately for each pair type.</p>
<p>Let <italic>y</italic><sub><italic>i</italic></sub>&#x02208;{0, 1} be the ground-truth label and <inline-formula><mml:math id="M35"><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula> the predicted probability for sample <italic>i</italic>. Given a decision threshold <italic>t</italic>, we define <inline-formula><mml:math id="M36"><mml:msub><mml:mrow><mml:mi>&#x00177;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mspace width="2.77695pt" class="tmspace"/><mml:mo>&#x021D4;</mml:mo><mml:mspace width="2.77695pt" class="tmspace"/><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>p</mml:mi></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02265;</mml:mo><mml:mi>t</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula> and the confusion matrix counts:</p>
<disp-formula id="E8"><mml:math id="M37"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:mstyle mathvariant="bold"><mml:mtext>I</mml:mtext></mml:mstyle><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x02227;</mml:mo><mml:msub><mml:mrow><mml:mi>&#x00177;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">TN</mml:mtext><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:mstyle mathvariant="bold"><mml:mtext>I</mml:mtext></mml:mstyle><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>&#x02227;</mml:mo><mml:msub><mml:mrow><mml:mi>&#x00177;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">FP</mml:mtext><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:mstyle 
mathvariant="bold"><mml:mtext>I</mml:mtext></mml:mstyle><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn><mml:mo>&#x02227;</mml:mo><mml:msub><mml:mrow><mml:mi>&#x00177;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">FN</mml:mtext><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:mstyle mathvariant="bold"><mml:mtext>I</mml:mtext></mml:mstyle><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x02227;</mml:mo><mml:msub><mml:mrow><mml:mi>&#x00177;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>Threshold-dependent metrics are then computed as:</p>
<disp-formula id="EQ9"><mml:math id="M38"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">Accuracy(rate of correctly predicted instances)</mml:mtext></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" mathvariant="normal">TN</mml:mtext></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" mathvariant="normal">TN</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" mathvariant="normal">FP</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" mathvariant="normal">FN</mml:mtext></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">Recall(proportion of TP w.r.t. all positive samples)</mml:mtext></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" mathvariant="normal">FN</mml:mtext></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">Specificity(proportion of TN w.r.t. 
all negative samples)</mml:mtext><mml:mo>=</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mfrac><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TN</mml:mtext></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TN</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" mathvariant="normal">FP</mml:mtext></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">Precision(proportion of TP among predicted positives)</mml:mtext></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" mathvariant="normal">FP</mml:mtext></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">F1(harmonic mean of precision and recall)</mml:mtext><mml:mo>=</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>2</mml:mn><mml:mo>&#x000B7;</mml:mo><mml:mfrac><mml:mrow><mml:mtext class="textrm" mathvariant="normal">Precision</mml:mtext><mml:mo>&#x000B7;</mml:mo><mml:mtext class="textrm" mathvariant="normal">Recall</mml:mtext></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">Precision</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" mathvariant="normal">Recall</mml:mtext></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">BalancedAcc(accuracy balanced by class proportion)</mml:mtext><mml:mo>=</mml:mo></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mfrac><mml:mrow><mml:mtext class="textrm" mathvariant="normal">Recall</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" 
mathvariant="normal">Specificity</mml:mtext></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(1)</label></disp-formula>
<p>In our work the threshold <italic>t</italic> is chosen on the validation set by maximizing the MCC coefficient, which provides a balanced single-score summary that incorporates TP, TN, FP, and FN, and is therefore less sensitive than accuracy to class imbalance:</p>
<disp-formula id="E10"><mml:math id="M40"><mml:mtext class="textrm" mathvariant="normal">MCC</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext><mml:mo>&#x000B7;</mml:mo><mml:mtext class="textrm" mathvariant="normal">TN</mml:mtext><mml:mo>-</mml:mo><mml:mtext class="textrm" mathvariant="normal">FP</mml:mtext><mml:mo>&#x000B7;</mml:mo><mml:mtext class="textrm" mathvariant="normal">FN</mml:mtext></mml:mrow><mml:mrow><mml:msqrt><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" mathvariant="normal">FP</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" mathvariant="normal">FN</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TN</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" mathvariant="normal">FP</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TN</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" mathvariant="normal">FN</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>.</mml:mo></mml:math></disp-formula>
<p>Threshold-independent metrics summarize performance across all thresholds. The ROC curve plots TPR(<italic>t</italic>) &#x0003D; Recall(<italic>t</italic>) versus FPR(<italic>t</italic>) &#x0003D; FP(<italic>t</italic>)/(FP(<italic>t</italic>)&#x0002B;TN(<italic>t</italic>)), and AUROC is the area under this curve. The precision&#x02013;recall curve plots Precision(<italic>t</italic>) versus Recall(<italic>t</italic>), and AUPRC is its area; under strong class imbalance, AUPRC is often more informative than AUROC, with a random baseline equal to the positive prevalence <inline-formula><mml:math id="M41"><mml:mi>&#x003C0;</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x0002B;</mml:mo></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x0002B;</mml:mo></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mo>-</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:math></inline-formula>.</p></sec>
<sec>
<label>2.5.3</label>
<title>Training hyper-parameters and baselines for comparison</title>
<p>The hyper-parameters and configurations used for training the FFNN are reported in Section 2.4.3. Moreover, training and validation loss curves were monitored over epochs to assess model convergence and to avoid potential overfitting by early stopping.</p>
<p>Hyperparameter selection was performed in preliminary experiments on a reduced subset of the training/validation interaction pairs using a grid-search strategy. We varied the number of hidden layers in {2, 4, 6}, the dropout rate in {0.1, 0.2}, and the batch size in {16, 512, 1,024}. For each configuration, models were trained using the same optimization settings described in Section 2.4.3, and the final model was selected as the configuration that maximized validation AUPRC. No hyperparameters were tuned on the test set.</p>
<p>Besides the random classifier, whose expected performance is AUROC = 0.5 and AUPRC = 0.047, we employed the IntaRNA method (<xref ref-type="bibr" rid="B14">Mann et al., 2017</xref>) as a baseline for comparison. IntaRNA estimates interaction energy. While interaction energy can be thresholded to obtain binary predictions, which enable the computation of accuracy, balanced accuracy, precision, recall, and F1 scores, we opted to limit the comparison to AUROC and AUPRC. These metrics provide a more robust and threshold-independent evaluation of predictive performance, ensuring a fair comparison across models.</p></sec></sec>
</sec>
<sec sec-type="results" id="s3">
<label>3</label>
<title>Results</title>
<p>We assessed the contribution of the data-augmentation strategy and the pooling operation used to obtain molecule-level embeddings. <xref ref-type="table" rid="T1">Table 1</xref> summarizes AUROC and AUPRC results across all configurations, including a baseline random classifier, IntaRNA and <italic>CUPID</italic> models. For <italic>CUPID</italic> we compared results obtained with (Data-aug) and without (No-Data-aug) data augmentation, considering different pooling techniques, i.e., concatenation (concat), maximum (Max), and average (Avg) pooling.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Comparison of AUROC and AUPRC across different experimental settings. Random baseline refers to the expected performance of the random classifiers.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Methods</bold></th>
<th valign="top" align="center"><bold>AUROC</bold></th>
<th valign="top" align="center"><bold>AUPRC</bold></th>
</tr>
</thead>
<tbody> <tr>
<td valign="top" align="left">Random baseline</td>
<td valign="top" align="center">0.5</td>
<td valign="top" align="center">0.047</td>
</tr>
<tr>
<td valign="top" align="left">IntaRNA</td>
<td valign="top" align="center">0.544</td>
<td valign="top" align="center">0.055</td>
</tr>
<tr>
<td valign="top" align="left" colspan="3"><italic><bold>CUPID</bold></italic></td>
</tr>
<tr>
<td valign="top" align="left">No-Data-aug</td>
<td valign="top" align="center">0.658</td>
<td valign="top" align="center">0.078</td>
</tr>
<tr>
<td valign="top" align="left">Data-aug-Max</td>
<td valign="top" align="center">0.810</td>
<td valign="top" align="center">0.147</td>
</tr>
<tr>
<td valign="top" align="left">Data-aug-concat</td>
<td valign="top" align="center">0.862</td>
<td valign="top" align="center">0.222</td>
</tr>
<tr>
<td valign="top" align="left">Data-aug-Avg</td>
<td valign="top" align="center"><bold>0.919</bold></td>
<td valign="top" align="center"><bold>0.364</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p><italic>CUPID</italic> models are sorted in increasing order of both AUROC and AUPRC. Bold values denote the best-performing model.</p>
</table-wrap-foot>
</table-wrap>
<sec>
<label>3.1</label>
<title>Random baselines</title>
<p>With a random classifier we can expect an AUROC &#x0003D; 0.5, while the estimated baseline AUPRC is:</p>
<disp-formula id="E11"><mml:math id="M42"><mml:mrow><mml:mtext class="textrm" mathvariant="normal">Baseline AUPRC</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x0002B;</mml:mo></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x0002B;</mml:mo></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mo>-</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow></mml:math></disp-formula>
<p>where <italic>N</italic><sub>&#x0002B;</sub> is the number of positive samples, and <italic>N</italic><sub>&#x02212;</sub> is the number of negative samples. Given the 1:20 ratio of positive to negative samples, the AUPRC baseline in the performed experiments is:</p>
<disp-formula id="E12"><mml:math id="M43"><mml:mrow><mml:mtext class="textrm" mathvariant="normal">Baseline AUPRC</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#x0002B;</mml:mo><mml:mn>20</mml:mn></mml:mrow></mml:mfrac><mml:mo>&#x02248;</mml:mo><mml:mn>0</mml:mn><mml:mo>.</mml:mo><mml:mn>0476</mml:mn><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula>
<p>Our top-performing model achieves an AUPRC of 0.364, corresponding to a <italic>7.65-fold improvement</italic> (0.364/0.0476). This margin quantifies the difficulty of the task: the extreme class imbalance renders precision&#x02013;recall a stringent metric, and the observed gains indicate that the model extracts interaction-relevant information that is well above chance expectations.</p>
</sec>
<sec>
<label>3.2</label>
<title>IntaRNA results</title>
<p><xref ref-type="fig" rid="F6">Figure 6</xref> reports IntaRNA performance on the augmented test set. In this setting, IntaRNA shows limited predictive power. Its scoring function relies on thermodynamic and accessibility components (e.g., hybridization energy and site accessibility), and in our experiments we used the default parameterization. Given the heterogeneity of ncRNA classes and sequence lengths in our benchmark, improved performance would likely require careful, class-specific calibration of both energy- and accessibility-related settings. Moreover, while IntaRNA is a general thermodynamics- and accessibility-based framework and is not inherently tied to a specific organism, it was originally introduced and most extensively evaluated in bacterial sRNA&#x02013;mRNA interaction settings; consequently, when applied to heterogeneous ncRNA&#x02013;ncRNA interactions (including long lncRNAs and diverse eukaryotic classes), its default parameterization may be suboptimal without additional tuning.</p>
<fig position="float" id="F6">
<label>Figure 6</label>
<caption><p>IntaRNA results with the augmented test set. <bold>(a)</bold> Distribution of predicted probabilities for negative and positive interactions; <bold>(b)</bold> AUPRC; <bold>(c)</bold> AUROC.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1749205-g0006.tif">
<alt-text content-type="machine-generated">IntaRNA results on the augmented test set. (a) Distribution of predicted scores for negative and positive interactions; (b) precision&#x02013;recall curve (AUPRC = 0.055); (c) ROC curve (AUROC = 0.544), with performance close to the diagonal, indicating limited discrimination.</alt-text>
</graphic>
</fig>
</sec>
<sec>
<label>3.3</label>
<title><italic>CUPID</italic> results</title>
<p><xref ref-type="table" rid="T1">Table 1</xref> compares all <italic>CUPID</italic> configurations. We first evaluated a <italic>CUPID</italic> model without augmentation, using concatenation of average and max pooling. <xref ref-type="fig" rid="F7">Figure 7</xref> shows the results obtained without data augmentation and with concatenated average-max pooling. The overall AUPRC results on the test set are relatively low (<xref ref-type="fig" rid="F7">Figure 7d</xref>), even if a certain learning is witnessed by the AUROC largely above 0.5 (<xref ref-type="fig" rid="F7">Figure 7f</xref>), and by the distribution of the predicted interaction probabilities for negative and positive examples (<xref ref-type="fig" rid="F7">Figure 7c</xref>), with probabilities for positives relatively higher with respect to negatives. Nevertheless, the relatively flat trend of the training loss reveals a certain difficulty of the model in learning the data. This is reflected also in the confusion matrix, where most of the negative examples (70%) are misclassified (<xref ref-type="fig" rid="F7">Figure 7e</xref>), and in the degradation of the AUPRC performance between validation (<xref ref-type="fig" rid="F7">Figure 7a</xref>) and test (<xref ref-type="fig" rid="F7">Figure 7d</xref>) data. By looking at specific ncRNA interactions, for certain interaction types (e.g., snRNA-snoRNA) we obtained good results across the different metrics, but for several ncRNA interactions (e.g., miRNA-lncRNA, miRNA-miRNA, and lncRNA-snoRNA) we achieved poor results, with AUPRC below 0.1 (<xref ref-type="fig" rid="F7">Figure 7g</xref>). 
In summary, <xref ref-type="fig" rid="F7">Figure 7</xref> shows that with this setting <italic>CUPID</italic> can provide a certain discrimination between positive and negative interactions (<xref ref-type="fig" rid="F7">Figure 7c</xref>), but its precision&#x02013;recall and ROC curves indicate a limited separation between positive and negative examples (<xref ref-type="fig" rid="F7">Figures 7d</xref>, <xref ref-type="fig" rid="F7">f</xref>).</p>
<fig position="float" id="F7">
<label>Figure 7</label>
<caption><p><italic>CUPID</italic> results with concatenated pooling, and without using augmented data. <bold>(a)</bold> Overall precision&#x02013;recall curve on the validation set including all the types of ncRNA interactions; <bold>(b)</bold> training and validation loss across epochs; <bold>(c)</bold> distribution of the <italic>CUPID</italic> predicted probabilities on negative and positive examples on the test set; <bold>(d)</bold> overall precision&#x02013;recall curve on the test set including all the types of ncRNA interactions; <bold>(e)</bold> confusion matrix on the test set; <bold>(f)</bold> ROC curve on the test set including all the types of ncRNA interactions; <bold>(g)</bold> <italic>CUPID</italic> results on the test set across different types of ncRNA interactions (rows) for different types of metrics (columns).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1749205-g0007.tif">
<alt-text content-type="machine-generated">CUPID results with concatenated pooling and no data augmentation. (a) Precision&#x02013;recall curve on the validation set across ncRNA types (AUPRC = 0.135); (b) training and validation loss over 40 epochs; (c) predicted probability distributions for negatives and positives on the test set; (d) overall test precision&#x02013;recall curve (AUPRC = 0.078); (e) test confusion matrix; (f) test ROC curve (AUROC = 0.658); (g) heatmap of evaluation metrics (accuracy, balanced accuracy, precision, recall, F1, AUROC, AUPRC) across RNA pair types.</alt-text>
</graphic>
</fig>
<p>Introducing data augmentation consistently improves performance (<xref ref-type="table" rid="T1">Table 1</xref>). <xref ref-type="fig" rid="F8">Figure 8</xref> shows the results obtained with data augmentation and average pooling. The AUPRC is more than 4 times larger than without data augmentation (<xref ref-type="fig" rid="F8">Figure 8d</xref> and <xref ref-type="table" rid="T1">Table 1</xref>). Enlarging the size of the training data by data augmentation allows the model to better learn the training data, as witnessed by the training loss that continues to decrease across epochs (<xref ref-type="fig" rid="F8">Figure 8b</xref>). This results in a clear separation between the scores predicted for positive and negative examples&#x02014;note that the probabilities predicted for negatives are compressed toward zero while those for most positives are largely above 0.7 (even if with several outliers for both positive and negative examples, <xref ref-type="fig" rid="F8">Figure 8c</xref>). The confusion matrix also confirms that the model with augmented data can better predict negative examples (<xref ref-type="fig" rid="F8">Figure 8e</xref>); AUPRC (<xref ref-type="fig" rid="F8">Figure 8d</xref>) significantly improves, and AUROC is larger than 0.9 (<xref ref-type="fig" rid="F8">Figure 8f</xref>). Analyzing results for each specific ncRNA interaction, we can observe a significant improvement across all the considered metrics, with AUROC in most cases larger than 0.9, except for circRNA-miRNA, miRNA-scRNA, miRNA-snRNA, and miRNA-scaRNA (even if for these last two ncRNA interactions values are close to 0.9; <xref ref-type="fig" rid="F8">Figure 8g</xref>).</p>
<fig position="float" id="F8">
<label>Figure 8</label>
<caption><p><italic>CUPID</italic> results with average pooling and using augmented data. <bold>(a)</bold> Overall precision&#x02013;recall curve on the validation set including all the types of ncRNA interactions; <bold>(b)</bold> training and validation loss across epochs; <bold>(c)</bold> distribution of the <italic>CUPID</italic> predicted probabilities on negative and positive examples on the test set; <bold>(d)</bold> overall precision&#x02013;recall curve on the test set including all the types of ncRNA interactions; <bold>(e)</bold> confusion matrix on the test set; <bold>(f)</bold> ROC curve on the test set including all the types of ncRNA interactions; <bold>(g)</bold> <italic>CUPID</italic> results on the test set across different types of ncRNA interactions (rows) for different types of metrics (columns).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1749205-g0008.tif">
<alt-text content-type="machine-generated">CUPID results with average pooling and augmented data. (a) Validation precision&#x02013;recall curve (AUPRC = 0.601); (b) training and validation loss over 32 epochs; (c) predicted probability distributions for negatives and positives on the test set; (d) overall test precision&#x02013;recall curve (AUPRC = 0.364); (e) test confusion matrix; (f) test ROC curve (AUROC = 0.919); (g) heatmap of evaluation metrics (accuracy, balanced accuracy, precision, recall, F1, AUROC, AUPRC) across RNA interaction types.</alt-text>
</graphic>
</fig>
<p>These results confirm that data augmentation is crucial to improving results for two main reasons: first, the model has enough training data to generalize better; second, augmentation improves generalization by leveraging molecule order and orientation, two symmetries that are not guaranteed to be learned from limited training data. Augmentation effectively enforces these invariances, reducing overfitting to sequence presentation and mitigating the scarcity of positive examples.</p>
<p>The pooling strategy has a direct impact on the stability of the molecule-level embedding. Average pooling&#x02014;yielding a smoothed representation over the full sequence&#x02014;achieves the highest AUROC and AUPRC (<xref ref-type="fig" rid="F8">Figure 8</xref>) compared to max pooling (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure S2</xref>) and concatenation pooling (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure S3</xref>). This indicates that interaction-relevant information is not confined to a small set of token embeddings but arises from distributed features along the sequence. Max pooling, in contrast, appears sensitive to local outliers and overly compresses positional variability, while concatenation does not provide additional benefits once augmentation is introduced. The results suggest that, for ncRNA interaction prediction, the aggregate signal across nucleotides is more informative than isolated high-activation sites.</p></sec>
</sec>
<sec sec-type="discussion" id="s4">
<label>4</label>
<title>Discussion</title>
<p>The results shown in this work demonstrate that RNA sequence-only inference can recover interaction signals across diverse ncRNA classes. The best-performing configuration reaches AUROC values above 0.9 on the test set, despite operating without structural, evolutionary, or thermodynamic information. This suggests that pretrained RNA language models encode latent features associated with intermolecular recognition. These features may reflect statistical regularities of pairing propensities and local compositional biases captured during pretraining, even in the absence of explicit structural supervision.</p>
<p>From a methodological standpoint, two contributions appear essential. First, the augmentation scheme addresses symmetries inherent to the problem. Because interacting RNAs can be presented in either order, and because sequence orientation can vary, enforcing invariance to these transformations is critical for robust generalization. Data augmentation also increases the number of examples available for training, thus improving the generalization performance of the model. Second, average pooling provides stable embeddings for ncRNA sequences. For molecules such as lncRNAs&#x02014;whose functional elements are dispersed and whose lengths vary over orders of magnitude&#x02014;summarizing the full sequence avoids overemphasis on isolated positions and instead captures global contextual tendencies. Moreover, to our knowledge, <italic>CUPID</italic> is the first model able to predict a large set of ncRNA interactions, and in principle can be applied to predict any ncRNA interaction.</p>
<p>The limitations observed for IntaRNA highlight the difference between energy-based and representation-based approaches. Thermodynamic models rely on explicit structural motifs and accessibility assumptions, which may not generalize to long, structured, or poorly conserved ncRNAs. In contrast, <italic>CUPID</italic> does not attempt to reconstruct secondary structure but leverages contextual sequence statistics learned from large corpora. These complementary perspectives suggest potential synergies: coupling language-model embeddings with coarse structural predictions could refine the discrimination between spurious and functionally relevant pairing events.</p>
<p>Despite these promising results, we note that the resources used to train <italic>CUPID</italic> are limited in size and exhibit a strong imbalance across interaction types. Although our type-constrained negative sampling preserves the empirical distribution of interaction types, rare types remain challenging; they can yield higher-variance estimates and may prevent the model from learning robust type-specific patterns. Accordingly, we emphasize AUPRC in our per-type analyses, as it is generally more informative than AUROC under severe class imbalance. Future work will benefit from larger and more balanced interaction resources, and could further improve stability on underrepresented classes via targeted strategies such as class-aware reweighting, resampling, or cost-sensitive objectives.</p>
<p>As larger ncRNA catalogs become available through resources such as RNAcentral (<xref ref-type="bibr" rid="B24">Sweeney et al., 2020</xref>), and as experimental protocols expand the coverage of ncRNA&#x02013;ncRNA interactions, the training regime of models like <italic>CUPID</italic> can be scaled accordingly. Future developments may integrate longer receptive fields, explicit cross-attention between molecules, or joint fine-tuning on experimentally resolved interactomes. These extensions could help reveal constraints underlying ncRNA recognition and improve the resolution of regulatory maps in eukaryotic transcriptomes.</p>
<p>In addition, while our study focuses on a resource-efficient paradigm that leverages pre-trained RNA language models with a lightweight interaction-specific prediction head, it would be interesting to complement our analysis with baselines that train a long-context Transformer from scratch. We did not include such a baseline here because, under the current supervision regime (approximately 10<sup>5</sup> interaction pairs after filtering), end-to-end training from random initialization may be difficult to optimize and may not yield generalizable representations. As larger and more diverse labeled interaction resources become available, systematic comparisons between pre-trained and from-scratch Transformer encoders will become increasingly informative.</p>
<p>A similar consideration holds when considering studies substituting RNA-LM models with several Transformer-based nucleotide language models. While these models could, in principle, be considered as alternative backbones for RNA sequence embeddings [e.g., models pre-trained predominantly on DNA such as Nucleotide Transformer, which has been reported to transfer RNA-related signals (<xref ref-type="bibr" rid="B7">Dalla-Torre et al., 2025</xref>)], we selected GenerRNA because it is pre-trained specifically on RNA sequences, provides a long-context representation and it is expected to better capture RNA-class-specific features. We therefore expect RNA-specialized pre-training to yield representations that are more directly tailored to RNA sequence regularities than more generic DNA-pre-trained alternatives, even when the latter can capture some RNA features. In this work, we focused on characterizing the proposed interaction-prediction pipeline using a single RNA-specialized backbone, including ablations on augmentation and pooling. As larger and more diverse interaction resources become available, it will be important to benchmark GenerRNA in a zero-shot setting against more general nucleotide Transformers, and to evaluate both backbones also after task-specific fine-tuning.</p>
<p>In summary, the results show that <italic>CUPID</italic> provides a scalable sequence-based framework for ncRNA&#x02013;ncRNA interaction prediction, achieving AUROC larger than 0.9 for several types of ncRNA interactions. Its performance, robustness to class heterogeneity, and limited dependence on domain-specific priors make it suitable for large-scale <italic>in silico</italic> screening and for guiding targeted experimental profiling of ncRNA regulatory networks.</p></sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. The data, the CUPID code, and the scripts to reproduce the experiments and tutorials are available from GitHub: <ext-link ext-link-type="uri" xlink:href="https://github.com/AnacletoLAB/ncRNA-CUPID">https://github.com/AnacletoLAB/ncRNA-CUPID</ext-link>.</p>
</sec>
<sec sec-type="author-contributions" id="s6">
<title>Author contributions</title>
<p>MN: Conceptualization, Data curation, Investigation, Methodology, Resources, Software, Validation, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. FS: Data curation, Investigation, Software, Validation, Writing &#x02013; review &#x00026; editing. EC: Conceptualization, Formal analysis, Methodology, Project administration, Supervision, Visualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. GV: Conceptualization, Formal analysis, Funding acquisition, Investigation, Methodology, Supervision, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The author EC declared that they were an editorial board member of Frontiers, at the time of submission. This had no impact on the peer review process and the final decision.</p>
</sec>
<sec sec-type="ai-statement" id="s8">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec><sec sec-type="supplementary-material" id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/frai.2026.1749205/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/frai.2026.1749205/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Aleksander</surname> <given-names>S. A.</given-names></name> <name><surname>Balhoff</surname> <given-names>J.</given-names></name> <name><surname>Carbon</surname> <given-names>S.</given-names></name> <name><surname>Cherry</surname> <given-names>J. M.</given-names></name> <name><surname>Drabkin</surname> <given-names>H. J.</given-names></name> <name><surname>Ebert</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>The gene ontology knowledgebase in 2023</article-title>. <source>Genetics</source> <volume>224</volume>:<fpage>iyad031</fpage>. doi: <pub-id pub-id-type="doi">10.1093/genetics/iyad031</pub-id><pub-id pub-id-type="pmid">36866529</pub-id></mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ali</surname> <given-names>S. A.</given-names></name> <name><surname>Peffers</surname> <given-names>M. J.</given-names></name> <name><surname>Ormseth</surname> <given-names>M. J.</given-names></name> <name><surname>Jurisica</surname> <given-names>I.</given-names></name> <name><surname>Kapoor</surname> <given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>The non-coding RNA interactome in joint health and disease</article-title>. <source>Nat. Rev. Rheumatol</source>. <volume>17</volume>, <fpage>692</fpage>&#x02013;<lpage>705</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41584-021-00687-y</pub-id><pub-id pub-id-type="pmid">34588660</pub-id></mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Alipanahi</surname> <given-names>B.</given-names></name> <name><surname>Delong</surname> <given-names>A.</given-names></name> <name><surname>Weirauch</surname> <given-names>M. T.</given-names></name> <name><surname>Frey</surname> <given-names>B. J.</given-names></name></person-group> (<year>2015</year>). <article-title>Predicting the sequence specificities of DNA-and RNA-binding proteins by deep learning</article-title>. <source>Nat. Biotechnol</source>. <volume>33</volume>, <fpage>831</fpage>&#x02013;<lpage>838</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nbt.3300</pub-id></mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cavalleri</surname> <given-names>E.</given-names></name> <name><surname>Cabri</surname> <given-names>A.</given-names></name> <name><surname>Soto-Gomez</surname> <given-names>M.</given-names></name> <name><surname>Bonfitto</surname> <given-names>S.</given-names></name> <name><surname>Perlasca</surname> <given-names>P.</given-names></name> <name><surname>Gliozzo</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>An ontology-based knowledge graph for representing interactions involving RNA molecules</article-title>. <source>Sci. Data</source> <volume>11</volume>:<fpage>906</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41597-024-03673-7</pub-id><pub-id pub-id-type="pmid">39174566</pub-id></mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cetin</surname> <given-names>S.</given-names></name> <name><surname>Sefer</surname> <given-names>E.</given-names></name></person-group> (<year>2025</year>). <article-title>A graphlet-based explanation generator for graph neural networks over biological datasets</article-title>. <source>Curr. Bioinform</source>. <volume>20</volume>, <fpage>840</fpage>&#x02013;<lpage>851</lpage>. doi: <pub-id pub-id-type="doi">10.2174/0115748936355418250114104026</pub-id></mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>J.</given-names></name> <name><surname>Hu</surname> <given-names>Z.</given-names></name> <name><surname>Sun</surname> <given-names>S.</given-names></name> <name><surname>Tan</surname> <given-names>Q.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Yu</surname> <given-names>Q.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Interpretable RNA foundation model from unannotated data for highly accurate RNA structure and function predictions</article-title>. <source>arXiv preprint arXiv:2204.00300</source>.</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dalla-Torre</surname> <given-names>H.</given-names></name> <name><surname>Gonzalez</surname> <given-names>L.</given-names></name> <name><surname>Mendoza-Revilla</surname> <given-names>J.</given-names></name> <name><surname>Lopez Carranza</surname> <given-names>N.</given-names></name> <name><surname>Grzywaczewski</surname> <given-names>A. H.</given-names></name> <name><surname>Oteri</surname> <given-names>F.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Nucleotide Transformer: building and evaluating robust foundation models for human genomics</article-title>. <source>Nat. Methods</source> <volume>22</volume>, <fpage>287</fpage>&#x02013;<lpage>297</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41592-024-02523-z</pub-id><pub-id pub-id-type="pmid">39609566</pub-id></mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Engreitz</surname> <given-names>J. M.</given-names></name> <name><surname>Sirokman</surname> <given-names>K.</given-names></name> <name><surname>McDonel</surname> <given-names>P.</given-names></name> <name><surname>Shishkin</surname> <given-names>A. A.</given-names></name> <name><surname>Surka</surname> <given-names>C.</given-names></name> <name><surname>Russell</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>RNA-RNA interactions enable specific targeting of noncoding RNAs to nascent pre-mRNAs and chromatin sites</article-title>. <source>Cell</source> <volume>159</volume>, <fpage>188</fpage>&#x02013;<lpage>199</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cell.2014.08.018</pub-id><pub-id pub-id-type="pmid">25259926</pub-id></mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fabbri</surname> <given-names>M.</given-names></name> <name><surname>Girnita</surname> <given-names>L.</given-names></name> <name><surname>Varani</surname> <given-names>G.</given-names></name> <name><surname>Calin</surname> <given-names>G. A.</given-names></name></person-group> (<year>2019</year>). <article-title>Decrypting noncoding RNA interactions, structures, and functional networks</article-title>. <source>Genome Res</source>. <volume>29</volume>, <fpage>1377</fpage>&#x02013;<lpage>1388</lpage>. doi: <pub-id pub-id-type="doi">10.1101/gr.247239.118</pub-id><pub-id pub-id-type="pmid">31434680</pub-id></mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gargano</surname> <given-names>M. A.</given-names></name> <name><surname>Matentzoglu</surname> <given-names>N.</given-names></name> <name><surname>Coleman</surname> <given-names>B.</given-names></name> <name><surname>Addo-Lartey</surname> <given-names>E. B.</given-names></name> <name><surname>Anagnostopoulos</surname> <given-names>A. V.</given-names></name> <name><surname>Anderton</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>The Human Phenotype Ontology in 2024: phenotypes around the world</article-title>. <source>Nucleic Acids Res</source>. <volume>52</volume>, <fpage>D1333</fpage>&#x02013;<lpage>D1346</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkad1005</pub-id><pub-id pub-id-type="pmid">37953324</pub-id></mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gliozzo</surname> <given-names>J.</given-names></name> <name><surname>Soto Gomez</surname> <given-names>M. A.</given-names></name> <name><surname>Bonometti</surname> <given-names>A.</given-names></name> <name><surname>Patak</surname> <given-names>A.</given-names></name> <name><surname>Casiraghi</surname> <given-names>E.</given-names></name> <name><surname>Valentini</surname> <given-names>G.</given-names></name></person-group> (<year>2025</year>). <article-title>miss-SNF: a multimodal patient similarity network integration approach to handle completely missing data sources</article-title>. <source>Bioinformatics</source> <volume>41</volume>:<fpage>btaf150</fpage>. doi: <pub-id pub-id-type="doi">10.1101/2025.02.24.639805</pub-id><pub-id pub-id-type="pmid">40184204</pub-id></mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Z.</given-names></name> <name><surname>Li</surname> <given-names>K.</given-names></name> <name><surname>Lian</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>J.</given-names></name></person-group> (<year>2025</year>). <article-title>LncRNA-miRNA interaction prediction based on multi-source heterogeneous graph neural network and multi-level attention mechanism</article-title>. <source>Int. J. Biol. Macromol</source>. <volume>319</volume>:<fpage>145614</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ijbiomac.2025.145614</pub-id><pub-id pub-id-type="pmid">40582680</pub-id></mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lorenzi</surname> <given-names>L.</given-names></name> <name><surname>Chiu</surname> <given-names>H. S.</given-names></name> <name><surname>Avila Cobos</surname> <given-names>F.</given-names></name> <name><surname>Gross</surname> <given-names>S.</given-names></name> <name><surname>Volders</surname> <given-names>P. J.</given-names></name> <name><surname>Cannoodt</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>The RNA atlas expands the catalog of human non-coding RNAs</article-title>. <source>Nat. Biotechnol</source>. <volume>39</volume>, <fpage>1453</fpage>&#x02013;<lpage>1465</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41587-021-00936-1</pub-id><pub-id pub-id-type="pmid">34140680</pub-id></mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mann</surname> <given-names>M.</given-names></name> <name><surname>Wright</surname> <given-names>P. R.</given-names></name> <name><surname>Backofen</surname> <given-names>R.</given-names></name></person-group> (<year>2017</year>). <article-title>IntaRNA 2.0: enhanced and customizable prediction of RNA-RNA interactions</article-title>. <source>Nucleic Acids Res</source>. <volume>45</volume>, <fpage>W435</fpage>&#x02013;<lpage>W439</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkx279</pub-id><pub-id pub-id-type="pmid">28472523</pub-id></mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Matthews</surname> <given-names>B. W.</given-names></name></person-group> (<year>1975</year>). <article-title>Comparison of the predicted and observed secondary structure of t4 phage lysozyme</article-title>. <source>Biochim. Biophys. Acta</source> <volume>405</volume>, <fpage>442</fpage>&#x02013;<lpage>451</lpage>. doi: <pub-id pub-id-type="doi">10.1016/0005-2795(75)90109-9</pub-id><pub-id pub-id-type="pmid">1180967</pub-id></mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nicolini</surname> <given-names>M.</given-names></name> <name><surname>Saitto</surname> <given-names>E.</given-names></name> <name><surname>Jimenez-Franco</surname> <given-names>R.</given-names></name> <name><surname>Cavalleri</surname> <given-names>E.</given-names></name> <name><surname>Alfonso</surname> <given-names>A. J. G.</given-names></name> <name><surname>Malchiodi</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2025a</year>). <article-title>Fine-tuning of conditional Transformers improves in silico enzyme prediction and generation</article-title>. <source>Comput. Struct. Biotechnol. J</source>. <volume>27</volume>, <fpage>1318</fpage>&#x02013;<lpage>1334</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.csbj.2025.03.037</pub-id><pub-id pub-id-type="pmid">40235640</pub-id></mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nicolini</surname> <given-names>M.</given-names></name> <name><surname>Stacchietti</surname> <given-names>F.</given-names></name> <name><surname>Cano</surname> <given-names>C.</given-names></name> <name><surname>Casiraghi</surname> <given-names>E.</given-names></name> <name><surname>Valentini</surname> <given-names>G.</given-names></name></person-group> (<year>2025b</year>). <article-title>&#x0201C;A transformer-based model to predict micro RNA interactions,&#x0201D;</article-title> in <source>18th International Work-Conference on Artificial Neural Networks, IWANN 2025</source>. doi: <pub-id pub-id-type="doi">10.1007/978-3-032-02725-2_8</pub-id></mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Radford</surname> <given-names>A.</given-names></name> <name><surname>Wu</surname> <given-names>J.</given-names></name> <name><surname>Child</surname> <given-names>R.</given-names></name> <name><surname>Luan</surname> <given-names>D.</given-names></name> <name><surname>Amodei</surname> <given-names>D.</given-names></name> <name><surname>Sutskever</surname> <given-names>I.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Language models are unsupervised multitask learners</article-title>. <source>OpenAI Blog</source> <volume>1</volume>:<fpage>9</fpage>.</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sapoval</surname> <given-names>N.</given-names></name> <name><surname>Aghazadeh</surname> <given-names>A.</given-names></name> <name><surname>Nute</surname> <given-names>M. G.</given-names></name> <name><surname>Antunes</surname> <given-names>D. A.</given-names></name> <name><surname>Balaji</surname> <given-names>A.</given-names></name> <name><surname>Baraniuk</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Current progress and open challenges for applying deep learning across the biosciences</article-title>. <source>Nat. Commun</source>. <volume>13</volume>:<fpage>1728</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41467-022-29268-7</pub-id><pub-id pub-id-type="pmid">35365602</pub-id></mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sefer</surname> <given-names>E.</given-names></name></person-group> (<year>2025</year>). <article-title>Drgat: predicting drug responses via diffusion-based graph attention network</article-title>. <source>J. Comput. Biol</source>. <volume>32</volume>, <fpage>330</fpage>&#x02013;<lpage>350</lpage>. doi: <pub-id pub-id-type="doi">10.1089/cmb.2024.0807</pub-id><pub-id pub-id-type="pmid">39639802</pub-id></mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Sennrich</surname> <given-names>R.</given-names></name> <name><surname>Haddow</surname> <given-names>B.</given-names></name> <name><surname>Birch</surname> <given-names>A.</given-names></name></person-group> (<year>2016</year>). <article-title>&#x0201C;Neural machine translation of rare words with subword units,&#x0201D;</article-title> in <source>Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics</source> (<publisher-loc>Berlin, Germany</publisher-loc>: <publisher-name>Association for Computational Linguistics</publisher-name>), <fpage>1715</fpage>&#x02013;<lpage>1725</lpage>. doi: <pub-id pub-id-type="doi">10.18653/v1/P16-1162</pub-id></mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shen</surname> <given-names>T.</given-names></name> <name><surname>Hu</surname> <given-names>Z.</given-names></name> <name><surname>Sun</surname> <given-names>S.</given-names></name> <name><surname>Liu</surname> <given-names>D.</given-names></name> <name><surname>Wong</surname> <given-names>F.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Accurate RNA 3D structure prediction using a language model-based deep learning approach</article-title>. <source>Nat. Methods</source> <volume>21</volume>, <fpage>2287</fpage>&#x02013;<lpage>2298</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s41592-024-02487-0</pub-id><pub-id pub-id-type="pmid">39572716</pub-id></mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sun</surname> <given-names>S.-L.</given-names></name> <name><surname>Jiang</surname> <given-names>Y.-Y.</given-names></name> <name><surname>Yang</surname> <given-names>J.-P.</given-names></name> <name><surname>Xiu</surname> <given-names>Y.-H.</given-names></name> <name><surname>Bilal</surname> <given-names>A.</given-names></name> <name><surname>Long</surname> <given-names>H.-X.</given-names></name></person-group> (<year>2025</year>). <article-title>Predicting noncoding RNA and disease associations using multigraph contrastive learning</article-title>. <source>Sci. Rep</source>. <volume>15</volume>:<fpage>230</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-024-81862-5</pub-id><pub-id pub-id-type="pmid">39747154</pub-id></mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sweeney</surname> <given-names>B. A.</given-names></name> <name><surname>Petrov</surname> <given-names>A. I.</given-names></name> <name><surname>Ribas</surname> <given-names>C. E.</given-names></name> <name><surname>Finn</surname> <given-names>R. D.</given-names></name> <name><surname>Bateman</surname> <given-names>A.</given-names></name> <name><surname>Szymanski</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>RNAcentral 2021: secondary structure integration, improved sequence search and new member databases</article-title>. <source>Nucleic Acids Res</source>. <volume>49</volume>, <fpage>D212</fpage>&#x02013;<lpage>D220</lpage>. doi: <pub-id pub-id-type="doi">10.1093/nar/gkaa921</pub-id><pub-id pub-id-type="pmid">33106848</pub-id></mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tian</surname> <given-names>X.</given-names></name> <name><surname>Shen</surname> <given-names>L.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Zhou</surname> <given-names>L.</given-names></name> <name><surname>Peng</surname> <given-names>L.</given-names></name></person-group> (<year>2021</year>). <article-title>A novel lncRNA-protein interaction prediction method based on deep forest with cascade forest structure</article-title>. <source>Sci. Rep</source>. <volume>11</volume>:<fpage>18881</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-021-98277-1</pub-id><pub-id pub-id-type="pmid">34556758</pub-id></mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Torgano</surname> <given-names>F.</given-names></name> <name><surname>Soto Gomez</surname> <given-names>M.</given-names></name> <name><surname>Zignani</surname> <given-names>M.</given-names></name> <name><surname>Gliozzo</surname> <given-names>J.</given-names></name> <name><surname>Cavalleri</surname> <given-names>E.</given-names></name> <name><surname>Mesiti</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>RNA knowledge-graph analysis through homogeneous embedding methods</article-title>. <source>Bioinform. Adv</source>. <volume>5</volume>:<fpage>vbaf109</fpage>. doi: <pub-id pub-id-type="doi">10.1101/2025.02.17.638592</pub-id><pub-id pub-id-type="pmid">40496493</pub-id></mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Umu</surname> <given-names>S. U.</given-names></name> <name><surname>Gardner</surname> <given-names>P. P.</given-names></name></person-group> (<year>2017</year>). <article-title>A comprehensive benchmark of RNA-RNA interaction prediction tools for all domains of life</article-title>. <source>Bioinformatics</source> <volume>33</volume>, <fpage>988</fpage>&#x02013;<lpage>996</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btw728</pub-id><pub-id pub-id-type="pmid">27993777</pub-id></mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Valentini</surname> <given-names>G.</given-names></name> <name><surname>Malchiodi</surname> <given-names>D.</given-names></name> <name><surname>Gliozzo</surname> <given-names>J.</given-names></name> <name><surname>Mesiti</surname> <given-names>M.</given-names></name> <name><surname>Soto-Gomez</surname> <given-names>M.</given-names></name> <name><surname>Cabri</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>The promises of large language models for protein design and modeling</article-title>. <source>Front. Bioinform</source>. <volume>3</volume>:<fpage>1304099</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fbinf.2023.1304099</pub-id><pub-id pub-id-type="pmid">38076030</pub-id></mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vasilevsky</surname> <given-names>N. A.</given-names></name> <name><surname>Toro</surname> <given-names>S.</given-names></name> <name><surname>Matentzoglu</surname> <given-names>N.</given-names></name> <name><surname>Flack</surname> <given-names>J. E.</given-names></name> <name><surname>Mullen</surname> <given-names>K. R.</given-names></name> <name><surname>Hegde</surname> <given-names>H.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Mondo: integrating disease terminology across communities</article-title>. <source>Genetics</source> <volume>2025</volume>:<fpage>iyaf215</fpage>. doi: <pub-id pub-id-type="doi">10.1093/genetics/iyaf215</pub-id><pub-id pub-id-type="pmid">41052288</pub-id></mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>W.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name> <name><surname>Sun</surname> <given-names>J.</given-names></name> <name><surname>Zhao</surname> <given-names>Q.</given-names></name> <name><surname>Shuai</surname> <given-names>J.</given-names></name></person-group> (<year>2022</year>). <article-title>Predicting the potential human lncRNA&#x02013;miRNA interactions based on graph convolution network with conditional random field</article-title>. <source>Brief. Bioinform</source>. <volume>23</volume>:<fpage>bbac463</fpage>. doi: <pub-id pub-id-type="doi">10.1093/bib/bbac463</pub-id><pub-id pub-id-type="pmid">36305458</pub-id></mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wei</surname> <given-names>J.</given-names></name> <name><surname>Chen</surname> <given-names>S.</given-names></name> <name><surname>Zong</surname> <given-names>L.</given-names></name> <name><surname>Gao</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name></person-group> (<year>2022</year>). <article-title>Protein&#x02013;RNA interaction prediction with deep learning: structure matters</article-title>. <source>Brief. Bioinform</source>. <volume>23</volume>:<fpage>bbab540</fpage>. doi: <pub-id pub-id-type="doi">10.1093/bib/bbab540</pub-id><pub-id pub-id-type="pmid">34929730</pub-id></mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>T.</given-names></name> <name><surname>He</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name></person-group> (<year>2025</year>). <article-title>Introducing tec-lncmir for prediction of lncRNA-miRNA interactions through deep learning of RNA sequences</article-title>. <source>Brief. Bioinform</source>. <volume>26</volume>:<fpage>bbaf046</fpage>. doi: <pub-id pub-id-type="doi">10.1093/bib/bbaf046</pub-id><pub-id pub-id-type="pmid">39927859</pub-id></mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>H.</given-names></name> <name><surname>Yang</surname> <given-names>H.</given-names></name> <name><surname>Sun</surname> <given-names>W.</given-names></name> <name><surname>Yan</surname> <given-names>Z.</given-names></name> <name><surname>Yang</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>H.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>An interpretable RNA foundation model for exploring functional RNA motifs in plants</article-title>. <source>Nat. Mach. Intell</source>. <volume>6</volume>, <fpage>1616</fpage>&#x02013;<lpage>1625</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s42256-024-00946-z</pub-id><pub-id pub-id-type="pmid">39703563</pub-id></mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>X.</given-names></name> <name><surname>Jiang</surname> <given-names>L.</given-names></name> <name><surname>Jin</surname> <given-names>S.</given-names></name> <name><surname>Zeng</surname> <given-names>X.</given-names></name> <name><surname>Liu</surname> <given-names>X.</given-names></name></person-group> (<year>2022</year>). <article-title>premLI: a pre-trained method to uncover microRNA&#x02013;lncRNA potential interactions</article-title>. <source>Brief. Bioinform</source>. <volume>23</volume>:<fpage>bbab470</fpage>. doi: <pub-id pub-id-type="doi">10.1093/bib/bbab470</pub-id><pub-id pub-id-type="pmid">34850810</pub-id></mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>Y.</given-names></name> <name><surname>Oono</surname> <given-names>K.</given-names></name> <name><surname>Takizawa</surname> <given-names>H.</given-names></name> <name><surname>Kotera</surname> <given-names>M.</given-names></name></person-group> (<year>2024</year>). <article-title>GenerRNA: a generative pre-trained language model for de novo RNA design</article-title>. <source>PLoS ONE</source> <volume>19</volume>:<fpage>e0310814</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pone.0310814</pub-id><pub-id pub-id-type="pmid">39352899</pub-id></mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3087642/overview">Marco Masseroli</ext-link>, Polytechnic University of Milan, Italy</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1525158/overview">Emre Sefer</ext-link>, &#x000D6;zye&#x0011F;in University, T&#x000FC;rkiye</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2317581/overview">Adriano Setti</ext-link>, Sapienza University of Rome, Italy</p>
</fn>
</fn-group>
<fn-group>
<fn id="fn0003"><label>1</label><p>Retrieval of interacting pairs and corresponding sequences was performed using the scripts available from the RNA-KG web site: <ext-link ext-link-type="uri" xlink:href="https://github.com/AnacletoLAB/RNA-KG">https://github.com/AnacletoLAB/RNA-KG</ext-link>.</p></fn>
<fn id="fn0004"><label>2</label><p>In RNAinter, the term &#x0201C;pseudo&#x0201D; specifically denotes RNA sequences transcribed from pseudogenes. In this context, these are transcripts derived from genes that have lost their protein-coding capability due to accumulated mutations, yet they are still produced as RNA. Similar to other ncRNAs, such pseudogene RNAs can sometimes participate in regulatory networks by, for example, acting as miRNA decoys or sponges, despite not encoding functional proteins.</p></fn>
</fn-group>
</back>
</article>