<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="review-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1667325</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2025.1667325</article-id>
<article-version article-version-type="Corrected Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Review</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Computational models for pan-cancer classification based on multi-omics data</article-title>
<alt-title alt-title-type="left-running-head">Wang et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2025.1667325">10.3389/fgene.2025.1667325</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Jianlin</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Jiao</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<uri xlink:href="https://loop.frontiersin.org/people/3136504"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Dai</surname>
<given-names>Xuebing</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yan</surname>
<given-names>Chaokun</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<uri xlink:href="https://loop.frontiersin.org/people/787100"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Fang</surname>
<given-names>Caili</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1">
<institution>School of Computer and Information Engineering, Henan University</institution>, <city>Kaifeng</city>, <state>Henan</state>, <country country="CN">China</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Caili Fang, <email xlink:href="fangleheart@henu.edu.cn">fangleheart@henu.edu.cn</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2025-10-28">
<day>28</day>
<month>10</month>
<year>2025</year>
</pub-date>
<pub-date publication-format="electronic" date-type="corrected" iso-8601-date="2025-12-01">
<day>01</day>
<month>12</month>
<year>2025</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1667325</elocation-id>
<history>
<date date-type="received">
<day>16</day>
<month>07</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>16</day>
<month>10</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Wang, Zhang, Dai, Yan and Fang.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Wang, Zhang, Dai, Yan and Fang</copyright-holder>
<license>
<ali:license_ref start_date="2025-10-28">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Tumor heterogeneity presents a significant challenge in cancer treatment, limiting the ability of clinicians to achieve accurate early-stage diagnoses and develop customized therapeutic strategies. Early diagnosis is crucial for effective intervention, yet current methods lack robust solutions to overcome this challenge. The Pan-Cancer Atlas has emerged as a pivotal framework to investigate cancer heterogeneity by integrating multi-omics data (genomics, transcriptomics, proteomics) across tumor types. This initiative systematically maps inter- and intratumor variations, providing insight for clinical decision making. However, such frameworks often struggle to integrate dynamic temporal changes and spatial heterogeneity within tumors, limiting their real-time clinical applicability. In this review, we first summarize the available multi-omics data and public biomedical databases used in pan-cancer research. Then, we examine current pan-cancer classification approaches based on the computational models they employed, including machine learning and deep learning. We also provide a comparison of these classification methods to explore their advantages and limitations. Finally, we conclude by discussing the key challenges in pan-cancer research and suggesting potential directions for future studies.</p>
</abstract>
<kwd-group>
<kwd>pan-cancer classification</kwd>
<kwd>multi-omics data</kwd>
<kwd>deep learning algorithm</kwd>
<kwd>convolutional neural network</kwd>
<kwd>tumor heterogeneity</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declare that financial support was received for the research and/or publication of this article. This study was supported by the Strategic Priority Research Program of Chinese Academy of Sciences (Grant No. XDA0480502).</funding-statement>
</funding-group>
<counts>
<fig-count count="5"/>
<table-count count="4"/>
<equation-count count="0"/>
<ref-count count="91"/>
<page-count count="00"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Cancer Genetics and Oncogenomics</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1">
<label>1</label>
<title>Background</title>
<p>Cancer, a heterogeneous group of diseases that affect various tissues and organs, constitutes a major global health burden. Despite advances in prevention, detection, and therapeutic interventions, global cancer incidence and mortality rates continue to increase (<xref ref-type="bibr" rid="B65">Santucci et al., 2020</xref>; <xref ref-type="bibr" rid="B13">Bray et al., 2024</xref>). A key limitation of current clinical practices is their reliance on molecularly insensitive tools, which often detect cancer only at intermediate or advanced stages, preventing early diagnosis (<xref ref-type="bibr" rid="B78">Wei et al., 2022</xref>). This delay is critical, as early detection is directly related to patient outcomes. For example, the 5-year survival rate for early-stage prostate cancer is 98%, and early breast cancer has a cure rate exceeding 95% (<xref ref-type="bibr" rid="B66">Siegel et al., 2020</xref>). However, tumor heterogeneity and similarity complicate early and accurate diagnosis, as well as treatment planning. Tumor heterogeneity manifests itself through genomic, transcriptomic, and proteomic differences between tumor cells, driving variations in morphology, proliferation, and metastatic potential (<xref ref-type="bibr" rid="B91">Zheng et al., 2022</xref>). Furthermore, even within the same tumor, cancer cells exhibit phenotypic and morphological heterogeneity during progression (<xref ref-type="bibr" rid="B89">Zhang et al., 2025</xref>). For example, lung cancer cells can differentiate into the subtypes of small cell lung cancer, lung squamous cell carcinoma, and lung adenocarcinoma (<xref ref-type="bibr" rid="B84">Yang and Fan, 2024</xref>). Each type and subtype of cancer has unique characteristics, leading to various clinical treatment approaches, and this heterogeneity poses significant challenges to diagnosis and treatment (<xref ref-type="bibr" rid="B15">Capper et al., 2018</xref>). 
The similarity of tumors is reflected in the finding that, at a molecular level, tumors in different parts of the body can be more similar than tumors of the same type (<xref ref-type="bibr" rid="B67">Sinha et al., 2021</xref>).</p>
<p>To address these challenges, The Cancer Genome Atlas (TCGA) launched the Pan-Cancer Project in 2012 (<xref ref-type="bibr" rid="B79">Weinstein et al., 2013</xref>), integrating omics data from more than 11,000 tumor samples to identify shared and unique oncogenic drivers. Pan-cancer analysis aims to describe and identify the commonalities and differences between different types of cancer in order to find the key factors that may trigger cancer and thus guide clinical diagnosis, which is important to improve the cure rate of cancer. Many institutions have launched pan-cancer studies and developed public databases that collect data from various cancer-related research. For example, the UCSC Genome Browser, developed and maintained by the University of California, Santa Cruz (UCSC), is a comprehensive multi-omics database. It integrates various types of molecular data including copy number variations, methylation profiles, gene and protein expression levels, and mutation records. Furthermore, the platform supports efficient data analysis and visualization through user-friendly tools. The Gene Expression Omnibus (GEO), developed and maintained by the National Center for Biotechnology Information (NCBI), serves as a public repository for gene expression data. This database systematically integrates diverse cancer-related datasets, including high-throughput gene expression profiles and microarray data. Analysis of these pan-cancer datasets enables researchers to identify unique features of individual cancer types and explore shared or distinct molecular patterns across cancers. Such insights support the accurate classification of cancer subtypes and the development of targeted therapies. These research efforts form the foundation for the advancement of precision oncology and remain a central focus in contemporary cancer studies.</p>
<p>Traditional pan-cancer studies relied on cluster analysis, network modeling, and pathway enrichment to identify histological similarities. However, these methods lack the resolution required for early diagnosis. Rapid advancements in sequencing technologies have exponentially increased the scale and complexity of omics data, necessitating advanced computational approaches. Machine learning (ML) and deep learning (DL) methods now offer scalable solutions to analyze these high-dimensional datasets. For example, <xref ref-type="bibr" rid="B44">Li et al. (2017)</xref> achieved 90% precision in classifying 31 tumor types using genetic algorithms (GA) and K-nearest neighbors (KNN), while <xref ref-type="bibr" rid="B53">Lyu and Haque (2018)</xref> leveraged convolutional neural networks to classify 33 cancers with 95.59% precision and identified biomarkers via guided Grad-CAM. Overall, classification studies of pan-cancer datasets are important for improving the cure rate of cancer. <xref ref-type="fig" rid="F1">Figure 1</xref> shows the standardized workflow for pan-cancer classification models utilizing machine learning and deep learning frameworks.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>The workflow of a pan-cancer classification model.</p>
</caption>
<graphic xlink:href="fgene-16-1667325-g001.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a pan-cancer classification framework. The process begins with data collection, including genome, transcriptome, and epigenome data. Data preprocessing involves normalization and handling data imbalance. Feature selection uses methods like mutual information and Chi-squared tests. Classification is achieved through machine learning (SVM, KNN, etc.) and deep learning (supervised and unsupervised). Evaluation includes cross-validation and metrics such as accuracy and precision, alongside biological analysis.</alt-text>
</graphic>
</fig>
<p>Initially, researchers must collect and curate data from diverse publicly accessible biomedical databases relevant to the onset and progression of cancer. These data are critical for identifying oncogenic drivers underlying tumorigenesis. With advances in computer technology, a variety of feature dimension reduction and classification algorithms have been developed. These tools are instrumental in constructing models that can accurately discriminate between different cancer types. Once developed, the performance of these methodologies should be assessed against state-of-the-art approaches. This involves comparing them across various metrics and prediction tasks using both standard and supplementary test datasets. Lastly, conducting relevant biological analyses and validations is vital to ensure the reliability and applicability of the findings.</p>
<p>Despite the existence of numerous classification methods for pan-cancer studies, there is a lack of comprehensive literature reviewing the data and methodologies employed. This review addresses this gap by providing a thorough analysis of recent pan-cancer classification methods based on diverse models. We begin by exploring the data types commonly used in pan-cancer research and curating biomedical databases. This process improves our understanding of cancer heterogeneity and similarities and helps to validate research findings. We then examine prevalent classification approaches utilizing machine learning and deep learning models. Finally, we analyze standard datasets and evaluation metrics used in pan-cancer classification and provide a concise comparison of various methods. This comparison aims to assess the strengths and limitations of each approach.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Data and databases</title>
<sec id="s2-1">
<label>2.1</label>
<title>Available data</title>
<p>With the conclusion of the Human Genome Project and the onset of the post-genomic era, innovative sequencing technologies have emerged (<xref ref-type="bibr" rid="B77">Waterman, 2021</xref>). Currently, gene microarray technology and transcriptome sequencing are the primary methods for acquiring cancer multi-omics data. Gene microarray technology, also called DNA microarray, detects both qualitative and quantitative information of DNA or RNA within a sample (<xref ref-type="bibr" rid="B36">Karakach et al., 2010</xref>). Transcriptome sequencing (RNA-Seq), also known as second-generation sequencing, offers greater accuracy and sensitivity in gene expression detection compared to microarray technology (<xref ref-type="bibr" rid="B73">Wang et al., 2009</xref>). Advancements in sequencing technologies have generated vast multi-omics datasets encompassing genomic, transcriptomic, and proteomic profiles. These multi-omics datasets serve as foundational resources for systematic exploration of oncogenic mechanisms across genomic, transcriptomic, and proteomic dimensions. Subsequently, we provide a detailed description of the multi-omics data closely related to pan-cancer research.</p>
<sec id="s2-1-1">
<label>2.1.1</label>
<title>mRNA expression data</title>
<p>mRNA is a single-stranded RNA molecule that carries genetic information transcribed from DNA. It plays a crucial regulatory role in protein synthesis within the cell (<xref ref-type="bibr" rid="B62">Qin et al., 2022</xref>). mRNA expression data provide insights into gene function and activity. Investigating fluctuations in gene expression levels can elucidate disease development mechanisms. In cancer research, mRNA expression profiling has emerged as an essential element in elucidating cancer progression mechanisms. Studies show that dysregulation of specific genes can result in uncontrolled cell proliferation, a major factor in cancer development (<xref ref-type="bibr" rid="B42">Leibovitch and Topisirovic, 2018</xref>). For example, <xref ref-type="bibr" rid="B44">Li et al. (2017)</xref> used GA with a KNN classifier to classify mRNA data from 9,096 tumor samples of 31 types with 90% precision. Similarly, <xref ref-type="bibr" rid="B39">Kim et al. (2020)</xref> identified key genes that accurately distinguish 21 types of tumors by using ANOVA tests on mRNA data from cancer and normal samples. Therefore, studying mRNA expression data to find oncogenes helps in early cancer diagnosis and more accurate classification, improving treatment.</p>
</sec>
<sec id="s2-1-2">
<label>2.1.2</label>
<title>miRNA expression data</title>
<p>miRNAs are small noncoding RNAs present in plants and animals, typically 20 to 24 nucleotides long. They play a critical role in the regulation of cellular processes (<xref ref-type="bibr" rid="B22">Cui et al., 2025</xref>). miRNA controls oncogenes and tumor suppressor gene expression by degrading mRNAs or inhibiting their translation (<xref ref-type="bibr" rid="B71">Tang et al., 2021</xref>; <xref ref-type="bibr" rid="B30">Galagali, 2020</xref>). For example, in non-small cell lung cancer, high let-7 expression reduced lung cancer cell growth and inhibited differentiation (<xref ref-type="bibr" rid="B60">Pop-Bica et al., 2020</xref>). In gastric cancer, certain miRNAs inhibit the expression of the phosphatase and tensin homolog (PTEN) gene and promote cancer cell growth and invasion (<xref ref-type="bibr" rid="B10">Ashrafizadeh et al., 2020</xref>). <xref ref-type="bibr" rid="B75">Wang et al. (2019)</xref> combined GA with random forest (RF) for pan-cancer classification of miRNA data from 32 tumor types, achieving 92% sensitivity. To obtain a more robust and reliable set of miRNA features capable of distinguishing different types of tumor, <xref ref-type="bibr" rid="B51">Lopez-Rincon et al. (2019)</xref> developed an integrated feature selection algorithm for accurate classification of 28 types of tumors with reliable miRNA features. Therefore, studying miRNA functions is vital for accurate cancer classification and early diagnosis, significantly impacting treatment and prognosis.</p>
</sec>
<sec id="s2-1-3">
<label>2.1.3</label>
<title>lncRNA expression data</title>
<p>lncRNAs are RNA molecules with transcript sequences of more than 200 nucleotides. Although they do not encode proteins, they regulate biological processes such as gene expression, development, and differentiation (<xref ref-type="bibr" rid="B19">Chen et al., 2021</xref>). Initially considered as genomic noise, lncRNAs are now recognized as important in cancer development. Changes in their expression can serve as diagnostic markers (<xref ref-type="bibr" rid="B58">Nandwani et al., 2021</xref>; <xref ref-type="bibr" rid="B26">Fang and Fullwood, 2016</xref>). Analyzing lncRNA data has identified potential biomarkers and distinguished between tumor types (<xref ref-type="bibr" rid="B2">Al Mamun and Mondal, 2019a</xref>; <xref ref-type="bibr" rid="B3">Al Mamun and Mondal, 2019b</xref>; <xref ref-type="bibr" rid="B4">Al Mamun et al., 2020</xref>). Therefore, understanding the roles of lncRNAs is crucial for early cancer diagnosis and treatment.</p>
</sec>
<sec id="s2-1-4">
<label>2.1.4</label>
<title>Copy number variation (CNV)</title>
<p>CNV refers to the variation in the number of copies of a particular gene present in an individual&#x2019;s genome (<xref ref-type="bibr" rid="B61">P&#xf6;s et al., 2021</xref>). Genes such as BRCA1, CHEK2, ATM, and BRCA2 have strong associations with cancers like breast cancer (<xref ref-type="bibr" rid="B33">Hu et al., 2018</xref>). <xref ref-type="bibr" rid="B87">Zhang et al. (2016)</xref> proposed using a Dagging classifier to categorize CNV data from six cancer types, highlighting key features for accurate classification. Therefore, studying CNV helps explore cancer pathogenesis, aiding early diagnosis and treatment selection.</p>
</sec>
<sec id="s2-1-5">
<label>2.1.5</label>
<title>DNA methylation</title>
<p>DNA methylation, an epigenetic modification, involves adding a methyl group to DNA, usually suppressing gene expression (<xref ref-type="bibr" rid="B49">Liu et al., 2016</xref>). It is crucial for normal cellular functions and implicated in cell differentiation and tumorigenesis. Dysregulated methylation, such as hypermethylation of CpG islands in promoter regions, can silence tumor suppressor genes or reduce oncogenic miRNA transcription, increasing cancer risk (<xref ref-type="bibr" rid="B28">Formosa et al., 2013</xref>). <xref ref-type="bibr" rid="B50">Liu et al. (2019)</xref> used methylation data from 27 cancer types and proposed machine learning and deep learning strategies for accurate cancer differentiation. Therefore, DNA methylation is closely related to the occurrence and development of cancer, and the analysis and study of methylation is very important in the field of cancer diagnosis.</p>
</sec>
<sec id="s2-1-6">
<label>2.1.6</label>
<title>Multi-omics</title>
<p>The development of cancer is a very complex process that is not simply caused by abnormalities at a single molecular level, but often involves pathological processes across multiple omics layers. Therefore, data mining analysis based on single-omics data is inherently one-sided and limited. In recent years, with the rapid development of next-generation genomic technologies, a large amount of genomic data on different types of cancer has been accumulated, and more and more researchers have started to integrate multiple omics data to conduct systematic and complete analyses of the mechanisms of cancer occurrence; cancer research is thus moving from single-omics to multi-omics. Integrated multi-omics analysis can make up for the lack of information in single-omics data, provide a comprehensive view of patients, and enable researchers to explore the relationship between cancer and genes from multiple perspectives, so as to perform early cancer diagnosis more accurately.</p>
<p>
<xref ref-type="table" rid="T1">Table 1</xref> summarizes the characteristics of common pan-cancer data types, including mRNA, miRNA, and DNA methylation.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Description of common data types of pan-cancer. The dimensions presented are the feature counts derived from the TCGA Pan-Cancer Atlas dataset.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Data type</th>
<th align="left">Data description</th>
<th align="left">Dimension</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">mRNA</td>
<td align="left">The real-time product of gene expression, which controls protein synthesis, and abnormal expression can lead to the development of cancer</td>
<td align="center">20,531</td>
</tr>
<tr>
<td align="center">miRNA</td>
<td align="left">Key molecules in the regulation of transcription and translation of oncogenes and tumor suppressor genes. Aberrant expression regulates tumor cell growth, proliferation and apoptosis</td>
<td align="center">1,882</td>
</tr>
<tr>
<td align="center">lncRNA</td>
<td align="left">An RNA molecule that does not have protein-coding ability and is involved in the development of cancer, and changes in its expression level can be used as a marker for the diagnosis of cancer</td>
<td align="center">19,166</td>
</tr>
<tr>
<td align="center">CNV</td>
<td align="left">Caused by genomic rearrangements affecting DNA segments of 1 kb or longer, and implicated in the development and progression of human cancers</td>
<td align="center">24,174</td>
</tr>
<tr>
<td align="center">DNA Methylation</td>
<td align="left">DNA methylation usually inhibits the expression of genes in cells and plays an important regulatory role, and abnormal methylation can silence tumor suppressor genes, leading to the development of cancer</td>
<td align="center">48,578</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>Biomedical database</title>
<p>With the rapid development of high-throughput sequencing technology, a large amount of tumor-related omics data has been accumulated, and various public biomedical databases have continued to emerge. These public databases can be classified into comprehensive, genomic, transcriptomic, and epigenomic databases, among others, according to research area or data type. <xref ref-type="table" rid="T2">Table 2</xref> summarizes some cancer-related databases and provides brief descriptions and access links.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Overview of cancer databases.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Database</th>
<th align="left">Brief description</th>
<th align="left">Links</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">TCGA (<xref ref-type="bibr" rid="B79">Weinstein et al., 2013</xref>)</td>
<td align="left">Collected multiple omics data of 33 tumor types, the largest human tumor sequencing database in the world</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://www.cancergenome.nih.gov/">https://www.cancergenome.nih.gov/</ext-link>
</td>
</tr>
<tr>
<td align="left">EGA (<xref ref-type="bibr" rid="B41">Lappalainen et al., 2015</xref>)</td>
<td align="left">Collection of all types of sequencing and genotyping data from over 800 medical studies</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://ega-archive.org/">https://ega-archive.org/</ext-link>
</td>
</tr>
<tr>
<td align="left">CGHub (<xref ref-type="bibr" rid="B80">Wilks et al., 2014</xref>)</td>
<td align="left">Sequencing data of 25 different types of cancers from TCGA, TARGET, and CCLE were collected and organized</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://cghub.ucsc.edu/">https://cghub.ucsc.edu/</ext-link>
</td>
</tr>
<tr>
<td align="left">ICGC (<xref ref-type="bibr" rid="B21">Consortium et al., 2010</xref>)</td>
<td align="left">Collects omics data from many different types of cancer and comprehensively describes the genomic changes in many cancers</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://dcc.icgc.org/">https://dcc.icgc.org/</ext-link>
</td>
</tr>
<tr>
<td align="left">COSMIC (<xref ref-type="bibr" rid="B27">Forbes et al., 2015</xref>)</td>
<td align="left">Collecting omics data on many types of cancer, it is the world&#x2019;s largest and most comprehensive database of somatic mutations</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://cancer.sanger.ac.uk/cosmic">https://cancer.sanger.ac.uk/cosmic</ext-link>
</td>
</tr>
<tr>
<td align="left">cBioPortal (<xref ref-type="bibr" rid="B31">Gao et al., 2013</xref>)</td>
<td align="left">Collects genomic data on many different types of cancer, providing visual analysis tools across genes, samples and data types</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="http://www.cbioportal.org/">http://www.cbioportal.org/</ext-link>
</td>
</tr>
<tr>
<td align="left">UCSC Xena (<xref ref-type="bibr" rid="B59">Navarro Gonzalez et al., 2021</xref>)</td>
<td align="left">Collects data from several large cancer research projects and provides convenient data analysis and visualization capabilities</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="http://genome.ucsc.edu/">http://genome.ucsc.edu/</ext-link>
</td>
</tr>
<tr>
<td align="left">arrayMap (<xref ref-type="bibr" rid="B14">Cai et al., 2015</xref>)</td>
<td align="left">Provides pre-processed tumor genome microarray data and a CNA atlas</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="http://www.arraymap.org/">http://www.arraymap.org/</ext-link>
</td>
</tr>
<tr>
<td align="left">BioMuta (<xref ref-type="bibr" rid="B81">Wu et al., 2014</xref>)</td>
<td align="left">SNV data collected from 26 different types of cancers</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://hive.biochemistry.gwu.edu/home/">https://hive.biochemistry.gwu.edu/home/</ext-link>
</td>
</tr>
<tr>
<td align="left">GEO (<xref ref-type="bibr" rid="B11">Barrett et al., 2012</xref>)</td>
<td align="left">Collection and organization of high-throughput gene expression data submitted by research institutions around the world</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/geo/">https://www.ncbi.nlm.nih.gov/geo/</ext-link>
</td>
</tr>
<tr>
<td align="left">ArrayExpress (<xref ref-type="bibr" rid="B40">Kolesnikov et al., 2015</xref>)</td>
<td align="left">Collected and organized microarray chip-based and high-throughput sequencing of experimental genomics data</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://www.ebi.ac.uk/arrayexpress/">https://www.ebi.ac.uk/arrayexpress/</ext-link>
</td>
</tr>
<tr>
<td align="left">OncomiRDB (<xref ref-type="bibr" rid="B74">Wang et al., 2014</xref>)</td>
<td align="left">Collection and annotation of experimentally validated miRNAs with promotive or inhibitory effects on different cancer types</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="http://www.oncomir.org/">http://www.oncomir.org/</ext-link>
</td>
</tr>
<tr>
<td align="left">miRCancer (<xref ref-type="bibr" rid="B83">Xie et al., 2013</xref>)</td>
<td align="left">A comprehensive collection of miRNA expression profiles in various human cancers</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="http://mircancer.ecu.edu/">http://mircancer.ecu.edu/</ext-link>
</td>
</tr>
<tr>
<td align="left">SomaMiR (<xref ref-type="bibr" rid="B12">Bhattacharya et al., 2013</xref>)</td>
<td align="left">Collecting data on miRNAs and mutations on their targets</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://compbio.uthsc.edu/SomaMiR/">https://compbio.uthsc.edu/SomaMiR/</ext-link>
</td>
</tr>
<tr>
<td align="left">ChiTaRS (<xref ref-type="bibr" rid="B29">Frenkel-Morgenstern et al., 2015</xref>)</td>
<td align="left">Cancer genome sequence breakpoints were collected along with expression level data of the corresponding chimeric transcripts</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://chitars.bioinfo.cnio.es/">https://chitars.bioinfo.cnio.es/</ext-link>
</td>
</tr>
<tr>
<td align="left">MethylCancer (<xref ref-type="bibr" rid="B32">He et al., 2007</xref>)</td>
<td align="left">Collected tumor DNA methylation, cancer-related genes, mutations, CpG islands, and cancer information</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="http://methylcancer.psych.ac.cn/">http://methylcancer.psych.ac.cn/</ext-link>
</td>
</tr>
<tr>
<td align="left">MethHC (<xref ref-type="bibr" rid="B34">Huang et al., 2015</xref>)</td>
<td align="left">Organized DNA methylation, mRNA/miRNA gene expression, miRNA methylation, and association between methylation and gene expression levels from TCGA</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="http://methhc.mbc.nctu.edu.tw/">http://methhc.mbc.nctu.edu.tw/</ext-link>
</td>
</tr>
<tr>
<td align="left">CGC (<xref ref-type="bibr" rid="B69">Subramanian et al., 2021</xref>)</td>
<td align="left">NCI-funded cloud platform co-localizing large datasets, and compute power for secure, collaborative multi-omics analysis</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://www.cancergenomicscloud.org/">https://www.cancergenomicscloud.org/</ext-link>
</td>
</tr>
<tr>
<td align="left">CPTAC (<xref ref-type="bibr" rid="B55">Mesri et al., 2024</xref>)</td>
<td align="left">CPTAC provides a rich source of public data, serving as a critical resource for researchers studying pan-cancer proteomics</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://cptac-data-portal.georgetown.edu/cptac/">https://cptac-data-portal.georgetown.edu/cptac/</ext-link>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Next, we provide a detailed description of the most commonly used databases in pan-cancer research.</p>
<sec id="s2-2-1">
<label>2.2.1</label>
<title>TCGA</title>
<p>TCGA is the largest human tumor genome sequencing database globally (<xref ref-type="bibr" rid="B79">Weinstein et al., 2013</xref>). Jointly sponsored by the National Human Genome Research Institute (NHGRI) and the National Cancer Institute (NCI), this major research project was officially launched in 2005. TCGA has sequenced 33 common cancers and over 11,000 tumor samples, using genomic analysis technology to enhance understanding of tumor mechanisms and improve cancer diagnosis and treatment capabilities (<xref ref-type="bibr" rid="B72">Tomczak et al., 2015</xref>). TCGA currently provides mRNA expression data, miRNA expression data, DNA methylation data, CNV data, and other high-throughput sequencing data. Researchers can access these datasets through the Genomic Data Commons (GDC) Data Portal, the primary data source for many cancer researchers.</p>
</sec>
<sec id="s2-2-2">
<label>2.2.2</label>
<title>GEO</title>
<p>GEO is a subdatabase of the National Center for Biotechnology Information (NCBI). This free and publicly accessible repository houses biological data from gene chips, second-generation sequencing, and other high-throughput functional genomics experiments. It includes submissions from over 16,000 laboratories and research teams worldwide, featuring 175,825 datasets with 5,069,606 data samples. GEO supports data download capabilities, enabling users to obtain samples or datasets of interest. Additionally, it offers tools to discover genes of interest and their expression profiles, as well as to identify genes with similar expression patterns.</p>
</sec>
<sec id="s2-2-3">
<label>2.2.3</label>
<title>UCSC Xena</title>
<p>UCSC Xena is a cancer genomics data analysis platform developed by the UCSC Cancer Genome Browser (<xref ref-type="bibr" rid="B59">Navarro Gonzalez et al., 2021</xref>). This platform collects and standardizes data from several major cancer research projects such as TCGA, ICGC, and TARGET, facilitating subsequent analysis (<xref ref-type="bibr" rid="B21">Consortium et al., 2010</xref>). UCSC Xena encompasses multiple levels of data, including copy number, methylation, gene expression, protein expression, and mutation data. It provides user-friendly data analysis and visualization tools. Researchers can easily analyze or download organized data with link clicks and can also upload their data for analysis. This flexibility considerably aids in the advancement of genomic research.</p>
</sec>
<sec id="s2-2-4">
<label>2.2.4</label>
<title>CPTAC</title>
<p>The Clinical Proteomic Tumor Analysis Consortium (CPTAC) is a comprehensive proteomic and genomic research program initiated by the National Cancer Institute (NCI) that aims to accelerate the understanding of cancer biology through the integration of large-scale proteomic and genomic analysis (<xref ref-type="bibr" rid="B55">Mesri et al., 2024</xref>). The consortium systematically identifies, quantifies, and analyzes proteins from cancer biospecimens characterized by genomic data to improve cancer prevention, early diagnosis, treatment, and prognosis. CPTAC provides a rich source of public data, serving as a critical resource for researchers studying pan-cancer proteomics. Its data, which includes protein abundance, post-translational modifications, and mass spectrometry data, is often used in combination with genomic data to provide a multi-layered view of tumors, enabling the discovery of new biomarkers and therapeutic targets.</p>
</sec>
<sec id="s2-2-5">
<label>2.2.5</label>
<title>CGC</title>
<p>The Cancer Genomics Cloud (CGC), an NCI-funded resource powered by Seven Bridges, is a secure and scalable cloud-based platform designed to overcome the challenges associated with accessing, sharing, and analyzing massive, diverse multi-omics datasets (<xref ref-type="bibr" rid="B69">Subramanian et al., 2021</xref>). The platform achieves this by co-localizing three essential components within the cloud: major cancer datasets like The Cancer Genome Atlas (TCGA) and Clinical Proteomic Tumor Analysis Consortium (CPTAC); over 400 bioinformatics tools and best-practice workflows; and the high-performance computational capabilities for large-scale analysis. The CGC simplifies the user experience by enabling researchers to browse, query, and filter datasets, run their entire analysis workflow on the platform, and even integrate their own private tools and data.</p>
<p>Building on the data sources described above, the following section reviews computational methods for pan-cancer classification.</p>
</sec>
</sec>
</sec>
<sec sec-type="methods" id="s3">
<label>3</label>
<title>Methods</title>
<p>Advances in biotechnology have significantly expanded the application of gene sequencing in pan-cancer studies. The proliferation of high-throughput sequencing data offers a critical foundation for research. However, a key challenge lies in developing efficient computational algorithms to extract biologically meaningful insights from these complex datasets. Current methodologies for pan-cancer analysis are broadly categorized into two frameworks: classical machine learning and deep learning. As illustrated in <xref ref-type="fig" rid="F2">Figure 2</xref>, deep learning models can be further subdivided into supervised and unsupervised approaches, depending on the utilization of labeled data.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Pan-Cancer classification methods based on various models.</p>
</caption>
<graphic xlink:href="fgene-16-1667325-g002.tif">
<alt-text content-type="machine-generated">Flowchart depicting a Pan-Cancer Classification framework divided into machine learning and deep learning. Machine learning includes KNN, SVM, and Random Forest. Deep learning is split into supervised and unsupervised. Supervised consists of CNN and DNN, while unsupervised includes VAE and Autoencoder.</alt-text>
</graphic>
</fig>
<sec id="s3-1">
<label>3.1</label>
<title>Pan-cancer classification model based on machine learning</title>
<p>Feature selection innovations and model optimization strategies in machine learning have significantly advanced pan-cancer classification accuracy. To balance feature relevance and parsimony, <xref ref-type="bibr" rid="B39">Kim et al. (2020)</xref> implemented a two-stage gene selection strategy: ANOVA-based F-statistic ranking identified top genes across 21 cancers, followed by frequency-based filtering. Neural networks trained on 300 selected genes achieved peak accuracy (90%), outperforming other classifiers. <xref ref-type="bibr" rid="B54">Mahin et al. (2022)</xref> refined this approach by retaining only genes consistently expressed across all 21 cancers and incorporating data smoothing/oversampling, enhancing model robustness. <xref ref-type="bibr" rid="B52">Luo et al. (2023)</xref> developed an ML approach to predict cancer prognosis considering 32 cancer types from TCGA. Initially, the approach was applied to hepatocellular carcinoma and then extended to other types of tumors.</p>
<p>Beyond conventional methods, researchers have explored hybrid and multi-algorithm frameworks. <xref ref-type="bibr" rid="B37">Khadirnaikar et al. (2023)</xref> analyzed mRNA, miRNA, DNA methylation, and protein of 33 different types of cancer from TCGA. First, multi-omics data were combined by concatenating the features for each sample, and then an autoencoder was used to reduce the dimensionality of the data. Novel subtypes of cancer samples were identified by k-means clustering. Further exploring the efficacy of the classifier, <xref ref-type="bibr" rid="B25">Elsadek et al. (2019)</xref> employed a machine learning approach using gene CNV data across six types of tumor. Their approach utilized an information gain algorithm for gene selection and evaluated various classifiers, with LR achieving superior performance, underscoring machine learning&#x2019;s role in cancer classification. <xref ref-type="bibr" rid="B48">Liu (2022)</xref> used a correlation test to analyze the association between DNA methylation and gene expression profiles at epi-driver CpG sites. XGBoost and SHAP algorithms were used to identify the best biomarkers in five genes, which then served as features for building a random forest model to identify cancer subtypes. Finally, <xref ref-type="bibr" rid="B18">Cheerla and Gevaert (2017)</xref> and <xref ref-type="bibr" rid="B2">Al Mamun and Mondal (2019a)</xref> both explored two-stage feature selection approaches. Cheerla&#x2019;s team reduced miRNA features using correlation and recursive elimination, achieving the best classification with SVM radial among 21 tumor types. Mamun&#x2019;s approach selected common features for classifiers, finding SVM provided the best accuracy for eight different cancers. Collectively, these innovations underscore machine learning&#x2019;s adaptability in addressing omics complexity while balancing feature parsimony and accuracy.</p>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Pan-cancer classification model based on deep learning</title>
<p>Although machine learning methods have been widely used to study pan-cancer classification problems and achieved good results, with the development of deep learning and the high performance shown on classification tasks, more and more researchers have started to use deep learning to improve the performance of tumor classification models. In the field of deep learning, deep learning methods can be classified into two categories based on whether the models use the labels of the data, namely, supervised learning and unsupervised learning (<xref ref-type="bibr" rid="B8">Alzubaidi et al., 2021</xref>).</p>
<sec id="s3-2-1">
<label>3.2.1</label>
<title>Supervised classification models</title>
<p>Recent advancements in supervised deep learning have demonstrated remarkable efficacy in pan-cancer classification through tailored architectural innovations. <xref ref-type="bibr" rid="B70">Sun et al. (2018)</xref> introduced GeneCT, an artificial neural network (ANN) framework designed to classify 11 tumor types using raw mRNA expression data without feature engineering, achieving 98.2% accuracy and underscoring the potential of end-to-end learning in omics analysis. Complementing this approach, <xref ref-type="bibr" rid="B16">Cava et al. (2023)</xref> applied principal component analysis (PCA) to reduce data dimensionality before deploying the model. The neural network achieved a mean accuracy of 84%, the random forest reached 86%, and XGBoost achieved the highest performance with a mean accuracy of 90%. To address the challenges of limited sample sizes in specific cancer types, <xref ref-type="bibr" rid="B20">Cho et al. (2023)</xref> proposed a meta-learning method that integrates multi-omics data (transcriptomics, proteomics, and clinical data from TCGA) to create predictive models using survival information from 17 cancer types. Their approach requires fewer samples than conventional deep learning models, effectively mitigating data scarcity issues. Expanding this paradigm, <xref ref-type="bibr" rid="B24">Divate et al. (2022)</xref> employed deep neural networks (DNNs) to classify 33 cancer types. Their methodology integrated expression-based gene screening with SHAP (Shapley Additive exPlanations) interpretability, identifying critical biomarkers and achieving superior performance in distinguishing cancers from healthy controls.</p>
<p>To address high-dimensional data challenges, <xref ref-type="bibr" rid="B82">Wu et al. (2024)</xref> developed DeepMoIC, a method combining deep graph convolutional networks (GCNs) with autoencoders for cancer subtype classification. By constructing a patient similarity network (PSN) and leveraging GCNs, DeepMoIC outperformed existing models on multi-omics datasets, highlighting its potential for precision oncology. <xref ref-type="bibr" rid="B47">Li et al. (2025)</xref> introduced DGHNN, a deep graph and hypergraph neural network for pan-cancer related gene prediction that takes biological pathways into consideration. This method applies a deep graph and hypergraph neural network to encode higher-order information in protein interaction networks and biological pathways. This approach, along with the introduction of skip residual connections and a feature tokenizer with a transformer for classification, demonstrates how advanced network architectures can capture the multi-level complexity of biological systems, setting a new standard for performance. <xref ref-type="bibr" rid="B45">Li et al. (2020)</xref> tackled CNV sparsity by coupling Monte Carlo feature selection (MCFS), which evaluates feature stability via randomized sampling, with self-normalizing neural networks (SNNs) to enhance training robustness. Their framework achieved 79.8% accuracy in classifying four cancer types. These studies collectively highlight how supervised architectures can be customized to diverse omics modalities while balancing performance and biological interpretability.</p>
<p>In recent years, due to the excellent performance of convolutional neural networks (CNNs) on image classification tasks, more and more researchers have started to apply these networks to the classification problem of pan-cancer. For instance, <xref ref-type="bibr" rid="B9">Ameen et al. (2025)</xref> proposed a stacked deep learning ensemble model for multi-omics cancer type classification, demonstrating that deep learning can be effectively applied to high-dimensional biological data. Similarly, <xref ref-type="bibr" rid="B53">Lyu and Haque (2018)</xref> first proposed the use of a convolutional neural network to classify mRNA expression data by embedding high-dimensional gene expression data into a two-dimensional image as the input of the convolutional neural network to classify 33 different types of tumors. Building on this, <xref ref-type="bibr" rid="B57">Mostavi et al. (2020)</xref> systematically compared CNN architectures (e.g., Inception modules, residual connections), revealing that deeper networks achieved 95.82% precision on 33-class tasks, highlighting the impact of structural optimization. Addressing computational inefficiency, <xref ref-type="bibr" rid="B38">Khalifa et al. (2020)</xref> applied binary particle swarm optimization (BPSO) to reduce the dimensionality of mRNA from 20,531 to 512 features before CNN training, achieving accuracy of 96.9% on five types of tumors. Hybrid models also emerged as a promising frontier: <xref ref-type="bibr" rid="B35">Huynh et al. (2019)</xref> combined deep CNNs (DCNN) with SVM classifiers, where DCNNs extracted high-order features and SVMs performed classification, reaching 76.33% precision for 25 cancers. <xref ref-type="bibr" rid="B1">Abdullahi et al. (2020)</xref> further demonstrated the efficiency of fine-tuning pre-trained AlexNet models on mRNA data, reaching 98.1% accuracy for five cancers with minimal computational overhead.
Beyond expression data, <xref ref-type="bibr" rid="B86">Ye et al. (2021)</xref> encoded somatic mutation profiles into heatmap-like &#x201c;mutation maps,&#x201d; enabling ResNet-50 and Inception-v3 models to outperform traditional methods (89.7% vs. SVM&#x2019;s 72.3%). Finally, <xref ref-type="bibr" rid="B7">AlShibli and Mathkour (2019)</xref> validated CNNs&#x2019; versatility in CNV analysis, showing that a six-layer residual network (ResCNN6) surpassed standard CNNs and VGG-16 (86% accuracy for six cancers), underscoring the efficacy of residual connections in combating gradient vanishing. These innovations exemplify CNNs&#x2019; adaptability to multi-omics integration through data transformation, architectural refinement, and cross-domain transfer learning.</p>
</sec>
<sec id="s3-2-2">
<label>3.2.2</label>
<title>Unsupervised classification models</title>
<p>Unsupervised deep learning techniques have emerged as powerful tools for pan-cancer classification, particularly in scenarios with limited labeled data. <xref ref-type="bibr" rid="B64">Rong et al. (2022)</xref> proposed a computational approach, multi-omics clustering variational autoencoders (Mcluster-VAEs), based on a new probabilistic model of a deep learning method consisting of a clustering algorithm for multi-omics data to estimate posterior cancer subtypes. Building on this, <xref ref-type="bibr" rid="B4">Al Mamun et al. (2020)</xref> introduced the Concrete Autoencoder (CAE), an unsupervised framework for identifying discriminative lncRNAs. The CAE outperformed supervised methods (Lasso, RF, SVM-RFE) in classifying 33 tumors, achieving 93% accuracy. To address feature instability across CAE iterations, <xref ref-type="bibr" rid="B5">Al Mamun et al. (2021)</xref> later proposed the multi-run CAE (mrCAE), which aggregated high-frequency lncRNAs from 100 CAE runs to derive a stable subset of 69 markers. This refined set enabled accurate classification of 12 cancers, resolving reproducibility challenges inherent to stochastic deep learning models. Expanding to multi-omics integration, <xref ref-type="bibr" rid="B88">Zhang et al. (2019)</xref> developed OmiVAE, an end-to-end model combining VAEs with a classification network. OmiVAE first compressed the mRNA and DNA methylation data into low-dimensional embeddings, then predicted 33 tumor types using a three-layer neural network, achieving precision of 97.49%. Finally, <xref ref-type="bibr" rid="B6">Albaradei et al. (2021)</xref> designed MetaCancer, which used convolutional VAE to extract features from mRNA, miRNA and methylation data. When fed into a deep neural network (DNN), this multi-omics integration classified 11 cancers with 88.85% accuracy, surpassing mRNA-only approaches by 14.2%.
<xref ref-type="bibr" rid="B46">Li et al. (2024)</xref> proposed AVBAE-MODFR, a two-phase framework that combines adversarial variational Bayes autoencoder for multi-omics embedding with a dual-net feature ranking module. Tested on TCGA pan-cancer data, AVBAE-MODFR outperformed four state-of-the-art methods, highlighting its robustness in representation learning and biomarker discovery. Compared with earlier VAE-based models such as OmiVAE and MetaCancer, AVBAE-MODFR not only integrates heterogeneous omics but also incorporates an explicit feature ranking mechanism, thereby enhancing interpretability and facilitating the identification of biologically meaningful markers. These innovations underscore unsupervised learning&#x2019;s potential to uncover robust biomarkers and integrate heterogeneous omics data without reliance on labeled datasets.</p>
<p>
<xref ref-type="fig" rid="F3">Figure 3</xref> illustrates the growing prominence of deep learning in pan-cancer research. It shows the percentage of all pan-cancer-related articles that used deep learning methods for classification over the past few years. A systematic review of papers published from 2018 to 2024 on the PubMed and Web of Science platforms, using the search terms &#x201c;pan-cancer classification&#x201d;, &#x201c;deep learning&#x201d; and &#x201c;machine learning&#x201d;, revealed a steady increase in this ratio over that period. To summarize the current landscape of pan-cancer classification, we present an overview of relevant studies in recent years in <xref ref-type="table" rid="T3">Table 3</xref>. This table highlights the variety of machine learning and deep learning approaches, as well as the multi-omics data they employ.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>The ratio of pan-cancer research using deep learning technologies. A systematic review of the relevant literature shows a steady increase in the use of deep learning in pan-cancer research in recent years.</p>
</caption>
<graphic xlink:href="fgene-16-1667325-g003.tif">
<alt-text content-type="machine-generated">Bar chart showing percentages from 2018 to 2024. In 2018, 50%; 2019, 25%; 2020, 60%; 2021, 62.50%; 2022, 83.33%; 2023, 80%; 2024, 86.50%. Percentages generally increase over time.</alt-text>
</graphic>
</fig>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Overview of pan-cancer classification methods.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">References</th>
<th align="left">Method type(s)</th>
<th align="left">Data type(s)</th>
<th align="left">Data source</th>
<th align="left">Cancer types</th>
<th align="left">Code link</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<xref ref-type="bibr" rid="B39">Kim et al. (2020)</xref>
</td>
<td align="left">ML (SVM,KNN)</td>
<td align="left">mRNA</td>
<td align="center">&#x2013;</td>
<td align="center">21</td>
<td align="left">&#x2013;</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B54">Mahin et al. (2022)</xref>
</td>
<td align="left">ML (KNN)</td>
<td align="left">mRNA</td>
<td align="center">TCGA</td>
<td align="center">22</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/Zwei-inc/panclassif">https://github.com/Zwei-inc/panclassif</ext-link>
</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B52">Luo et al. (2023)</xref>
</td>
<td align="left">ML (SVM)</td>
<td align="left">Gene expression</td>
<td align="center">TCGA</td>
<td align="center">32</td>
<td align="left">&#x2013;</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B37">Khadirnaikar et al. (2023)</xref>
</td>
<td align="left">ML (SVM)</td>
<td align="left">mRNA, miRNA,DNA Methylation</td>
<td align="center">GDC</td>
<td align="center">33</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/seemark11/Pancancer-subgroup-identification">https://github.com/seemark11/Pancancer-subgroup-identification</ext-link>
</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B18">Cheerla and Gevaert (2017)</xref>
</td>
<td align="left">ML (SVM)</td>
<td align="left">miRNA</td>
<td align="center">&#x2013;</td>
<td align="center">21</td>
<td align="left">&#x2013;</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B2">Al Mamun and Mondal (2019a)</xref>
</td>
<td align="left">ML</td>
<td align="left">lncRNA</td>
<td align="center">&#x2013;</td>
<td align="center">8</td>
<td align="left">&#x2013;</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B25">Elsadek et al. (2019)</xref>
</td>
<td align="left">ML (SVM,Random forest)</td>
<td align="left">CNV</td>
<td align="center">&#x2013;</td>
<td align="center">6</td>
<td align="left">&#x2013;</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B48">Liu (2022)</xref>
</td>
<td align="left">ML (Random forest)</td>
<td align="left">DNA Methylation, Gene expression</td>
<td align="center">&#x2013;</td>
<td align="center">11</td>
<td align="left">&#x2013;</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B70">Sun et al. (2018)</xref>
</td>
<td align="left">SDL</td>
<td align="left">mRNA</td>
<td align="center">&#x2013;</td>
<td align="center">11</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="http://sunlab.cpy.cuhk.edu.hk/GeneCT/">http://sunlab.cpy.cuhk.edu.hk/GeneCT/</ext-link>
</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B16">Cava et al. (2023)</xref>
</td>
<td align="left">SDL (Neural Network)</td>
<td align="left">Gene expression</td>
<td align="center">&#x2013;</td>
<td align="center">16</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/claudiacava/Applied-Sciences">https://github.com/claudiacava/Applied-Sciences</ext-link>
</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B20">Cho et al. (2023)</xref>
</td>
<td align="left">SDL (Neural Network)</td>
<td align="left">Gene expression</td>
<td align="center">TCGA</td>
<td align="center">17</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/berkuva/TCGA-omics-integration">https://github.com/berkuva/TCGA-omics-integration</ext-link>
</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B57">Mostavi et al. (2020)</xref>
</td>
<td align="left">SDL (CNN)</td>
<td align="left">mRNA</td>
<td align="center">&#x2013;</td>
<td align="center">33</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/chenlabgccri/CancerTypePrediction">https://github.com/chenlabgccri/CancerTypePrediction</ext-link>
</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B38">Khalifa et al. (2020)</xref>
</td>
<td align="left">SDL (CNN)</td>
<td align="left">mRNA</td>
<td align="center">&#x2013;</td>
<td align="center">5</td>
<td align="left">&#x2013;</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B35">Huynh et al. (2019)</xref>
</td>
<td align="left">SDL (DCNN)</td>
<td align="left">mRNA</td>
<td align="center">&#x2013;</td>
<td align="center">25</td>
<td align="left">&#x2013;</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B1">Abdullahi et al. (2020)</xref>
</td>
<td align="left">SDL (CNN)</td>
<td align="left">mRNA</td>
<td align="center">&#x2013;</td>
<td align="center">5</td>
<td align="left">&#x2013;</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B45">Li et al. (2020)</xref>
</td>
<td align="left">SDL (SNN)</td>
<td align="left">CNV</td>
<td align="center">&#x2013;</td>
<td align="center">4</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/KohTseh/CancerClassification">https://github.com/KohTseh/CancerClassification</ext-link>
</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B64">Rong et al. (2022)</xref>
</td>
<td align="left">UDL (VAE)</td>
<td align="left">miRNA,DNA methylation,CNV</td>
<td align="center">UCSC</td>
<td align="center">32</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/luyiyun/MCluster-VAEs">https://github.com/luyiyun/MCluster-VAEs</ext-link>
</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B6">Albaradei et al. (2021)</xref>
</td>
<td align="left">UDL (CVAE)</td>
<td align="left">mRNA, miRNA,DNA Methylation</td>
<td align="center">&#x2013;</td>
<td align="center">11</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/SomayahAlbaradei/MetaCancer">https://github.com/SomayahAlbaradei/MetaCancer</ext-link>
</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B4">Al Mamun et al. (2020)</xref>
</td>
<td align="left">UDL (CAE)</td>
<td align="left">lncRNA</td>
<td align="center">&#x2013;</td>
<td align="center">33</td>
<td align="left">&#x2013;</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B5">Al Mamun et al. (2021)</xref>
</td>
<td align="left">UDL (CAE)</td>
<td align="left">lncRNA</td>
<td align="center">&#x2013;</td>
<td align="center">12</td>
<td align="left">&#x2013;</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B88">Zhang et al. (2019)</xref>
</td>
<td align="left">UDL (VAE)</td>
<td align="left">mRNA, DNA Methylation</td>
<td align="center">UCSC</td>
<td align="center">33</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/zhangxiaoyu11/OmiVAE">https://github.com/zhangxiaoyu11/OmiVAE</ext-link>
</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B46">Li et al. (2024)</xref>
</td>
<td align="left">UDL (VAE/CVAE)</td>
<td align="left">mRNA, miRNA, DNA Methylation</td>
<td align="center">&#x2013;</td>
<td align="center">33</td>
<td align="left">
<ext-link ext-link-type="uri" xlink:href="https://github.com/zhanglabNKU/AVBAE-MODFR">https://github.com/zhanglabNKU/AVBAE-MODFR</ext-link>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>ML: machine learning; DL: deep learning; SDL: supervised deep learning; UDL: unsupervised deep learning; CNN: convolutional neural network; DCNN: deep convolutional neural network; SNN: self-normalizing neural network; CVAE: convolutional variational autoencoder; CAE: concrete autoencoder; VAE: variational autoencoder.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
<sec id="s3-3">
<label>3.3</label>
<title>Integration strategies</title>
<p>The integration of multi-omics data is a critical step in pan-cancer research, as it provides a more comprehensive view of cancer&#x2019;s molecular mechanisms by combining information from multiple platforms. Integration strategies are typically categorized by the stage at which the data is combined. For instance, an early integration approach, where mRNA and CNV data are simply concatenated, may be easy to implement but can lead to a high-dimensional feature space and potentially introduce noise (<xref ref-type="bibr" rid="B90">Zhao et al., 2024</xref>). In contrast, an intermediate integration approach using a variational autoencoder (VAE) to create a joint latent space can handle the high dimensionality and may reveal more complex, underlying relationships between omics types, but the learned features are often less interpretable.</p>
<p>To better evaluate the performance of these pan-cancer classification models, researchers are developing new benchmarks. These include integrating multi-omics data from large consortia, assessing cross-cohort generalization, and shifting the focus to more specific clinical endpoints beyond simple cancer type classification. For example, integrating genomics from TCGA with proteomics from CPTAC offers a more comprehensive understanding of cancer&#x2019;s molecular mechanisms, as proteins are the functional molecules that execute cellular processes. A related large-scale multi-omics benchmark, CMOB, integrates data from the TCGA platform, providing an accessible and usable resource for machine learning research (<xref ref-type="bibr" rid="B85">Yang et al., 2024</xref>).</p>
<p>Beyond these comprehensive datasets, evaluating a model&#x2019;s generalization ability across different patient cohorts is essential for validating its robustness and reliability in diverse clinical settings. In addition, new benchmarks are moving beyond the simple classification of cancer types to include more refined clinical endpoints such as subtype classification, stage prediction, survival analysis, and prediction of response to treatment. These more granular predictions are crucial for personalized medicine, as they inform specific patient care strategies. Several recent case studies highlight these advances. AVBAE-MODFR is a deep learning framework that integrates multi-omics data through embedding and feature selection, showing potential clinical applications in tumor diagnosis and precision medicine (<xref ref-type="bibr" rid="B46">Li et al., 2024</xref>). TMO-Net is another model that is pre-trained on multi-omics pan-cancer datasets to facilitate cross-omics interactions and enable joint representation learning and inference on incomplete omics data, thereby supporting various downstream oncology tasks (<xref ref-type="bibr" rid="B76">Wang et al., 2024</xref>).</p>
<p>Future research is also expanding to incorporate new data types and modalities that offer a more holistic view of tumor biology. Single-cell multi-omics (e.g., scRNA-seq, scATAC-seq) provides an unprecedented resolution of tumor heterogeneity at the cellular level, capturing differences between individual cells that are lost in bulk omics data. In addition, integration of radiology and pathology images with molecular data is a promising area. This represents a different data modality (unstructured images) that requires specialized models such as CNNs. Combining these visual cues with molecular data can provide a more comprehensive view of the tumor, bridging the gap between molecular mechanisms and the morphological features observed in clinical practice.</p>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Evaluation and discussion</title>
<sec id="s4-1">
<label>4.1</label>
<title>Selection criteria</title>
<p>We systematically reviewed papers published on the Ovid and Web of Science platforms. Our search criteria focused on machine learning and multi-omics data for pan-cancer studies. We only included full-text, English-language papers from peer-reviewed journals that used artificial intelligence to analyze multi-omics data on cancer samples. We excluded any papers that only applied machine learning to a single cancer type or data type, did not use cancer samples, or were themselves reviews or proceedings.</p>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Classification evaluation metrics</title>
<p>Classification performance evaluation metrics are essential to objectively assess the effectiveness of classification models. Selecting a high-performing classifier relies on using rigorous evaluation criteria. Accuracy is a common metric for evaluating overall model performance in classification tasks. However, in pan-cancer classification, sample size imbalance is a prevalent issue, where some cancer types have many samples while others have few. In such cases, the majority class can disproportionately influence overall accuracy, diminishing its evaluative significance. For example, a model trained on an imbalanced dataset might achieve a deceptively high accuracy simply by correctly classifying all samples from the majority class, while failing to identify samples from the rarer, minority classes. Thus, relying solely on accuracy is insufficient.</p>
<p>Therefore, it is necessary to consider other metrics that provide a more complete picture of a model&#x2019;s performance on multi-class, imbalanced datasets. We analyze several evaluation metrics as reported in the reviewed literature, including Precision (PR), Recall (RC), F1-score, Area Under the Receiver Operating Characteristic Curve (AUC), and Matthews Correlation Coefficient (MCC). Precision measures the proportion of true positive predictions among all positive predictions, while recall measures the proportion of true positives correctly identified from all actual positives. The F1-score provides a single value that balances both precision and recall, making it particularly useful for evaluating models on imbalanced data. The AUC and MCC are also important for assessing overall performance, with MCC providing a balanced measure that accounts for all four values in a confusion matrix, regardless of class size.</p>
</sec>
<sec id="s4-3">
<label>4.3</label>
<title>Data sets</title>
<p>Pan-cancer classification studies commonly analyze subsets of the following 33 cancer types. The types and sample counts of these 33 cancers are shown in <xref ref-type="table" rid="T4">Table 4</xref>.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Types of cancer and number of samples.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">No.</th>
<th align="left">Cancer name</th>
<th align="left">Code</th>
<th align="right">Cases</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">1</td>
<td align="left">Adrenocortical carcinoma</td>
<td align="left">ACC</td>
<td align="right">79</td>
</tr>
<tr>
<td align="center">2</td>
<td align="left">Bladder-Urothelial-Carcinoma</td>
<td align="left">BLCA</td>
<td align="right">408</td>
</tr>
<tr>
<td align="center">3</td>
<td align="left">Breast-invasive carcinoma</td>
<td align="left">BRCA</td>
<td align="right">1093</td>
</tr>
<tr>
<td align="center">4</td>
<td align="left">Cervical and endocervical cancers</td>
<td align="left">CESC</td>
<td align="right">304</td>
</tr>
<tr>
<td align="center">5</td>
<td align="left">Cholangiocarcinoma</td>
<td align="left">CHOL</td>
<td align="right">36</td>
</tr>
<tr>
<td align="center">6</td>
<td align="left">Colon-adenocarcinoma</td>
<td align="left">COAD</td>
<td align="right">457</td>
</tr>
<tr>
<td align="center">7</td>
<td align="left">Lymphoid-Neoplasm-Diffuse-Large B-cell-Lymphoma</td>
<td align="left">DLBC</td>
<td align="right">48</td>
</tr>
<tr>
<td align="center">8</td>
<td align="left">Esophageal carcinoma</td>
<td align="left">ESCA</td>
<td align="right">184</td>
</tr>
<tr>
<td align="center">9</td>
<td align="left">Glioblastoma multiforme</td>
<td align="left">GBM</td>
<td align="right">160</td>
</tr>
<tr>
<td align="center">10</td>
<td align="left">Head and Neck squamous cell carcinoma</td>
<td align="left">HNSC</td>
<td align="right">520</td>
</tr>
<tr>
<td align="center">11</td>
<td align="left">Kidney-Chromophobe</td>
<td align="left">KICH</td>
<td align="right">66</td>
</tr>
<tr>
<td align="center">12</td>
<td align="left">Kidney renal clear cell carcinoma</td>
<td align="left">KIRC</td>
<td align="right">533</td>
</tr>
<tr>
<td align="center">13</td>
<td align="left">Kidney renal papillary cell carcinoma</td>
<td align="left">KIRP</td>
<td align="right">290</td>
</tr>
<tr>
<td align="center">14</td>
<td align="left">Acute-Myeloid Leukemia</td>
<td align="left">LAML</td>
<td align="right">179</td>
</tr>
<tr>
<td align="center">15</td>
<td align="left">Brain Lower-Grade Glioma</td>
<td align="left">LGG</td>
<td align="right">516</td>
</tr>
<tr>
<td align="center">16</td>
<td align="left">Liver-hepatocellular carcinoma</td>
<td align="left">LIHC</td>
<td align="right">371</td>
</tr>
<tr>
<td align="center">17</td>
<td align="left">Lung adenocarcinoma</td>
<td align="left">LUAD</td>
<td align="right">515</td>
</tr>
<tr>
<td align="center">18</td>
<td align="left">Lung squamous cell carcinoma</td>
<td align="left">LUSC</td>
<td align="right">501</td>
</tr>
<tr>
<td align="center">19</td>
<td align="left">Mesothelioma</td>
<td align="left">MESO</td>
<td align="right">87</td>
</tr>
<tr>
<td align="center">20</td>
<td align="left">Ovarian serous cystadenocarcinoma</td>
<td align="left">OV</td>
<td align="right">304</td>
</tr>
<tr>
<td align="center">21</td>
<td align="left">Pancreatic adenocarcinoma</td>
<td align="left">PAAD</td>
<td align="right">178</td>
</tr>
<tr>
<td align="center">22</td>
<td align="left">Pheochromocytoma and Paraganglioma</td>
<td align="left">PCPG</td>
<td align="right">179</td>
</tr>
<tr>
<td align="center">23</td>
<td align="left">Prostate-adenocarcinoma</td>
<td align="left">PRAD</td>
<td align="right">497</td>
</tr>
<tr>
<td align="center">24</td>
<td align="left">Rectum-adenocarcinoma</td>
<td align="left">READ</td>
<td align="right">166</td>
</tr>
<tr>
<td align="center">25</td>
<td align="left">Sarcoma</td>
<td align="left">SARC</td>
<td align="right">259</td>
</tr>
<tr>
<td align="center">26</td>
<td align="left">Skin Cutaneous Melanoma</td>
<td align="left">SKCM</td>
<td align="right">469</td>
</tr>
<tr>
<td align="center">27</td>
<td align="left">Stomach adenocarcinoma</td>
<td align="left">STAD</td>
<td align="right">415</td>
</tr>
<tr>
<td align="center">28</td>
<td align="left">Testicular Germ Cell Tumors</td>
<td align="left">TGCT</td>
<td align="right">150</td>
</tr>
<tr>
<td align="center">29</td>
<td align="left">Thyroid carcinoma</td>
<td align="left">THCA</td>
<td align="right">501</td>
</tr>
<tr>
<td align="center">30</td>
<td align="left">Thymoma</td>
<td align="left">THYM</td>
<td align="right">120</td>
</tr>
<tr>
<td align="center">31</td>
<td align="left">Uterine Corpus Endometrial Carcinoma</td>
<td align="left">UCEC</td>
<td align="right">545</td>
</tr>
<tr>
<td align="center">32</td>
<td align="left">Uterine Carcinosarcoma</td>
<td align="left">UCS</td>
<td align="right">57</td>
</tr>
<tr>
<td align="center">33</td>
<td align="left">Uveal Melanoma</td>
<td align="left">UVM</td>
<td align="right">80</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Next, we analyze the datasets employed across the reviewed studies. <xref ref-type="fig" rid="F4">Figure 4</xref> depicts several datasets utilized for pan-cancer classification. BRCA is the most frequently utilized dataset in pan-cancer classification research. Other commonly used datasets include KIRC, LUAD, COAD, KIRP, and LIHC, among others.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Frequency of cancer types used in pan-cancer classification studies reviewed in this paper. The x-axis indicates the specific cancer types, while the y-axis shows the number of research papers that utilized each cancer type&#x2019;s dataset. The data presented here is based on a statistical analysis of the literature reviewed in this manuscript.</p>
</caption>
<graphic xlink:href="fgene-16-1667325-g004.tif">
<alt-text content-type="machine-generated">Bar chart showing the number of research papers for various cancer types. KIRC has the highest at twenty-six papers, while LAML has the lowest with ten. Other values range between thirteen and twenty-three papers.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4-4">
<label>4.4</label>
<title>Comparison and analysis</title>
<p>As reported in the reviewed literature, a performance comparison of various pan-cancer classification methods on the mRNA gene expression dataset for 33 cancer types reveals that deep learning models generally achieve higher classification accuracies than traditional machine learning methods. For instance, <xref ref-type="bibr" rid="B53">Lyu and Haque (2018)</xref> reported a 95.59% accuracy using a convolutional neural network, a performance that surpasses many of the reported accuracies of traditional machine learning algorithms on similar tasks. This qualitative comparison of architectures suggests that deep learning models are often more capable of distinguishing between 33 different cancer types due to their ability to learn complex, hierarchical features from high-dimensional data.</p>
<p>Next, the classifiers used in different research works are elaborated and analyzed. <xref ref-type="fig" rid="F5">Figure 5</xref> illustrates several common classifiers utilized for pan-cancer classification. This figure was generated by counting the primary classifiers used in the reviewed articles. A classifier was counted if it was the main model for the classification task. The raw counts were then converted to percentages to show the proportion of each classifier type. As shown in the figure, the most frequently used machine learning classifiers in pan-cancer classification studies are, in descending order of frequency, SVM, RF, ANN, and KNN. Meanwhile, among deep learning classifiers, CNNs and fully connected deep neural networks (DNNs, e.g., multilayer perceptrons) were the most frequently used.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>The frequency of different classifiers used in the pan-cancer classification research reviewed in this paper. Here, DNN refers to fully connected architectures (e.g., multilayer perceptrons), excluding convolutional neural networks (CNNs).</p>
</caption>
<graphic xlink:href="fgene-16-1667325-g005.tif">
<alt-text content-type="machine-generated">Bar chart showing the number of research papers by machine learning model: LR and DT have 3 papers each, KNN has 5, SVM has 10, RF has 8, ANN has 7, Bagging has 4, NB has 2, CNN has 9, and DNN has 6.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4-5">
<label>4.5</label>
<title>Discussion</title>
<p>In our review, we have summarized the diverse ML and DL algorithms applied to pan-cancer multi-omics analysis. In many cases, proposed methods were evaluated against existing algorithms, often showing comparable levels of performance. However, no systematic comparison of different approaches on a common dataset has yet been conducted. Despite the variety of methods, there is still no standardized framework applicable in clinical practice. A major challenge remains the difficulty of generalizing results across studies and ensuring reproducibility. To address this, automatic and standardized methodologies that can be readily applied by non-expert users should be developed to better support clinical decision-making.</p>
<p>The application of ML and DL to multi-omics data also presents significant challenges. As multi-omics data derived from different platforms have varying distributions, this must be carefully considered before data integration (<xref ref-type="bibr" rid="B63">Reel et al., 2021</xref>). Furthermore, the integration of multiple omics datasets can generate noise and introduce redundant information. New algorithms must also be designed to effectively handle missing observations, as samples may be absent in one or more omics datasets (<xref ref-type="bibr" rid="B43">Leng et al., 2022</xref>).</p>
<p>In addition, class imbalance and overfitting are commonly reported issues in biomedical datasets. A training set composed of imbalanced classes can negatively influence the accuracy of a classifier, necessitating the use of statistical techniques such as under- or oversampling (<xref ref-type="bibr" rid="B56">Misra et al., 2019</xref>). Moreover, the high-dimensional nature of multi-omics features can impact a classifier&#x2019;s performance, as correlated features introduce redundant information. To address this, optimal feature selection algorithms should be applied to select a limited, yet representative, subset of features.</p>
</sec>
</sec>
<sec id="s5">
<label>5</label>
<title>Challenges and future work</title>
<p>Current pan-cancer classification methods leverage diverse data types and models to improve cancer type differentiation and inform clinical decision-making. This review systematically summarizes the methodologies, data sets, and evaluation metrics used in pan-cancer research, highlighting the progress in utilizing genomics, transcriptomics, and epigenomics to analyze tumor heterogeneity. We reviewed current pan-cancer classification methods, categorizing them based on the models used and assessing their performance across different data types.</p>
<p>Despite these advancements, challenges persist. Many models heavily depend on labeled data, overlooking the potential of abundant unlabeled data. Pan-cancer studies often focus on molecular features, neglecting clinical correlations with diagnosis and treatment. Additionally, data imbalance and the underrepresentation of some tumor types lead to unstable models.</p>
<p>Moreover, a lack of standardized benchmarks, limited cross-cohort validation, and a need for uncertainty quantification and calibration remain significant obstacles for the field. The absence of standardized and reproducible benchmarks hampers fair comparison across methods. We encourage the community to establish unified benchmark datasets with consistent splitting protocols&#x2014;such as 5-fold stratified cross-validation (CV) on TCGA-33 mRNA data with fixed preprocessing steps (e.g., gene filtering, normalization, and batch-effect correction)&#x2014;to facilitate transparent and reproducible evaluation. In addition, the use of common baseline models (e.g., logistic regression, random forest, standard deep neural networks) alongside more advanced architectures will help future studies assess genuine performance gains. Data imbalance, especially the underrepresentation of rare cancers, further restricts the generalizability of the model, calling for strategies such as data augmentation, few-shot learning, or federated learning to mitigate scarcity.</p>
<p>Future studies should prioritize semi-supervised learning (SSL) frameworks to leverage both annotated and unannotated datasets, thereby addressing data scarcity challenges. Self-supervised pre-training on large-scale unlabeled datasets could uncover tumor heterogeneity and enhance downstream classification tasks. Incorporating multi-modal data fusion&#x2014;combining genomics, proteomics, and normal tissue data&#x2014;could bridge the gap between molecular research and clinical applications. Beyond general cancer classification, future research must pivot toward more granular, clinically actionable predictions. This includes predicting cancer subtypes, disease stage, patient survival rates, and response to specific treatments, which directly informs personalized medicine.</p>
<p>In conclusion, addressing data limitations, imbalance, and clinical integration using advanced techniques such as SSL and multimodal fusion will enable more robust pan-cancer classification models, improving cancer prediction, diagnosis, and treatment for better patient outcomes.</p>
</sec>
<sec id="s6">
<label>6</label>
<title>Clinical translation and ethics</title>
<p>Developing robust pan-cancer models is the first step; translating them into effective clinical tools requires addressing a second set of critical challenges related to translation, generalizability, and ethics. Although a model may perform well on a single curated dataset, its utility in real-world clinical practice depends on its performance in diverse patient populations and healthcare systems.</p>
<p>Currently Available vs. Necessary Validation. Pan-cancer models are mainly in the research and development stages. Models that can now be used are typically those integrated into established platforms (like the CGC) for secondary research analysis, offering broad tumor type classification or basic survival predictions on standardized datasets (e.g., TCGA, CPTAC). However, most high-performing models require rigorous, multi-center external validation before they can influence patient care. To ensure external validity, models must be evaluated on data from multiple centers, reducing batch effects and acquisition bias that can arise when a model is trained on the dataset of a single institution (<xref ref-type="bibr" rid="B17">Cen et al., 2025</xref>). Batch effects, often stemming from variations in sequencing platforms or laboratory protocols across different institutions, can introduce confounding signals that a model may mistakenly learn as biological features. Similarly, acquisition bias can occur if certain rare cancer subtypes or patient demographics are disproportionately represented in the training data from a single center, limiting the model&#x2019;s ability to generalize to a broader patient cohort.</p>
<p>Equally important is equitable performance across diverse demographic groups. The precision of a model must remain consistent regardless of the race, sex, or age of the patient, to ensure fair clinical outcomes and prevent health disparities from being exacerbated (<xref ref-type="bibr" rid="B23">Desai et al., 2022</xref>). These validation efforts must be accompanied by strict attention to data privacy and informed consent, particularly given the reliance of pan-cancer studies on large-scale, sensitive patient data. Concurrently, the increasing complexity of deep learning models highlights a critical need for interpretability, enabling clinicians to understand model predictions and extract meaningful biomarkers that inform clinical decision-making with confidence (<xref ref-type="bibr" rid="B68">Su et al., 2024</xref>). Going beyond simply identifying individual genes, interpretable models can provide pathway-level attribution, linking predictions to entire biological processes (e.g., the p53 signaling pathway), which offers more clinically actionable and biologically meaningful insights.</p>
<p>To be reliable for high-stakes clinical decisions, a model must also provide more than a single prediction. It is crucial for models to offer uncertainty estimation, which allows clinicians to gauge the confidence of the model in its prediction. A well-calibrated model, for example, will have its predicted probability (e.g., a 90% chance of a certain tumor type) accurately reflect its true correctness. Such reliability measures are essential to build trust and ensure the safe deployment of these models in patient care. Furthermore, potential regulatory considerations are paramount; any model intended for diagnostic or prognostic use must undergo rigorous review by regulatory bodies (such as the FDA) to ensure safety, efficacy, and clinical benefit.</p>
<p>In conclusion, the path from a pan-cancer model to a clinical tool is complex. It requires a holistic approach that moves beyond technical performance metrics to embrace the crucial factors of external validation, cost-effectiveness, and ethical responsibility. This comprehensive perspective is essential for developing models that are not only accurate in a research setting but are also robust, trustworthy, and beneficial in real-world clinical applications.</p>
</sec>
</body>
<back>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>JW: Conceptualization, Supervision, Writing &#x2013; review and editing, Investigation. JZ: Writing &#x2013; review and editing, Validation, Methodology, Writing &#x2013; original draft. XD: Writing &#x2013; original draft, Investigation, Conceptualization, Writing &#x2013; review and editing. CY: Formal Analysis, Methodology, Writing &#x2013; review and editing, Supervision. CF: Funding acquisition, Resources, Writing &#x2013; review and editing.</p>
</sec>
<ack>
<title>Acknowledgements</title>
<p>JW conceived and designed the algorithm and analysis. JW and JZ gathered all the data, designed the study, conducted experiments, and drafted the manuscript. JW, JZ, XD, CF, and CY contributed to results analysis and discussions, and gave the final approval of the version to be published. CF and CY supervised the study and revised the manuscript. We thank LetPub (<ext-link ext-link-type="uri" xlink:href="http://www.letpub.com">www.letpub.com</ext-link>) for its linguistic assistance during the preparation of this manuscript.</p>
</ack>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10">
<title>Correction note</title>
<p>A correction has been made to this article. Details can be found at: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2025.1743847">10.3389/fgene.2025.1743847</ext-link>.</p>
</sec>
<sec sec-type="ai-statement" id="s11">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s12">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/648133/overview">Valentina Silvestri</ext-link>, Sapienza University of Rome, Italy</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/761274/overview">Shixiang Wang</ext-link>, Central South University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2702826/overview">Asim Waqas</ext-link>, Moffitt Cancer Center, United States</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Abdullahi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bawazeer</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Alotaibai</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Almoaither</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Al-Otaibi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Alaskar</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). &#x201c;<article-title>Pretrained convolutional neural networks for cancer genome classification</article-title>,&#x201d; in <source>2020 3rd international conference on computer applications and information security (ICCAIS)</source> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>5</lpage>.</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Al Mamun</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mondal</surname>
<given-names>A. M.</given-names>
</name>
</person-group> (<year>2019a</year>). &#x201c;<article-title>Feature selection and classification reveal key lncRNAs for multiple cancers</article-title>,&#x201d; in <source>2019 IEEE international conference on bioinformatics and biomedicine (BIBM)</source> (<publisher-name>IEEE</publisher-name>), <fpage>2825</fpage>&#x2013;<lpage>2831</lpage>.</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Al Mamun</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mondal</surname>
<given-names>A. M.</given-names>
</name>
</person-group> (<year>2019b</year>). &#x201c;<article-title>Long non-coding RNA based cancer classification using deep neural networks</article-title>,&#x201d; in <source>Proceedings of the 10th ACM international conference on bioinformatics, computational biology and health informatics</source>, <fpage>541</fpage>.</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Al Mamun</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Duan</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Mondal</surname>
<given-names>A. M.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Pan-cancer feature selection and classification reveals important long non-coding RNAs</article-title>,&#x201d; in <source>2020 IEEE international conference on bioinformatics and biomedicine (BIBM)</source> (<publisher-name>IEEE</publisher-name>), <fpage>2417</fpage>&#x2013;<lpage>2424</lpage>.</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Al Mamun</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Tanvir</surname>
<given-names>R. B.</given-names>
</name>
<name>
<surname>Sobhan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mathee</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Narasimhan</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Holt</surname>
<given-names>G. E.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Multi-run concrete autoencoder to identify prognostic lncRNAs for 12 cancers</article-title>. <source>Int. J. Mol. Sci.</source> <volume>22</volume>, <fpage>11919</fpage>. <pub-id pub-id-type="doi">10.3390/ijms222111919</pub-id>
<pub-id pub-id-type="pmid">34769351</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Albaradei</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Napolitano</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Thafar</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Gojobori</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Essack</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Metacancer: a deep learning-based pan-cancer metastasis prediction model developed using multi-omics data</article-title>. <source>Comput. Struct. Biotechnol. J.</source> <volume>19</volume>, <fpage>4404</fpage>&#x2013;<lpage>4411</lpage>. <pub-id pub-id-type="doi">10.1016/j.csbj.2021.08.006</pub-id>
<pub-id pub-id-type="pmid">34429856</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>AlShibli</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mathkour</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A shallow convolutional learning network for classification of cancers based on copy number variations</article-title>. <source>Sensors</source> <volume>19</volume>, <fpage>4207</fpage>. <pub-id pub-id-type="doi">10.3390/s19194207</pub-id>
<pub-id pub-id-type="pmid">31569801</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alzubaidi</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Humaidi</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Al-Dujaili</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Duan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Al-Shamma</surname>
<given-names>O.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Review of deep learning: concepts, CNN architectures, challenges, applications, future directions</article-title>. <source>J. Big Data</source> <volume>8</volume>, <fpage>53</fpage>. <pub-id pub-id-type="doi">10.1186/s40537-021-00444-8</pub-id>
<pub-id pub-id-type="pmid">33816053</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ameen</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Alganmi</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Bajnaid</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Stacked deep learning ensemble for multiomics cancer type classification: development and validation study</article-title>. <source>JMIR Bioinforma. Biotechnol.</source> <volume>6</volume>, <fpage>e70709</fpage>. <pub-id pub-id-type="doi">10.2196/70709</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ashrafizadeh</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Najafi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ang</surname>
<given-names>H. L.</given-names>
</name>
<name>
<surname>Moghadam</surname>
<given-names>E. R.</given-names>
</name>
<name>
<surname>Mahabady</surname>
<given-names>M. K.</given-names>
</name>
<name>
<surname>Zabolian</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>PTEN, a barrier for proliferation and metastasis of gastric cancer cells: from molecular pathways to targeting and regulation</article-title>. <source>Biomedicines</source> <volume>8</volume>, <fpage>264</fpage>. <pub-id pub-id-type="doi">10.3390/biomedicines8080264</pub-id>
<pub-id pub-id-type="pmid">32756305</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barrett</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wilhite</surname>
<given-names>S. E.</given-names>
</name>
<name>
<surname>Ledoux</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Evangelista</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>I. F.</given-names>
</name>
<name>
<surname>Tomashevsky</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>NCBI GEO: archive for functional genomics data sets&#x2014;update</article-title>. <source>Nucleic Acids Res.</source> <volume>41</volume>, <fpage>D991</fpage>&#x2013;<lpage>D995</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gks1193</pub-id>
<pub-id pub-id-type="pmid">23193258</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bhattacharya</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ziebarth</surname>
<given-names>J. D.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>SomamiR: a database for somatic mutations impacting microRNA function in cancer</article-title>. <source>Nucleic Acids Res.</source> <volume>41</volume>, <fpage>D977</fpage>&#x2013;<lpage>D982</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gks1138</pub-id>
<pub-id pub-id-type="pmid">23180788</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bray</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Laversanne</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sung</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ferlay</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Siegel</surname>
<given-names>R. L.</given-names>
</name>
<name>
<surname>Soerjomataram</surname>
<given-names>I.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Global cancer statistics 2022: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries</article-title>. <source>CA a cancer J. Clin.</source> <volume>74</volume>, <fpage>229</fpage>&#x2013;<lpage>263</lpage>. <pub-id pub-id-type="doi">10.3322/caac.21834</pub-id>
<pub-id pub-id-type="pmid">38572751</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cai</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Gupta</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rath</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Ai</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Baudis</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>arrayMap 2014: an updated cancer genome resource</article-title>. <source>Nucleic Acids Res.</source> <volume>43</volume>, <fpage>D825</fpage>&#x2013;<lpage>D830</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gku1123</pub-id>
<pub-id pub-id-type="pmid">25428357</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Capper</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Jones</surname>
<given-names>D. T.</given-names>
</name>
<name>
<surname>Sill</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hovestadt</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Schrimpf</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Sturm</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>DNA methylation-based classification of central nervous system tumours</article-title>. <source>Nature</source> <volume>555</volume>, <fpage>469</fpage>&#x2013;<lpage>474</lpage>. <pub-id pub-id-type="doi">10.1038/nature26000</pub-id>
<pub-id pub-id-type="pmid">29539639</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cava</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Salvatore</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Castiglioni</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Pan-cancer classification of gene expression data based on artificial neural network model</article-title>. <source>Appl. Sci.</source> <volume>13</volume>, <fpage>7355</fpage>. <pub-id pub-id-type="doi">10.3390/app13137355</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Lan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Tong</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Pan-cancer analysis shapes the understanding of cancer biology and medicine</article-title>. <source>Cancer Commun.</source> <volume>45</volume>, <fpage>728</fpage>&#x2013;<lpage>746</lpage>. <pub-id pub-id-type="doi">10.1002/cac2.70008</pub-id>
<pub-id pub-id-type="pmid">40120098</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheerla</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Gevaert</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>MicroRNA based pan-cancer diagnosis and treatment recommendation</article-title>. <source>BMC Bioinforma.</source> <volume>18</volume>, <fpage>32</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-016-1421-y</pub-id>
<pub-id pub-id-type="pmid">28086747</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Long non-coding RNAs: from disease code to drug role</article-title>. <source>Acta Pharm. Sin. B</source> <volume>11</volume>, <fpage>340</fpage>&#x2013;<lpage>354</lpage>. <pub-id pub-id-type="doi">10.1016/j.apsb.2020.10.001</pub-id>
<pub-id pub-id-type="pmid">33643816</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cho</surname>
<given-names>H. J.</given-names>
</name>
<name>
<surname>Shu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bekiranov</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Interpretable meta-learning of multi-omics data for survival analysis and pathway enrichment</article-title>. <source>Bioinformatics</source> <volume>39</volume>, <fpage>btad113</fpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btad113</pub-id>
<pub-id pub-id-type="pmid">36864611</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<collab>International Cancer Genome Consortium</collab>
<name>
<surname>Hudson</surname>
<given-names>T. J.</given-names>
</name>
<name>
<surname>Anderson</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Artez</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Barker</surname>
<given-names>A. D.</given-names>
</name>
<name>
<surname>Bell</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2010</year>). <article-title>International network of cancer genome projects</article-title>. <source>Nature</source> <volume>464</volume>, <fpage>993</fpage>&#x2013;<lpage>998</lpage>. <pub-id pub-id-type="doi">10.1038/nature08987</pub-id>
<pub-id pub-id-type="pmid">20393554</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cui</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>H.-Y.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y.-C.-D.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>miRTarBase 2025: updates to the collection of experimentally validated microRNA&#x2013;target interactions</article-title>. <source>Nucleic Acids Res.</source> <volume>53</volume>, <fpage>D147</fpage>&#x2013;<lpage>D156</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkae1072</pub-id>
<pub-id pub-id-type="pmid">39578692</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Desai</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Hlaing</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Goyal</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Keogh</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Racial disparities in oncology clinical trials</article-title>. <source>J. Clin. Oncol.</source> <volume>40</volume>, <fpage>356</fpage>. <pub-id pub-id-type="doi">10.1200/jco.2022.40.28_suppl.356</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Divate</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Tyagi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Richard</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Prasad</surname>
<given-names>P. A.</given-names>
</name>
<name>
<surname>Gowda</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Nagaraj</surname>
<given-names>S. H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Deep learning-based pan-cancer classification model reveals tissue-of-origin specific gene expression signatures</article-title>. <source>Cancers</source> <volume>14</volume>, <fpage>1185</fpage>. <pub-id pub-id-type="doi">10.3390/cancers14051185</pub-id>
<pub-id pub-id-type="pmid">35267493</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Elsadek</surname>
<given-names>S. F. A.</given-names>
</name>
<name>
<surname>Makhlouf</surname>
<given-names>M. A. A.</given-names>
</name>
<name>
<surname>Aldeen</surname>
<given-names>M. A.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Supervised classification of cancers based on copy number variation</article-title>,&#x201d; in <source>Proceedings of the International Conference on Advanced Intelligent Systems and Informatics 2018 4</source> (<publisher-name>Springer</publisher-name>), <fpage>198</fpage>&#x2013;<lpage>207</lpage>.</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Fullwood</surname>
<given-names>M. J.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Roles, functions, and mechanisms of long non-coding RNAs in cancer</article-title>. <source>Genomics, Proteomics &#x26; Bioinforma.</source> <volume>14</volume>, <fpage>42</fpage>&#x2013;<lpage>54</lpage>. <pub-id pub-id-type="doi">10.1016/j.gpb.2015.09.006</pub-id>
<pub-id pub-id-type="pmid">26883671</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Forbes</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Beare</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gunasekaran</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Leung</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Bindal</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Boutselakis</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>COSMIC: exploring the world&#x2019;s knowledge of somatic mutations in human cancer</article-title>. <source>Nucleic Acids Res.</source> <volume>43</volume>, <fpage>D805</fpage>&#x2013;<lpage>D811</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gku1075</pub-id>
<pub-id pub-id-type="pmid">25355519</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Formosa</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Lena</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Markert</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Cortelli</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Miano</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Mauriello</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>DNA methylation silences miR-132 in prostate cancer</article-title>. <source>Oncogene</source> <volume>32</volume>, <fpage>127</fpage>&#x2013;<lpage>134</lpage>. <pub-id pub-id-type="doi">10.1038/onc.2012.14</pub-id>
<pub-id pub-id-type="pmid">22310291</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Frenkel-Morgenstern</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Gorohovski</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Vucenovic</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Maestre</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Valencia</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>ChiTaRS 2.1&#x2014;an improved database of the chimeric transcripts and RNA-seq data with novel sense&#x2013;antisense chimeric RNA transcripts</article-title>. <source>Nucleic Acids Res.</source> <volume>43</volume>, <fpage>D68</fpage>&#x2013;<lpage>D75</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gku1199</pub-id>
<pub-id pub-id-type="pmid">25414346</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Galagali</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>J. K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>The multifaceted roles of microRNAs in differentiation</article-title>. <source>Curr. Opin. Cell Biol.</source> <volume>67</volume>, <fpage>118</fpage>&#x2013;<lpage>140</lpage>. <pub-id pub-id-type="doi">10.1016/j.ceb.2020.08.015</pub-id>
<pub-id pub-id-type="pmid">33152557</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Aksoy</surname>
<given-names>B. A.</given-names>
</name>
<name>
<surname>Dogrusoz</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Dresdner</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Gross</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Sumer</surname>
<given-names>S. O.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Integrative analysis of complex cancer genomics and clinical profiles using the cBioPortal</article-title>. <source>Sci. Signal.</source> <volume>6</volume>, <fpage>pl1</fpage>. <pub-id pub-id-type="doi">10.1126/scisignal.2004088</pub-id>
<pub-id pub-id-type="pmid">23550210</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Xiang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kusonmano</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2007</year>). <article-title>MethyCancer: the database of human DNA methylation and cancer</article-title>. <source>Nucleic Acids Res.</source> <volume>36</volume>, <fpage>D836</fpage>&#x2013;<lpage>D841</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkm730</pub-id>
<pub-id pub-id-type="pmid">17890243</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Clinical significance of germline copy number variation in susceptibility of human diseases</article-title>. <source>J. Genet. Genomics</source> <volume>45</volume>, <fpage>3</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1016/j.jgg.2018.01.001</pub-id>
<pub-id pub-id-type="pmid">29396143</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>W.-Y.</given-names>
</name>
<name>
<surname>Hsu</surname>
<given-names>S.-D.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>H.-Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y.-M.</given-names>
</name>
<name>
<surname>Chou</surname>
<given-names>C.-H.</given-names>
</name>
<name>
<surname>Weng</surname>
<given-names>S.-L.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>MethHC: a database of DNA methylation and gene expression in human cancer</article-title>. <source>Nucleic Acids Res.</source> <volume>43</volume>, <fpage>D856</fpage>&#x2013;<lpage>D861</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gku1151</pub-id>
<pub-id pub-id-type="pmid">25398901</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huynh</surname>
<given-names>P.-H.</given-names>
</name>
<name>
<surname>Nguyen</surname>
<given-names>V.-H.</given-names>
</name>
<name>
<surname>Do</surname>
<given-names>T.-N.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Novel hybrid DCNN&#x2013;SVM model for classifying RNA-sequencing gene expression data</article-title>. <source>J. Inf. Telecommun.</source> <volume>3</volume>, <fpage>533</fpage>&#x2013;<lpage>547</lpage>. <pub-id pub-id-type="doi">10.1080/24751839.2019.1660845</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Karakach</surname>
<given-names>T. K.</given-names>
</name>
<name>
<surname>Flight</surname>
<given-names>R. M.</given-names>
</name>
<name>
<surname>Douglas</surname>
<given-names>S. E.</given-names>
</name>
<name>
<surname>Wentzell</surname>
<given-names>P. D.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>An introduction to DNA microarrays for gene expression analysis</article-title>. <source>Chemom. Intelligent Laboratory Syst.</source> <volume>104</volume>, <fpage>28</fpage>&#x2013;<lpage>52</lpage>. <pub-id pub-id-type="doi">10.1016/j.chemolab.2010.04.003</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khadirnaikar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Shukla</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Prasanna</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Integration of pan-cancer multi-omics data for novel mixed subgroup identification using machine learning methods</article-title>. <source>PLoS One</source> <volume>18</volume>, <fpage>e0287176</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0287176</pub-id>
<pub-id pub-id-type="pmid">37856446</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khalifa</surname>
<given-names>N. E. M.</given-names>
</name>
<name>
<surname>Taha</surname>
<given-names>M. H. N.</given-names>
</name>
<name>
<surname>Ali</surname>
<given-names>D. E.</given-names>
</name>
<name>
<surname>Slowik</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hassanien</surname>
<given-names>A. E.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Artificial intelligence technique for gene expression by tumor RNA-Seq data: a novel optimized deep learning approach</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>22874</fpage>&#x2013;<lpage>22883</lpage>. <pub-id pub-id-type="doi">10.1109/access.2020.2970210</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kim</surname>
<given-names>B.-H.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>P. C.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Cancer classification of single-cell gene expression data by neural network</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>1360</fpage>&#x2013;<lpage>1366</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btz772</pub-id>
<pub-id pub-id-type="pmid">31603465</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kolesnikov</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Hastings</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Keays</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Melnichuk</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>Y. A.</given-names>
</name>
<name>
<surname>Williams</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>ArrayExpress update&#x2014;simplifying data submissions</article-title>. <source>Nucleic Acids Res.</source> <volume>43</volume>, <fpage>D1113</fpage>&#x2013;<lpage>D1116</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gku1057</pub-id>
<pub-id pub-id-type="pmid">25361974</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lappalainen</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Almeida-King</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kumanduri</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Senf</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Spalding</surname>
<given-names>J. D.</given-names>
</name>
<name>
<surname>Ur-Rehman</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>The European Genome-phenome Archive of human data consented for biomedical research</article-title>. <source>Nat. Genet.</source> <volume>47</volume>, <fpage>692</fpage>&#x2013;<lpage>695</lpage>. <pub-id pub-id-type="doi">10.1038/ng.3312</pub-id>
<pub-id pub-id-type="pmid">26111507</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Leibovitch</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Topisirovic</surname>
<given-names>I.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Dysregulation of mRNA translation and energy metabolism in cancer</article-title>. <source>Adv. Biol. Regul.</source> <volume>67</volume>, <fpage>30</fpage>&#x2013;<lpage>39</lpage>. <pub-id pub-id-type="doi">10.1016/j.jbior.2017.11.001</pub-id>
<pub-id pub-id-type="pmid">29150352</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Leng</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>A benchmark study of deep learning-based multi-omics data fusion methods for cancer</article-title>. <source>Genome Biol.</source> <volume>23</volume>, <fpage>171</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-022-02739-2</pub-id>
<pub-id pub-id-type="pmid">35945544</pub-id>
</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Kang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Krahn</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Croutwater</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Umbach</surname>
<given-names>D. M.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>A comprehensive genomic pan-cancer classification using the Cancer Genome Atlas gene expression data</article-title>. <source>BMC Genomics</source> <volume>18</volume>, <fpage>508</fpage>. <pub-id pub-id-type="doi">10.1186/s12864-017-3906-0</pub-id>
<pub-id pub-id-type="pmid">28673244</pub-id>
</mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Pan-cancer classification based on self-normalizing neural networks and feature selection</article-title>. <source>Front. Bioeng. Biotechnol.</source> <volume>8</volume>, <fpage>766</fpage>. <pub-id pub-id-type="doi">10.3389/fbioe.2020.00766</pub-id>
<pub-id pub-id-type="pmid">32850695</pub-id>
</mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Kang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>AVBAE-MODFR: a novel deep learning framework of embedding and feature selection on multi-omics data for pan-cancer classification</article-title>. <source>Comput. Biol. Med.</source> <volume>177</volume>, <fpage>108614</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2024.108614</pub-id>
<pub-id pub-id-type="pmid">38796884</pub-id>
</mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>DGHNN: a deep graph and hypergraph neural network for pan-cancer related gene prediction</article-title>. <source>Bioinformatics</source> <volume>41</volume>, <fpage>btaf379</fpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btaf379</pub-id>
<pub-id pub-id-type="pmid">40580449</pub-id>
</mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Pan-cancer DNA methylation analysis and tumor origin identification of carcinoma of unknown primary site based on multi-omics</article-title>. <source>Front. Genet.</source> <volume>12</volume>, <fpage>798748</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2021.798748</pub-id>
<pub-id pub-id-type="pmid">35069697</pub-id>
</mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>X. S.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Ji</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Stelzer</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Czauderna</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Editing DNA methylation in the mammalian genome</article-title>. <source>Cell</source> <volume>167</volume>, <fpage>233</fpage>&#x2013;<lpage>247.e17</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2016.08.056</pub-id>
<pub-id pub-id-type="pmid">27662091</pub-id>
</mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S. C.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>DNA methylation markers for pan-cancer prediction by deep learning</article-title>. <source>Genes</source> <volume>10</volume>, <fpage>778</fpage>. <pub-id pub-id-type="doi">10.3390/genes10100778</pub-id>
<pub-id pub-id-type="pmid">31590287</pub-id>
</mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lopez-Rincon</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Martinez-Archundia</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Martinez-Ruiz</surname>
<given-names>G. U.</given-names>
</name>
<name>
<surname>Schoenhuth</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Tonda</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Automatic discovery of 100-miRNA signature for cancer classification using ensemble feature selection</article-title>. <source>BMC Bioinforma.</source> <volume>20</volume>, <fpage>480</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-019-3050-8</pub-id>
<pub-id pub-id-type="pmid">31533612</pub-id>
</mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Qian</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Lv</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Shao</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Characterization of the metabolic alteration-modulated tumor microenvironment mediated by TP53 mutation and hypoxia</article-title>. <source>Comput. Biol. Med.</source> <volume>163</volume>, <fpage>107078</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2023.107078</pub-id>
<pub-id pub-id-type="pmid">37356294</pub-id>
</mixed-citation>
</ref>
<ref id="B53">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Lyu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Haque</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Deep learning based tumor type classification using gene expression data</article-title>,&#x201d; in <source>Proceedings of the 2018 ACM international conference on bioinformatics, computational biology, and health informatics</source>, <fpage>89</fpage>&#x2013;<lpage>96</lpage>.</mixed-citation>
</ref>
<ref id="B54">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mahin</surname>
<given-names>K. F.</given-names>
</name>
<name>
<surname>Robiuddin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Islam</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ashraf</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yeasmin</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Shatabda</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>PanClassif: improving pan cancer classification of single cell RNA-seq gene expression data using machine learning</article-title>. <source>Genomics</source> <volume>114</volume>, <fpage>110264</fpage>. <pub-id pub-id-type="doi">10.1016/j.ygeno.2022.01.001</pub-id>
<pub-id pub-id-type="pmid">34998929</pub-id>
</mixed-citation>
</ref>
<ref id="B55">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mesri</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>An</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Bavarva</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Robles</surname>
<given-names>A. I.</given-names>
</name>
<name>
<surname>Hiltke</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Abstract 1852: NCI&#x2019;s Clinical Proteomic Tumor Analysis Consortium: a proteogenomic cancer analysis program</article-title>. <source>Cancer Res.</source> <volume>84</volume>, <fpage>1852</fpage>. <pub-id pub-id-type="doi">10.1158/1538-7445.am2024-1852</pub-id>
</mixed-citation>
</ref>
<ref id="B56">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Misra</surname>
<given-names>B. B.</given-names>
</name>
<name>
<surname>Langefeld</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Olivier</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Cox</surname>
<given-names>L. A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Integrated omics: tools, advances and future approaches</article-title>. <source>J. Mol. Endocrinol.</source> <volume>62</volume>, <fpage>R21</fpage>&#x2013;<lpage>R45</lpage>. <pub-id pub-id-type="doi">10.1530/JME-18-0055</pub-id>
<pub-id pub-id-type="pmid">30006342</pub-id>
</mixed-citation>
</ref>
<ref id="B57">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mostavi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Chiu</surname>
<given-names>Y.-C.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Convolutional neural network models for cancer type prediction based on gene expression</article-title>. <source>BMC Med. Genomics</source> <volume>13</volume>, <fpage>44</fpage>. <pub-id pub-id-type="doi">10.1186/s12920-020-0677-2</pub-id>
<pub-id pub-id-type="pmid">32241303</pub-id>
</mixed-citation>
</ref>
<ref id="B58">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nandwani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rathore</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Datta</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>LncRNAs in cancer: regulatory and therapeutic implications</article-title>. <source>Cancer Lett.</source> <volume>501</volume>, <fpage>162</fpage>&#x2013;<lpage>171</lpage>. <pub-id pub-id-type="doi">10.1016/j.canlet.2020.11.048</pub-id>
<pub-id pub-id-type="pmid">33359709</pub-id>
</mixed-citation>
</ref>
<ref id="B59">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Navarro Gonzalez</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zweig</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Speir</surname>
<given-names>M. L.</given-names>
</name>
<name>
<surname>Schmelter</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Rosenbloom</surname>
<given-names>K. R.</given-names>
</name>
<name>
<surname>Raney</surname>
<given-names>B. J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>The UCSC genome browser database: 2021 update</article-title>. <source>Nucleic Acids Res.</source> <volume>49</volume>, <fpage>D1046</fpage>&#x2013;<lpage>D1057</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkaa1070</pub-id>
<pub-id pub-id-type="pmid">33221922</pub-id>
</mixed-citation>
</ref>
<ref id="B60">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pop-Bica</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Pintea</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Magdo</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Cojocneanu</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Gulei</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Ferracin</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>The clinical utility of miR-21 and let-7 in non-small cell lung cancer (NSCLC). A systematic review and meta-analysis</article-title>. <source>Front. Oncol.</source> <volume>10</volume>, <fpage>516850</fpage>. <pub-id pub-id-type="doi">10.3389/fonc.2020.516850</pub-id>
<pub-id pub-id-type="pmid">33194579</pub-id>
</mixed-citation>
</ref>
<ref id="B61">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>P&#xf6;s</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Radvanszky</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Bugly&#xf3;</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>P&#xf6;s</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Rusnakova</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Nagy</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>DNA copy number variation: main characteristics, evolutionary significance, and pathological aspects</article-title>. <source>Biomed. J.</source> <volume>44</volume>, <fpage>548</fpage>&#x2013;<lpage>559</lpage>. <pub-id pub-id-type="doi">10.1016/j.bj.2021.02.003</pub-id>
<pub-id pub-id-type="pmid">34649833</pub-id>
</mixed-citation>
</ref>
<ref id="B62">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>K. K.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>mRNA-based therapeutics: powerful and versatile tools to combat diseases</article-title>. <source>Signal Transduct. Target. Ther.</source> <volume>7</volume>, <fpage>166</fpage>. <pub-id pub-id-type="doi">10.1038/s41392-022-01007-w</pub-id>
<pub-id pub-id-type="pmid">35597779</pub-id>
</mixed-citation>
</ref>
<ref id="B63">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reel</surname>
<given-names>P. S.</given-names>
</name>
<name>
<surname>Reel</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pearson</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Trucco</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Jefferson</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Using machine learning approaches for multi-omics data analysis: a review</article-title>. <source>Biotechnol. Adv.</source> <volume>49</volume>, <fpage>107739</fpage>. <pub-id pub-id-type="doi">10.1016/j.biotechadv.2021.107739</pub-id>
<pub-id pub-id-type="pmid">33794304</pub-id>
</mixed-citation>
</ref>
<ref id="B64">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rong</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Qiu</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>MCluster-VAEs: an end-to-end variational deep learning-based clustering method for subtype discovery using multi-omics data</article-title>. <source>Comput. Biol. Med.</source> <volume>150</volume>, <fpage>106085</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2022.106085</pub-id>
<pub-id pub-id-type="pmid">36162197</pub-id>
</mixed-citation>
</ref>
<ref id="B65">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Santucci</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Carioli</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Bertuccio</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Malvezzi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pastorino</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Boffetta</surname>
<given-names>P.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Progress in cancer mortality, incidence, and survival: a global overview</article-title>. <source>Eur. J. Cancer Prev.</source> <volume>29</volume>, <fpage>367</fpage>&#x2013;<lpage>381</lpage>. <pub-id pub-id-type="doi">10.1097/CEJ.0000000000000594</pub-id>
<pub-id pub-id-type="pmid">32740162</pub-id>
</mixed-citation>
</ref>
<ref id="B66">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Siegel</surname>
<given-names>R. L.</given-names>
</name>
<name>
<surname>Miller</surname>
<given-names>K. D.</given-names>
</name>
<name>
<surname>Goding Sauer</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fedewa</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Butterly</surname>
<given-names>L. F.</given-names>
</name>
<name>
<surname>Anderson</surname>
<given-names>J. C.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Colorectal cancer statistics, 2020</article-title>. <source>CA Cancer J. Clin.</source> <volume>70</volume>, <fpage>145</fpage>&#x2013;<lpage>164</lpage>. <pub-id pub-id-type="doi">10.3322/caac.21601</pub-id>
<pub-id pub-id-type="pmid">32133645</pub-id>
</mixed-citation>
</ref>
<ref id="B67">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sinha</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Luna</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Schultz</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Sander</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A pan-cancer survey of cell line tumor similarity by feature-weighted molecular profiles</article-title>. <source>Cell Rep. Methods</source> <volume>1</volume>, <fpage>100039</fpage>. <pub-id pub-id-type="doi">10.1016/j.crmeth.2021.100039</pub-id>
<pub-id pub-id-type="pmid">35475239</pub-id>
</mixed-citation>
</ref>
<ref id="B68">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Su</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hounye</surname>
<given-names>A. H.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Miao</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Explainable cancer factors discovery: Shapley additive explanation for machine learning models demonstrates the best practices in the case of pancreatic cancer</article-title>. <source>Pancreatology</source> <volume>24</volume>, <fpage>404</fpage>&#x2013;<lpage>423</lpage>. <pub-id pub-id-type="doi">10.1016/j.pan.2024.02.002</pub-id>
<pub-id pub-id-type="pmid">38342661</pub-id>
</mixed-citation>
</ref>
<ref id="B69">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Subramanian</surname>
<given-names>S. L.</given-names>
</name>
<name>
<surname>Ray</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>DiGiovanna</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Radenkovic</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Tosic</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mirkovic</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Abstract 253: the Cancer Genomics Cloud: a secure and scalable cloud-based platform to access, share and analyze multi-omics datasets</article-title>. <source>Cancer Res.</source> <volume>81</volume>, <fpage>253</fpage>. <pub-id pub-id-type="doi">10.1158/1538-7445.am2021-253</pub-id>
</mixed-citation>
</ref>
<ref id="B70">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>GeneCT: a generalizable cancerous status and tissue origin classifier for pan-cancer biopsies</article-title>. <source>Bioinformatics</source> <volume>34</volume>, <fpage>4129</fpage>&#x2013;<lpage>4130</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty524</pub-id>
<pub-id pub-id-type="pmid">29947737</pub-id>
</mixed-citation>
</ref>
<ref id="B71">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>MicroRNAs: emerging oncogenic and tumor-suppressive regulators, biomarkers and therapeutic targets in lung cancer</article-title>. <source>Cancer Lett.</source> <volume>502</volume>, <fpage>71</fpage>&#x2013;<lpage>83</lpage>. <pub-id pub-id-type="doi">10.1016/j.canlet.2020.12.040</pub-id>
<pub-id pub-id-type="pmid">33453304</pub-id>
</mixed-citation>
</ref>
<ref id="B72">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tomczak</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Czerwi&#x144;ska</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Wiznerowicz</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>The Cancer Genome Atlas (TCGA): an immeasurable source of knowledge</article-title>. <source>Contemp. Oncology/Wsp&#xf3;&#x142;czesna Onkol.</source> <volume>19</volume>, <fpage>A68</fpage>&#x2013;<lpage>A77</lpage>. <pub-id pub-id-type="doi">10.5114/wo.2014.47136</pub-id>
</mixed-citation>
</ref>
<ref id="B73">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Gerstein</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Snyder</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>RNA-Seq: a revolutionary tool for transcriptomics</article-title>. <source>Nat. Rev. Genet.</source> <volume>10</volume>, <fpage>57</fpage>&#x2013;<lpage>63</lpage>. <pub-id pub-id-type="doi">10.1038/nrg2484</pub-id>
<pub-id pub-id-type="pmid">19015660</pub-id>
</mixed-citation>
</ref>
<ref id="B74">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>OncomiRDB: a database for the experimentally verified oncogenic and tumor-suppressive microRNAs</article-title>. <source>Bioinformatics</source> <volume>30</volume>, <fpage>2237</fpage>&#x2013;<lpage>2238</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btu155</pub-id>
<pub-id pub-id-type="pmid">24651967</pub-id>
</mixed-citation>
</ref>
<ref id="B75">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Tumor classification and biomarker discovery based on the 5&#x2019;isomiR expression level</article-title>. <source>BMC Cancer</source> <volume>19</volume>, <fpage>127</fpage>. <pub-id pub-id-type="doi">10.1186/s12885-019-5340-y</pub-id>
<pub-id pub-id-type="pmid">30732570</pub-id>
</mixed-citation>
</ref>
<ref id="B76">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>F.-A.</given-names>
</name>
<name>
<surname>Zhuang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>TMO-Net: an explainable pretrained multi-omics model for multi-task learning in oncology</article-title>. <source>Genome Biol.</source> <volume>25</volume>, <fpage>149</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-024-03293-9</pub-id>
<pub-id pub-id-type="pmid">38845006</pub-id>
</mixed-citation>
</ref>
<ref id="B77">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Waterman</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>The Human Genome Project: the beginning of the beginning</article-title>. <source>Quant. Biol.</source> <volume>9</volume>, <fpage>4</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.15302/j-qb-021-0243</pub-id>
</mixed-citation>
</ref>
<ref id="B78">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Current status of and barriers to the treatment of advanced-stage liver cancer in China: a questionnaire-based study from the perspective of doctors</article-title>. <source>BMC Gastroenterol.</source> <volume>22</volume>, <fpage>351</fpage>. <pub-id pub-id-type="doi">10.1186/s12876-022-02425-4</pub-id>
<pub-id pub-id-type="pmid">35871649</pub-id>
</mixed-citation>
</ref>
<ref id="B79">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weinstein</surname>
<given-names>J. N.</given-names>
</name>
<name>
<surname>Collisson</surname>
<given-names>E. A.</given-names>
</name>
<name>
<surname>Mills</surname>
<given-names>G. B.</given-names>
</name>
<name>
<surname>Shaw</surname>
<given-names>K. R.</given-names>
</name>
<name>
<surname>Ozenberger</surname>
<given-names>B. A.</given-names>
</name>
<name>
<surname>Ellrott</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>The Cancer Genome Atlas Pan-Cancer analysis project</article-title>. <source>Nat. Genet.</source> <volume>45</volume>, <fpage>1113</fpage>&#x2013;<lpage>1120</lpage>. <pub-id pub-id-type="doi">10.1038/ng.2764</pub-id>
<pub-id pub-id-type="pmid">24071849</pub-id>
</mixed-citation>
</ref>
<ref id="B80">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wilks</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Cline</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Weiler</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Diekhans</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Craft</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Martin</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>The Cancer Genomics Hub (CGHub): overcoming cancer through the power of torrential data</article-title>. <source>Database</source> <volume>2014</volume>, <fpage>bau093</fpage>. <pub-id pub-id-type="doi">10.1093/database/bau093</pub-id>
<pub-id pub-id-type="pmid">25267794</pub-id>
</mixed-citation>
</ref>
<ref id="B81">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>T.-J.</given-names>
</name>
<name>
<surname>Shamsaddini</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Smith</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Crichton</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Simonyan</surname>
<given-names>V.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>A framework for organizing cancer-related variations from existing databases, publications and NGS data using a High-performance Integrated Virtual environment (HIVE)</article-title>. <source>Database</source> <volume>2014</volume>, <fpage>bau022</fpage>. <pub-id pub-id-type="doi">10.1093/database/bau022</pub-id>
<pub-id pub-id-type="pmid">24667251</pub-id>
</mixed-citation>
</ref>
<ref id="B82">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>DeepMoIC: multi-omics data integration <italic>via</italic> deep graph convolutional networks for cancer subtype classification</article-title>. <source>BMC Genomics</source> <volume>25</volume>, <fpage>1209</fpage>&#x2013;<lpage>1213</lpage>. <pub-id pub-id-type="doi">10.1186/s12864-024-11112-5</pub-id>
<pub-id pub-id-type="pmid">39695368</pub-id>
</mixed-citation>
</ref>
<ref id="B83">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xie</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>miRCancer: a microRNA&#x2013;cancer association database constructed by text mining on literature</article-title>. <source>Bioinformatics</source> <volume>29</volume>, <fpage>638</fpage>&#x2013;<lpage>644</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btt014</pub-id>
<pub-id pub-id-type="pmid">23325619</pub-id>
</mixed-citation>
</ref>
<ref id="B84">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Small cell lung cancer transformations from non-small cell lung cancer: biological mechanism and clinical relevance</article-title>. <source>Chin. Med. J. Pulm. Crit. Care Med.</source> <volume>2</volume>, <fpage>42</fpage>&#x2013;<lpage>47</lpage>. <pub-id pub-id-type="doi">10.1016/j.pccm.2023.10.005</pub-id>
<pub-id pub-id-type="pmid">39170959</pub-id>
</mixed-citation>
</ref>
<ref id="B85">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Kotoge</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Piao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Matsubara</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sakurai</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>CMOB: large-scale cancer multi-omics benchmark with open datasets, tasks, and baselines</article-title>. <source>arXiv e-prints</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2409.02143</pub-id>
</mixed-citation>
</ref>
<ref id="B86">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ye</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Genomic pan-cancer classification using image-based deep learning</article-title>. <source>Comput. Struct. Biotechnol. J.</source> <volume>19</volume>, <fpage>835</fpage>&#x2013;<lpage>846</lpage>. <pub-id pub-id-type="doi">10.1016/j.csbj.2021.01.010</pub-id>
<pub-id pub-id-type="pmid">33598099</pub-id>
</mixed-citation>
</ref>
<ref id="B87">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Classification of cancers based on copy number variation landscapes</article-title>. <source>Biochimica Biophysica Acta (BBA)-General Subj.</source> <volume>1860</volume>, <fpage>2750</fpage>&#x2013;<lpage>2755</lpage>. <pub-id pub-id-type="doi">10.1016/j.bbagen.2016.06.003</pub-id>
<pub-id pub-id-type="pmid">27266344</pub-id>
</mixed-citation>
</ref>
<ref id="B88">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Dai</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Integrated multi-omics analysis using variational autoencoders: application to pan-cancer classification</article-title>,&#x201d; in <source>2019 IEEE international conference on bioinformatics and biomedicine (BIBM)</source> (<publisher-name>IEEE</publisher-name>), <fpage>765</fpage>&#x2013;<lpage>769</lpage>.</mixed-citation>
</ref>
<ref id="B89">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>C. K. W.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Qin</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Tumour heterogeneity and personalized treatment screening based on single-cell transcriptomics</article-title>. <source>Comput. Struct. Biotechnol. J.</source> <volume>27</volume>, <fpage>307</fpage>&#x2013;<lpage>320</lpage>. <pub-id pub-id-type="doi">10.1016/j.csbj.2024.12.020</pub-id>
<pub-id pub-id-type="pmid">39877290</pub-id>
</mixed-citation>
</ref>
<ref id="B90">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>A review of cancer data fusion methods based on deep learning</article-title>. <source>Inf. Fusion</source> <volume>108</volume>, <fpage>102361</fpage>. <pub-id pub-id-type="doi">10.1016/j.inffus.2024.102361</pub-id>
</mixed-citation>
</ref>
<ref id="B91">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zeng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Cancer incidence and mortality in China, 2016</article-title>. <source>J. Natl. Cancer Cent.</source> <volume>2</volume>, <fpage>1</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1016/j.jncc.2022.02.002</pub-id>
<pub-id pub-id-type="pmid">39035212</pub-id>
</mixed-citation>
</ref>
</ref-list>
</back>
</article>
