<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="review-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2024.1408843</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Artificial Intelligence</subject>
<subj-group>
<subject>Review</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Multimodal data integration for oncology in the era of deep neural networks: a review</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Waqas</surname> <given-names>Asim</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2702826/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Tripathi</surname> <given-names>Aakash</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2700575/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Ramachandran</surname> <given-names>Ravi P.</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2793967/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Stewart</surname> <given-names>Paul A.</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/886673/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Rasool</surname> <given-names>Ghulam</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/113420/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Machine Learning, Moffitt Cancer Center</institution>, <addr-line>Tampa, FL</addr-line>, <country>United States</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Cancer Epidemiology, Moffitt Cancer Center</institution>, <addr-line>Tampa, FL</addr-line>, <country>United States</country></aff>
<aff id="aff3"><sup>3</sup><institution>Department of Electrical and Computer Engineering, Rowan University</institution>, <addr-line>Glassboro, NJ</addr-line>, <country>United States</country></aff>
<aff id="aff4"><sup>4</sup><institution>Department of Biostatistics and Bioinformatics, Moffitt Cancer Center</institution>, <addr-line>Tampa, FL</addr-line>, <country>United States</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Francesco Napolitano, University of Sannio, Italy</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Sunyoung Jang, The Pennsylvania State University, United States</p>
<p>Muhammad Shahid Iqbal, Anhui University, China</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Asim Waqas <email>asim.waqas&#x00040;moffitt.org</email></corresp>
<fn fn-type="equal" id="fn002"><p>&#x02020;These authors have contributed equally to this work and share first authorship</p></fn></author-notes>
<pub-date pub-type="epub">
<day>25</day>
<month>07</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>7</volume>
<elocation-id>1408843</elocation-id>
<history>
<date date-type="received">
<day>28</day>
<month>03</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>07</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2024 Waqas, Tripathi, Ramachandran, Stewart and Rasool.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Waqas, Tripathi, Ramachandran, Stewart and Rasool</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>Cancer research encompasses data across various scales, modalities, and resolutions, from screening and diagnostic imaging to digitized histopathology slides to various types of molecular data and clinical records. The integration of these diverse data types for personalized cancer care and predictive modeling holds the promise of enhancing the accuracy and reliability of cancer screening, diagnosis, and treatment. Traditional analytical methods, which often focus on isolated or unimodal information, fall short of capturing the complex and heterogeneous nature of cancer data. The advent of deep neural networks has spurred the development of sophisticated multimodal data fusion techniques capable of extracting and synthesizing information from disparate sources. Among these, Graph Neural Networks (GNNs) and Transformers have emerged as powerful tools for multimodal learning, demonstrating significant success. This review presents the foundational principles of multimodal learning including oncology data modalities, taxonomy of multimodal learning, and fusion strategies. We delve into the recent advancements in GNNs and Transformers for the fusion of multimodal data in oncology, spotlighting key studies and their pivotal findings. We discuss the unique challenges of multimodal learning, such as data heterogeneity and integration complexities, alongside the opportunities it presents for a more nuanced and comprehensive understanding of cancer. Finally, we present some of the latest comprehensive multimodal pan-cancer data sources. By surveying the landscape of multimodal data integration in oncology, our goal is to underline the transformative potential of multimodal GNNs and Transformers. Through technological advancements and the methodological innovations presented in this review, we aim to chart a course for future research in this promising field. 
This review may be the first that highlights the current state of multimodal modeling applications in cancer using GNNs and transformers, presents comprehensive multimodal oncology data sources, and sets the stage for multimodal evolution, encouraging further exploration and development in personalized cancer care.</p></abstract>
<kwd-group>
<kwd>multimodal</kwd>
<kwd>graph neural networks</kwd>
<kwd>transformers</kwd>
<kwd>oncology</kwd>
<kwd>deep learning</kwd>
<kwd>cancer</kwd>
<kwd>multi-omics</kwd>
<kwd>machine learning</kwd>
</kwd-group>
<counts>
<fig-count count="9"/>
<table-count count="5"/>
<equation-count count="3"/>
<ref-count count="205"/>
<page-count count="24"/>
<word-count count="18179"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Medicine and Public Health</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>Cancer represents a significant global health challenge, characterized by the uncontrolled growth of abnormal cells, leading to millions of deaths annually. In 2023, the United States had around 1.9 million new cancer diagnoses, with cancer being the second leading cause of death and anticipated to result in approximately 1670 deaths daily (Siegel et al., <xref ref-type="bibr" rid="B138">2023</xref>). However, advancements in oncology research hold the promise of preventing nearly 42% of these cases through early detection and lifestyle modifications. The complexity of cancer, involving intricate changes at both the microscopic and macroscopic levels, requires innovative approaches to its understanding and management. In recent years, the application of machine learning (ML) techniques, especially deep learning (DL), has emerged as a transformative force in oncology. DL employs deep neural networks to analyze vast datasets, offering unprecedented insights into cancer&#x00027;s development and progression (&#x000C7;al&#x00131;&#x0015F;kan and Tazaki, <xref ref-type="bibr" rid="B20">2023</xref>; Chen et al., <xref ref-type="bibr" rid="B25">2023</xref>; Siam et al., <xref ref-type="bibr" rid="B137">2023</xref>; Muhammad et al., <xref ref-type="bibr" rid="B105">2024</xref>; Talebi et al., <xref ref-type="bibr" rid="B149">2024</xref>). This approach has led to the development of computer-aided diagnostic systems capable of detecting and classifying cancerous tissues in medical images, such as mammograms and MRI scans, with increasing accuracy. 
Beyond imaging, DL also plays a crucial role in analyzing molecular data, aiding in the prediction of treatment responses, and the identification of new biomarkers (Dera et al., <xref ref-type="bibr" rid="B34">2019</xref>, <xref ref-type="bibr" rid="B33">2021</xref>; Waqas et al., <xref ref-type="bibr" rid="B172">2021</xref>; Barhoumi et al., <xref ref-type="bibr" rid="B15">2023</xref>; Khan et al., <xref ref-type="bibr" rid="B78">2023</xref>; Muhammad and Bria, <xref ref-type="bibr" rid="B104">2023</xref>; Varlamova et al., <xref ref-type="bibr" rid="B160">2024</xref>). DL methods can be categorized based on the level of supervision involved. Supervised learning includes techniques like Convolutional Neural Networks (CNNs) for tumor image classification and Recurrent Neural Networks (RNNs) for predicting patient outcomes, both requiring labeled data (LeCun et al., <xref ref-type="bibr" rid="B80">2015</xref>; Iqbal et al., <xref ref-type="bibr" rid="B63">2019</xref>, <xref ref-type="bibr" rid="B62">2022</xref>). Unsupervised deep learning methods, such as Autoencoders and Generative Adversarial Networks (GANs), learn from unlabeled data to perform tasks like clustering patients based on gene expression profiles or generating synthetic medical images. Semi-supervised deep learning methods, like Semi-Supervised GANs, leverage a mix of labeled and unlabeled data to enhance model performance when labeled medical data is limited. Self-supervised learning methods, such as BERT (Bidirectional Encoder Representations from Transformers) and GPT (Generative Pre-trained Transformer), use the structure of training data itself for supervision, enabling tasks like predicting patient outcomes or understanding the progression of cancer with limited labeled examples. Reinforcement learning in cancer studies, exemplified by Deep Q-Networks (DQN) and Proximal Policy Optimization (PPO), involves an agent learning optimal treatment strategies through rewards and penalties.</p>
<p>As the volume of oncology data continues to grow, DL stands at the forefront of this field, enhancing our understanding of cancer, improving diagnostic precision, predicting clinical outcomes, and paving the way for innovative treatments. This review explores the latest advancements in DL applications within oncology, highlighting its potential to revolutionize cancer care (Chan et al., <xref ref-type="bibr" rid="B22">2020</xref>; Ibrahim et al., <xref ref-type="bibr" rid="B61">2022</xref>; Ghaffari Laleh et al., <xref ref-type="bibr" rid="B46">2023</xref>; Tripathi et al., <xref ref-type="bibr" rid="B155">2024a</xref>).</p>
<p>Multimodal Learning (MML) enhances task accuracy and reliability by leveraging information from various data sources or modalities (Huang et al., <xref ref-type="bibr" rid="B59">2021</xref>). This approach has witnessed a surge in popularity, as indicated by the growing body of MML-related publications (see <xref ref-type="fig" rid="F1">Figure 1</xref>). By facilitating the fusion of multimodal data, such as radiological images, digitized pathology slides, molecular data, and electronic health records (EHR), MML offers a richer understanding of complex problems (Tripathi et al., <xref ref-type="bibr" rid="B157">2024c</xref>). It enables the extraction and integration of relevant features that might be overlooked when analyzing data modalities separately. Recent advancements in MML, powered by Deep Neural Networks (DNNs), have shown remarkable capability in learning from diverse data sources, including computer vision (CV) and natural language processing (NLP) (Bommasani et al., <xref ref-type="bibr" rid="B18">2022</xref>; Achiam et al., <xref ref-type="bibr" rid="B1">2023</xref>). Prominent multimodal foundation models such as Contrastive Language-Image Pretraining (CLIP) and Generative Pretraining Transformer (GPT-4) by OpenAI have set new benchmarks in the field (Radford et al., <xref ref-type="bibr" rid="B118">2021</xref>; Achiam et al., <xref ref-type="bibr" rid="B1">2023</xref>). Additionally, the Foundational Language And Vision Alignment Model (FLAVA) represents another significant stride, merging vision and language representation learning to facilitate multimodal reasoning (Singh et al., <xref ref-type="bibr" rid="B139">2022</xref>). Within the realm of oncology, innovative applications of MML are emerging. 
The RadGenNets model, for instance, integrates clinical and genomics data with Positron Emission Tomography (PET) scans and gene mutation data, employing a combination of Convolutional Neural Networks (CNNs) and Dense Neural Networks to predict gene mutations in Non-small cell lung cancer (NSCLC) patients (Tripathi et al., <xref ref-type="bibr" rid="B158">2022</xref>). Moreover, GNNs and Transformers are being explored for a variety of oncology-related tasks, such as tumor classification (Khan et al., <xref ref-type="bibr" rid="B77">2020</xref>), prognosis prediction (Schulz et al., <xref ref-type="bibr" rid="B133">2021</xref>), and assessing treatment response (Joo et al., <xref ref-type="bibr" rid="B74">2021</xref>).</p>


<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Number of publications involving DL, GNNs, GNNs in the medical domain, overall multimodal and multimodal in biomedical and clinical sciences in the period 2015&#x02013;2024 (Hook et al., <xref ref-type="bibr" rid="B56">2018</xref>).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1408843-g0001.tif"/>
</fig>


<p>Recent literature has seen an influx of survey and review articles exploring MML (Baltru&#x00161;aitis et al., <xref ref-type="bibr" rid="B14">2018</xref>; Boehm et al., <xref ref-type="bibr" rid="B17">2021</xref>; Ektefaie et al., <xref ref-type="bibr" rid="B39">2023</xref>; Xu et al., <xref ref-type="bibr" rid="B182">2023</xref>; Hartsock and Rasool, <xref ref-type="bibr" rid="B55">2024</xref>). These works have provided valuable insights into the evolving landscape of MML, charting key trends and challenges within the field. Despite this growing body of knowledge, there remains a notable gap in the literature regarding the application of advanced multimodal DL models, such as Graph Neural Networks (GNNs) and Transformers, in the domain of oncology. Our article aims to fill this gap by offering the following contributions:</p>
<list list-type="order">
<list-item><p><italic>Identifying large-scale MML approaches in oncology</italic>. We provide an overview of the state-of-the-art MML with a special focus on GNNs and Transformers for multimodal data fusion in oncology.</p></list-item>
<list-item><p><italic>Highlighting the challenges and limitations of MML in oncology data fusion</italic>. We discuss the challenges and limitations of implementing multimodal data-fusion models in oncology, including the need for large datasets, the complexity of integrating diverse data types, data alignment, and missing data modalities and samples.</p></list-item>
<list-item><p><italic>Providing a taxonomy for describing multimodal architectures</italic>. We present a comprehensive taxonomy for describing MML architectures, including both traditional ML and DL, to facilitate future research in this area.</p></list-item>
<list-item><p><italic>Identifying future directions for multimodal data fusion in oncology</italic>. We identify GNNs and Transformers as potential solutions for comprehensive multimodal integration and present the associated challenges.</p></list-item>
</list>
<p>By addressing these aspects, our article seeks to advance the understanding of MML&#x00027;s potential in oncology, paving the way for innovative solutions that could revolutionize cancer diagnosis and treatment through comprehensive data integration.</p>
<p>Our paper is organized as follows. Section 2 covers the fundamentals of MML, including data modalities, taxonomy, data fusion stages, and neural network architectures. Section 3 focuses on GNNs in MML, explaining graph data, learning on graphs, architectures, and applications to unimodal and multimodal oncology datasets. Section 4 discusses Transformers in MML, including architecture, multimodal Transformers, applications to oncology datasets, and methods of fusing data modalities. Section 5 highlights challenges in MML, such as data availability, alignment, generalization, missing data, explainability, and others. Section 6 provides information on data sources. Finally, we conclude by emphasizing the promise of integrating data across modalities and the need for scalable DL frameworks with desirable properties.</p></sec>
<sec id="s2">
<title>2 Fundamentals of multimodal learning (MML)</title>
<sec>
<title>2.1 Data modalities in oncology</title>
<p>A data <italic>modality</italic> represents the expression of an entity or a particular form of sensory perception, such as the characters&#x00027; visual actions, sounds of spoken dialogues, or the background music (Sleeman et al., <xref ref-type="bibr" rid="B141">2022</xref>). A collective notion of these modalities is called <italic>multi-modality</italic> (Baltru&#x00161;aitis et al., <xref ref-type="bibr" rid="B14">2018</xref>). Traditional data analysis and ML methods to study cancer data use single data modalities [e.g., EHR (Miotto et al., <xref ref-type="bibr" rid="B101">2016</xref>), radiology (Waqas et al., <xref ref-type="bibr" rid="B172">2021</xref>), pathology (Litjens et al., <xref ref-type="bibr" rid="B93">2017</xref>), or molecular, including genomics (Angermueller et al., <xref ref-type="bibr" rid="B11">2017</xref>), transcriptomics (Yousefi et al., <xref ref-type="bibr" rid="B191">2017</xref>), proteomics (Wang et al., <xref ref-type="bibr" rid="B168">2017</xref>), etc.]. However, the data is inherently multimodal, as it includes information from multiple sources or modalities that are related in many ways. <xref ref-type="fig" rid="F2">Figure 2</xref> provides a view of multiple modalities of cancer at various scales, from the population level to single-cell analysis. Oncology data can be broadly classified into 3 categories: clinical, molecular, and imaging, where each category provides complementary information about the patient&#x00027;s disease. <xref ref-type="fig" rid="F2">Figure 2</xref> highlights different clinical, molecular, and imaging modalities. Multimodal analysis endeavors to gain holistic insights into the disease process using multimodal data.</p>


<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>We present various data modalities that capture specific aspects of cancer at different scales. For example, radiological images capture organ or sub-organ level abnormalities, while tissue analysis may provide changes in the cellular structure and morphology. On the other hand, various molecular data types may provide insights into genetic mutations and epigenetic changes. <bold>(A)</bold> An overview of data collected from population to a tissue. <bold>(B)</bold> Detailed look into data modalities acquired for cancer care.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1408843-g0002.tif"/>
</fig>


<sec>
<title>2.1.1 Molecular data</title>
<p>Molecular data modalities provide information about the underlying genetic changes and alterations in the cancer cells (Liu et al., <xref ref-type="bibr" rid="B94">2021</xref>). Efforts toward integrating molecular data resulted in the <italic>multi-omics</italic> research field (Waqas et al., <xref ref-type="bibr" rid="B174">2024a</xref>). Two principal areas of molecular analysis in oncology are proteomics and genomics. <italic>Proteomics</italic> is the study of proteins and their changes in response to cancer, and it provides information about the biological processes taking place in cancer cells. <italic>Genomics</italic> is the study of the entire genome of cancer cells, including changes in DNA sequence, gene expression, and structural variations (Boehm et al., <xref ref-type="bibr" rid="B17">2021</xref>). Other molecular modalities include transcriptomics, pathomics, radiomics and their combinations, radiogenomics, and proteogenomics. Many publicly available datasets provide access to molecular data, including the Proteomics Data Commons for proteomics data and the Genome Data Commons for genetic data (Grossman et al., <xref ref-type="bibr" rid="B50">2016</xref>; Thangudu et al., <xref ref-type="bibr" rid="B151">2020</xref>).</p></sec>
<sec>
<title>2.1.2 Imaging data</title>
<p>Imaging modalities play a crucial role in diagnosing and monitoring cancer. The imaging category can be divided into 2 main categories: (1) radiological imaging and (2) digitized histopathology slides, referred to as Whole Slide Imaging (WSI). <italic>Radiological</italic> imaging encompasses various techniques such as X-rays, CT scans, MRI, PET, and others, which provide information about the location and extent of cancer within the body. These images can be used to determine the size and shape of a tumor, monitor its growth, and assess the effectiveness of treatments. <italic>Histopathological</italic> imaging is the examination of tissue samples obtained through biopsy or surgery (Rowe and Pomper, <xref ref-type="bibr" rid="B126">2022</xref>; Waqas et al., <xref ref-type="bibr" rid="B171">2023</xref>). Digitized slides, saved as WSIs, provide detailed information about the micro-structural changes in cancer cells and can be used to diagnose cancer and determine its subtype.</p></sec>
<sec>
<title>2.1.3 Clinical data</title>
<p>Clinical data provides information about the patient&#x00027;s medical history, physical examination, and laboratory results, saved in the patient&#x00027;s electronic health records (EHR) at the clinic. EHR consists of digital records of a patient&#x00027;s health information stored in a centralized database. These records provide a comprehensive view of a patient&#x00027;s medical history, past diagnoses, treatments, laboratory test results, and other information, which helps clinicians understand the disease (Asan et al., <xref ref-type="bibr" rid="B12">2018</xref>). Within EHR, time-series data may refer to the clinical data recorded over time, such as repeated blood tests, lab values, or physical attributes. Such data informs the changes in the patient&#x00027;s condition and monitors the disease progression (Quinn et al., <xref ref-type="bibr" rid="B117">2019</xref>).</p></sec></sec>
<sec>
<title>2.2 Taxonomy of MML</title>
<p>We follow the taxonomy proposed by Sleeman et al. (<xref ref-type="bibr" rid="B141">2022</xref>) (see <xref ref-type="fig" rid="F3">Figure 3</xref>), which defines 5 main stages of multimodal classification: preprocessing, feature extraction, data fusion, primary learner, and final classifier, as given below:</p>






<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Taxonomy, stages, and techniques of multimodal data fusion are presented. <italic>Early, late, cross-modality</italic> fusion methods integrate individual data modalities (or extracted features) <italic>before, after</italic>, or <italic>at</italic> the primary learning step, respectively.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1408843-g0003.tif"/>
</fig>


<sec>
<title>2.2.1 Pre-processing</title>
<p>Pre-processing involves modifying the input data to a suitable format before feeding it into the model for training. It includes data cleaning, normalization, class balancing, and augmentation. Data cleaning removes unwanted noise or bias, errors, and missing data points (Al-jabery et al., <xref ref-type="bibr" rid="B8">2020</xref>). Normalization scales the input data within a specific range to ensure that each modality contributes equally to the training (Gonzalez Zelaya, <xref ref-type="bibr" rid="B49">2019</xref>). Class balancing is done in cases where one class may have a significantly larger number of samples than another, resulting in a model bias toward the dominant class. Data augmentation artificially increases the size of the dataset by generating new samples based on the existing data to improve the model&#x00027;s robustness and generalizability (Al-jabery et al., <xref ref-type="bibr" rid="B8">2020</xref>).</p></sec>
<sec>
<title>2.2.2 Feature extraction</title>
<p>Different data modalities may have different features, and extracting relevant features may improve model learning. Several manual and automated feature engineering techniques generate representations (or <italic>embeddings</italic>) for each data modality. Feature engineering involves designing features relevant to the task and extracting them from the input data. This can be time-consuming but may allow the model to incorporate prior knowledge about the problem. Text encoding techniques, such as bag-of-words, word embeddings, and topic models (Devlin et al., <xref ref-type="bibr" rid="B36">2019</xref>; Zhuang et al., <xref ref-type="bibr" rid="B204">2021</xref>), transform textual data into a numerical representation, which can be used as input to an ML model (Wang et al., <xref ref-type="bibr" rid="B165">2020a</xref>). In DL, feature extraction is learned automatically during model training (Dara and Tumma, <xref ref-type="bibr" rid="B31">2018</xref>).</p></sec>
<sec>
<title>2.2.3 Data fusion</title>
<p>Data fusion combines raw features, extracted features, or class prediction vectors from multiple modalities to create a single data representation. Fusion enables the model to use the complementary information provided by each modality and improve its learning. Data fusion can be done using early, late, or intermediate fusion. Section 2.3 discusses these fusion stages. The choice of fusion technique depends on the characteristics of the data and the specific problem being addressed (Jiang et al., <xref ref-type="bibr" rid="B71">2022</xref>).</p></sec>
<sec>
<title>2.2.4 Primary learner</title>
<p>The primary learner stage is training the model on the pre-processed data or extracted features. Depending on the problem and data, the primary learner can be implemented using various ML techniques. DNNs are a popular choice for primary learners in MML because they can automatically learn high-level representations from the input data and have demonstrated state-of-the-art performance in many applications. CNNs are often used for image and video data, while recurrent neural networks (RNNs) and Transformers are commonly used for text and sequential data. The primary learner can be implemented independently for each modality or shared between modalities, depending on the problem and data.</p></sec>
<sec>
<title>2.2.5 Final classifier</title>
<p>The final stage of MML is the classifier, which produces category labels or class scores and can be trained on the output of the primary learner or the fused data. The final classifier can be implemented using a shallow neural network, a decision tree, or an ensemble model (Sleeman et al., <xref ref-type="bibr" rid="B141">2022</xref>). Ensemble methods, such as stacking or boosting, are often used to improve and robustify the performance of the final classifier. Stacking involves training multiple models and then combining their predictions at the output stage, while boosting involves repeatedly training weak learners and adjusting their weights based on the errors made by previous learners (Borisov et al., <xref ref-type="bibr" rid="B19">2022</xref>).</p></sec></sec>
<sec>
<title>2.3 Data fusion strategies</title>
<p>Fusion in MML can be performed at different levels, including early (feature level), intermediate (model level), or late (decision level) stages, as illustrated in <xref ref-type="fig" rid="F3">Figure 3</xref>. Each fusion stage has its advantages and challenges, and the choice of fusion stage depends on the characteristics of the data and the task.</p>
<sec>
<title>2.3.1 Early fusion</title>
<p>The early fusion involves merging features extracted from different data modalities into a single feature vector before model training. The feature vectors of the different modalities are combined into a single vector, which is used as the input to the ML model (Sleeman et al., <xref ref-type="bibr" rid="B141">2022</xref>). This approach can be used when the modalities have complementary information and can be easily aligned, such as combining visual and audio features in a video analysis application. The main challenge with early fusion is ensuring that the features extracted from different modalities are compatible and provide complementary information.</p></sec>
<sec>
<title>2.3.2 Intermediate fusion</title>
<p>Intermediate fusion involves training separate models for each data modality and then combining the outputs of these models for inference/prediction (Sleeman et al., <xref ref-type="bibr" rid="B141">2022</xref>). This approach is suitable when the data modalities are independent of each other and cannot be easily combined at the feature level using average, weighted average, or other methods. The main challenge with intermediate fusion is selecting an appropriate method for combining the output of different models.</p></sec>
<sec>
<title>2.3.3 Late fusion</title>
<p>In late fusion, the output of each modality-specific model is used to make a decision independently. All decisions are later combined to make a final decision. This approach is suitable when the modalities provide complementary information but are not necessarily independent of each other. The main challenge with late fusion is selecting an appropriate method for combining individual predictions. This can be done using majority voting, weighted voting, or employing other ML models.</p></sec></sec>
<sec>
<title>2.4 MML for oncology datasets</title>
<p>Syed et al. (<xref ref-type="bibr" rid="B148">2021</xref>) used a Random Forest classifier to fuse radiology image representations learned from the singular value decomposition method with the textual annotation representation learned from the fastText algorithm for prostate and lung cancer patients. Liu et al. (<xref ref-type="bibr" rid="B95">2022</xref>) proposed a hybrid DL framework for combining breast cancer patients&#x00027; genomic and pathology data using fully-connected (FC) network for genomic data, CNN for radiology data and a Simulated Annealing algorithm for late fusion. Multiview multimodal network (MVMM-Net) (Song J. et al., <xref ref-type="bibr" rid="B142">2021</xref>) combined 2 different modalities (low-energy and dual-energy subtracted) from contrast-enhanced spectral mammography images, each learned through CNN and late-fusion through FC network in breast cancer detection task. Yap et al. (<xref ref-type="bibr" rid="B187">2018</xref>) used a late-fusion method to fuse image representations from ResNet50 and clinical representations from a random forest model for a multimodal skin lesion classification task. An award-winning work (Ma and Jia, <xref ref-type="bibr" rid="B98">2020</xref>) on brain tumor grade classification adopted the late-fusion method (concatenation) for fusing outputs from two CNNs (radiology and pathology images). SeNMo, a self-normalizing deep learning model has shown that integrative analysis on 33 cancers having five different molecular (multi-omics) data modalities can improve the patient outcome predictions and primary cancer type classification (Waqas et al., <xref ref-type="bibr" rid="B174">2024a</xref>). Recently, GNNs-based pan-squamous cell carcinoma analysis on lung, bladder, cervical, esophageal, and head and neck cancers has outperformed different classical and deep learning models (Waqas et al., <xref ref-type="bibr" rid="B175">2024b</xref>).</p>
<p>The single-cell unimodal data alignment is one technique in MML. Jansen et al. (<xref ref-type="bibr" rid="B66">2019</xref>) devised an approach (SOMatic) to combine ATAC-seq regions with RNA-seq genes using self-organizing maps. Single-Cell data Integration via Matching (SCIM) matched cells in multiple datasets in low-dimensional latent space using autoencoders (AEs) (Stark et al., <xref ref-type="bibr" rid="B144">2020</xref>). Graph-linked unified embedding (GLUE) model learned regulatory interactions across omics layers and aligned the cells using variational AEs (Cao and Gao, <xref ref-type="bibr" rid="B21">2022</xref>). These aforementioned methods cannot incorporate high-order interactions among cells or different modalities. Single-cell data integration using multiple modalities is mostly based on AEs [scDART (Zhang Z. et al., <xref ref-type="bibr" rid="B198">2022</xref>), Cross-modal Autoencoders (Yang K. D. et al., <xref ref-type="bibr" rid="B184">2021</xref>), Mutual Information Learning for Integration of Single Cell Omics Data (SMILE) (Xu et al., <xref ref-type="bibr" rid="B183">2022</xref>)].</p>
<p>The relevant works discussed in this section are summarized in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>References discussed in Section 2.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Sections</bold></th>
<th/>
<th valign="top" align="left"><bold>References</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" rowspan="3">Data modalities in oncology</td>
<td valign="top" align="left">Molecular</td>
<td valign="top" align="left">Grossman et al., <xref ref-type="bibr" rid="B50">2016</xref>; Thangudu et al., <xref ref-type="bibr" rid="B151">2020</xref>; Boehm et al., <xref ref-type="bibr" rid="B17">2021</xref>; Liu et al., <xref ref-type="bibr" rid="B94">2021</xref>; Waqas et al., <xref ref-type="bibr" rid="B174">2024a</xref></td>
</tr>
 <tr>
<td valign="top" align="left">Imaging</td>
<td valign="top" align="left">Rowe and Pomper, <xref ref-type="bibr" rid="B126">2022</xref>; Waqas et al., <xref ref-type="bibr" rid="B171">2023</xref></td>
</tr>
 <tr>
<td valign="top" align="left">Clinical</td>
<td valign="top" align="left">Asan et al., <xref ref-type="bibr" rid="B12">2018</xref>; Quinn et al., <xref ref-type="bibr" rid="B117">2019</xref></td>
</tr> <tr>
<td valign="top" align="left">Taxonomy of MML</td>
<td/>
<td valign="top" align="left">Dara and Tumma, <xref ref-type="bibr" rid="B31">2018</xref>; Devlin et al., <xref ref-type="bibr" rid="B36">2019</xref>; Gonzalez Zelaya, <xref ref-type="bibr" rid="B49">2019</xref>; Al-jabery et al., <xref ref-type="bibr" rid="B8">2020</xref>; Wang et al., <xref ref-type="bibr" rid="B165">2020a</xref>; Zhuang et al., <xref ref-type="bibr" rid="B204">2021</xref>; Borisov et al., <xref ref-type="bibr" rid="B19">2022</xref>; Jiang et al., <xref ref-type="bibr" rid="B71">2022</xref>; Sleeman et al., <xref ref-type="bibr" rid="B141">2022</xref></td>
</tr> <tr>
<td valign="top" align="left">Data fusion strategies</td>
<td/>
<td valign="top" align="left">Sleeman et al., <xref ref-type="bibr" rid="B141">2022</xref></td>
</tr> <tr>
<td valign="top" align="left">MML for oncology datasets</td>
<td/>
<td valign="top" align="left">Yap et al., <xref ref-type="bibr" rid="B187">2018</xref>; Jansen et al., <xref ref-type="bibr" rid="B66">2019</xref>; Ma and Jia, <xref ref-type="bibr" rid="B98">2020</xref>; Stark et al., <xref ref-type="bibr" rid="B144">2020</xref>; Song J. et al., <xref ref-type="bibr" rid="B142">2021</xref>; Syed et al., <xref ref-type="bibr" rid="B148">2021</xref>; Yang K. D. et al., <xref ref-type="bibr" rid="B184">2021</xref>; Cao and Gao, <xref ref-type="bibr" rid="B21">2022</xref>; Liu et al., <xref ref-type="bibr" rid="B95">2022</xref>; Xu et al., <xref ref-type="bibr" rid="B183">2022</xref>; Zhang Z. et al., <xref ref-type="bibr" rid="B198">2022</xref>; Waqas et al., <xref ref-type="bibr" rid="B174">2024a</xref>,<xref ref-type="bibr" rid="B175">b</xref></td>
</tr></tbody>
</table>
</table-wrap>

</sec></sec>
<sec id="s3">
<title>3 Graph Neural Networks in multimodal learning</title>
<p>Graphs are commonly used to represent the relational connectivity of any system that has interacting entities (Li M. et al., <xref ref-type="bibr" rid="B84">2022</xref>). Graphs have been used in various fields, such as to study brain networks (Farooq et al., <xref ref-type="bibr" rid="B40">2019</xref>), analyze driving maps (Derrow-Pinion et al., <xref ref-type="bibr" rid="B35">2021</xref>), and explore the structure of DNNs themselves (Waqas et al., <xref ref-type="bibr" rid="B173">2022</xref>). GNNs are specifically designed to process data represented as a graph (Waikhom and Patgiri, <xref ref-type="bibr" rid="B164">2022</xref>), which makes them well-suited for analyzing multimodal oncology data as each data modality (or sub-modality) can be considered as a single node and the structures/patterns that exist between data modalities can be modeled as edges (Ektefaie et al., <xref ref-type="bibr" rid="B39">2023</xref>).</p>
<sec>
<title>3.1 The graph data</title>
<p>A graph is represented as <italic>G</italic>=(<italic>V, E</italic>) having node-set <italic>V</italic>={<italic>v</italic><sub>1</sub>, <italic>v</italic><sub>2</sub>, ..., <italic>v</italic><sub><italic>n</italic></sub>}, where node <italic>v</italic> has feature vector <bold>x</bold><sub><bold><italic>v</italic></bold></sub>, and edge set <italic>E</italic>={(<italic>v</italic><sub><italic>i</italic></sub>, <italic>v</italic><sub><italic>j</italic></sub>)&#x02223;<italic>v</italic><sub><italic>i</italic></sub>, <italic>v</italic><sub><italic>j</italic></sub>&#x02208;<italic>V</italic>}. The neighborhood of node <italic>v</italic> is defined as <italic>N</italic>(<italic>v</italic>)={<italic>u</italic>&#x02223;(<italic>u, v</italic>)&#x02208;<italic>E</italic>}.</p>
<sec>
<title>3.1.1 Graph types</title>
<p>As illustrated in <xref ref-type="fig" rid="F4">Figure 4A</xref>, the common types of graphs include undirected, directed, homogeneous, heterogeneous, static, dynamic, unattributed, and attributed. <italic>Undirected graphs</italic> comprise undirected edges, i.e., the direction of relation is not important between any ordered pair of nodes. In the <italic>directed graphs</italic>, the nodes have a directional relationship(s). Homogeneous graphs have the same type of nodes, whereas heterogeneous graphs have different types of nodes within a single graph (Yang T. et al., <xref ref-type="bibr" rid="B186">2021</xref>). Static graphs do not change over time with respect to the existence of edges and nodes. In contrast, dynamic graphs change over time, resulting in changes in structure, attributes, and node relationships. <italic>Unattributed graphs</italic> have unweighted edges, indicating that the weighted value for all edges in a graph is the same, i.e., 1 if present, 0 if absent. <italic>Attributed graphs</italic> have different edge weights that capture the strength of relational importance (Waikhom and Patgiri, <xref ref-type="bibr" rid="B164">2022</xref>).</p>




<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p><bold>(A)</bold> The commonly occurring graph types are presented, including (1) undirected and directed, (2) homogeneous and heterogeneous, (3) dynamic and static, (4) attributed (edges) and unattributed. <bold>(B)</bold> Three different types of tasks performed using the graph data are presented and include (1) node-level, (2) link-level, and (3) graph-level analyses. <bold>(C)</bold> Various categories of representation learning for graphs are presented.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1408843-g0004.tif"/>
</fig>

</sec>
<sec>
<title>3.1.2 Tasks for graph data</title>
<p>In <xref ref-type="fig" rid="F4">Figure 4B</xref>, we present 3 major types of tasks defined on graphs, including (1) <italic>node-level tasks</italic>- these may include node classification, regression, clustering, attributions, and generation, (2) <italic>edge-level task</italic>- edge classification and prediction (presence or absence) are 2 common edge-level tasks, (3) <italic>graph-level tasks</italic>- these tasks involve predictions on the graph level, such as graph classification and generation.</p></sec></sec>
<sec>
<title>3.2 ML for graph data</title>
<p>Representing data as graphs can enable capturing and encoding the relationships among entities of the samples (Wu et al., <xref ref-type="bibr" rid="B179">2020</xref>). Based on the way the nodes are encoded, representation learning on graphs can be categorized into the traditional (or shallow) and DNN-based methods, as illustrated in <xref ref-type="fig" rid="F4">Figure 4C</xref> (Wu et al., <xref ref-type="bibr" rid="B179">2020</xref>; Jiao et al., <xref ref-type="bibr" rid="B72">2022</xref>).</p>
<sec>
<title>3.2.1 Traditional (shallow) methods</title>
<p>These methods usually employ classical ML methods, and their two categories commonly found in the literature are <italic>graph embedding</italic> and <italic>probabilistic methods</italic>. Graph embedding methods represent a graph with low-dimensional vectors (graph embedding and node embedding), preserving the structural properties of the graph. The learning tasks in graph embedding usually involve dimensionality reduction through linear (principal component or discriminant analysis), kernel (nonlinear mapping), or tensor (higher-order structures) methods (Jiao et al., <xref ref-type="bibr" rid="B72">2022</xref>). Probabilistic graphical methods use graph data to represent probability distribution, where nodes are considered random variables, and edges depict the probability relations among nodes (Jiao et al., <xref ref-type="bibr" rid="B72">2022</xref>). Bayesian networks, Markov networks, variational inference, variable elimination, and others are used in probabilistic methods (Jiao et al., <xref ref-type="bibr" rid="B72">2022</xref>).</p></sec>
<sec>
<title>3.2.2 DNN-based methods - GNNs</title>
<p>GNNs are gaining popularity in the ML community, as evident from <xref ref-type="fig" rid="F1">Figure 1</xref>. In GNNs, the information aggregation from the neighborhood is fused into a node&#x00027;s representation. Traditional DL methods such as CNNs and their variants have shown remarkable success in processing the data in Euclidean space; however, they fail to perform well when faced with non-Euclidean or relational datasets. Compared to CNNs, where the locality of the nodes in the input is fixed, GNNs have no canonical ordering of the neighborhood of a node. They can learn the given task for any permutation of the input data, as depicted in <xref ref-type="fig" rid="F5">Figure 5</xref>. GNNs often employ a message-passing mechanism in which a node&#x00027;s representation is derived from its neighbors&#x00027; representations via a recursive computation. The message passing for a GNN is given as follows:</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M1"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>h</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo></mml:mtd><mml:mtd><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>u</mml:mi><mml:mo>&#x02208;</mml:mo><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:munder></mml:mstyle><mml:mfrac><mml:mrow><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>h</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mfrac><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>B</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>h</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>Convolution operation for graphs vs. image data. The canonical order of the input is important in CNNs, whereas in GNNs, the order of the input nodes is not important. From the convolution operation perspective, CNNs can be considered a subset of GNNs (Hamilton, <xref ref-type="bibr" rid="B53">2020</xref>).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1408843-g0005.tif"/>
</fig>
<p>where <inline-formula><mml:math id="M2"><mml:msubsup><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> is the updated embedding of node <italic>v</italic> after <italic>l</italic>&#x0002B;1 layer, &#x003C3; is the non-linear function (e.g., rectified linear unit or ReLU), <inline-formula><mml:math id="M3"><mml:msubsup><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> and <inline-formula><mml:math id="M4"><mml:msubsup><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> represent the embeddings of nodes <italic>u</italic> and <italic>v</italic> at layer <italic>l</italic>. <italic>W</italic><sub><italic>l</italic></sub> and <italic>B</italic><sub><italic>l</italic></sub> are the trainable weight matrices for neighborhood aggregation and (self)hidden vector transformation, respectively. The message passing can encode high-order structural information in node embedding through multiple aggregation layers. GNNs smooth the features by aggregating neighbors&#x00027; embedding and filter eigenvalues of graph Laplacian, which provides an extra denoising mechanism (Ma Y. et al., <xref ref-type="bibr" rid="B99">2021</xref>). 
GNNs comprise multiple permutation equivariant and invariant functions, and they can handle heterogeneous data (Jin et al., <xref ref-type="bibr" rid="B73">2022</xref>). As described earlier, traditional ML models deal with Euclidean data. In oncology data, the correlations may not exist in Euclidean space; instead, its features may be highly correlated in the non-Euclidean space (Yi et al., <xref ref-type="bibr" rid="B188">2022</xref>). Based on the information fusion and aggregation methodology, GNNs-based deep methods are classified into the following:</p>
<sec>
<title>3.2.2.1 Recurrent GNNs</title>
<p>RecGNNs are built on top of the standard Recurrent Neural Network (RNN) by combining with GNN. RecGNNs can operate on graphs with variable sizes and topologies. The recurrent component of the RecGNN captures temporal dependencies and learns latent states over time, whereas the GNN component captures the local structure. The information fusion process is repeated a fixed number of times until an equilibrium or the desired state is achieved (Hamilton et al., <xref ref-type="bibr" rid="B52">2017</xref>). RecGNNs employ the model given by:</p>
<disp-formula id="E2"><label>(2)</label><mml:math id="M5"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>h</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mtext class="textrm" mathvariant="normal">RecNN</mml:mtext><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>h</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>M</mml:mtext></mml:mstyle><mml:mi>s</mml:mi><mml:mstyle mathvariant="bold"><mml:mtext>g</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where, RecNN is any RNN, and <inline-formula><mml:math id="M6"><mml:mi>M</mml:mi><mml:mi>s</mml:mi><mml:msubsup><mml:mrow><mml:mi>g</mml:mi></mml:mrow><mml:mrow><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:math></inline-formula> is the neighborhood message-passing at layer <italic>l</italic>.</p></sec>
<sec>
<title>3.2.2.2 Convolutional GNNs</title>
<p>ConvGNNs undertake the convolution operation on graphs by aggregating neighboring nodes&#x00027; embeddings through a stack of multiple layers. ConvGNNs use the symmetric and normalized summation of the neighborhood and self-loops for updating the node embeddings given by:</p>
<disp-formula id="E3"><label>(3)</label><mml:math id="M7"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msubsup><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>h</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>l</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>u</mml:mi><mml:mo>&#x02208;</mml:mo><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0222A;</mml:mo><mml:mi>v</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant="bold"><mml:mtext>h</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>v</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msqrt><mml:mrow><mml:mo>|</mml:mo><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:msqrt></mml:mrow></mml:mfrac></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>The ConvGNN can be spatial or spectral, depending on the type of convolution they implement. Convolution in spatial ConvGNNs involves taking a weighted average of the neighboring vertices. Examples of spatial ConvGNNs include GraphSAGE (Hamilton et al., <xref ref-type="bibr" rid="B52">2017</xref>), Message Passing Neural Network (MPNN) (Gilmer et al., <xref ref-type="bibr" rid="B47">2017</xref>), and Graph Attention Network (GAT) (Veli&#x0010D;kovi&#x00107; et al., <xref ref-type="bibr" rid="B162">2017</xref>). The spectral ConvGNNs operate in the spectral domain by using the eigendecomposition of the graph Laplacian matrix. The convolution operation is performed on the eigenvalues, which can be high-dimensional. Popular spectral ConvGNNs are ChebNet (Defferrard et al., <xref ref-type="bibr" rid="B32">2016</xref>) and Graph Convolutional Network (GCN) (Kipf and Welling, <xref ref-type="bibr" rid="B79">2016</xref>). An interesting aspect of these approaches is representational containment, which is defined as: convolution&#x02286;attention&#x02286;message passing.</p></sec>
<sec>
<title>3.2.2.3 Graph Auto-Encoder Networks</title>
<p>GAEs are unsupervised graph learning networks for dimensionality reduction, anomaly detection, and graph generation. They are built on top of the standard AEs to work with graph data. The encoder component of the GAE maps the input graph to a low-dimensional latent space, while the decoder component maps the latent space back to the original graph (Park et al., <xref ref-type="bibr" rid="B112">2021</xref>).</p></sec>
<sec>
<title>3.2.2.4 Graph Adversarial Networks</title>
<p>Based on Generative Adversarial Networks, GraphANs are designed to work with graph-structured data and can learn to generate new graphs with similar properties to the input data. The generator component of the GraphAN maps a random noise vector to a new graph, while the discriminator component tries to distinguish between the generated vs. the actual input. The generator generates graphs to fool the discriminator, while the discriminator tries to classify the given graph as real or generated.</p></sec>
<sec>
<title>3.2.2.5 Other GNNs</title>
<p>Other categories of GNNs may include scalable GNNs (Ma et al., <xref ref-type="bibr" rid="B97">2019</xref>), dynamic GNNs (Sankar et al., <xref ref-type="bibr" rid="B130">2018</xref>), hypergraph GNNs (Bai et al., <xref ref-type="bibr" rid="B13">2021</xref>), heterogeneous GNNs (Wei et al., <xref ref-type="bibr" rid="B177">2019</xref>), and many others (Ma and Tang, <xref ref-type="bibr" rid="B100">2021</xref>).</p></sec></sec>
<sec>
<title>3.2.3 Graph-based reinforcement learning</title>
<p>GNNs have been combined with Reinforcement Learning (RL) to solve complex problems involving graph-structured data (Jiang et al., <xref ref-type="bibr" rid="B70">2018</xref>). GNNs enable RL agents to effectively process and reason about relational information in environments represented as graphs (Nie et al., <xref ref-type="bibr" rid="B108">2023</xref>). This combination has shown promise in various domains, including multi-agent systems, robotics, and combinatorial optimization (Almasan et al., <xref ref-type="bibr" rid="B9">2022</xref>; Fathinezhad et al., <xref ref-type="bibr" rid="B41">2023</xref>). However, the use of Graph-based RL on cancer data is still a less-explored area of research.</p></sec></sec>
<sec>
<title>3.3 GNNs and ML using unimodal oncology datasets</title>
<sec>
<title>3.3.1 Pathology datasets</title>
<p>Traditionally, CNN-based models are used to learn features from digital pathology data (Iqbal et al., <xref ref-type="bibr" rid="B62">2022</xref>). However, unlike GNNs, CNNs fail to capture the global contextual information important in the tissue phenotypical and structural micro and macro environment (Ahmedt-Aristizabal et al., <xref ref-type="bibr" rid="B4">2022</xref>). For using histology images in GNNs, the cells, tissue regions, or image patches are depicted as nodes. The relations and interactions among these nodes are represented as (un)weighted edges. Usually, a graph of the patient histology slide is used along with a patient-level label for training a GNN, as illustrated in <xref ref-type="fig" rid="F6">Figure 6A</xref>. Here, we review a few GNN-based pathology publications representative of a trove of works in this field. Histographs (Anand et al., <xref ref-type="bibr" rid="B10">2020</xref>) used breast cancer histology data to distinguish cancerous and non-cancerous images. Pre-trained VGG-UNet was used for nuclei detection, micro-features of the nuclei were used as node features, and Euclidean distance among nuclei was incorporated as edge features. The resulting cell graphs were used to train the GCN-based robust spatial filtering (RSF) model, which performed superior to the CNN-based classification. Wang et al. (2020) analyzed grade classification in tissue micro-arrays of prostate cancer using the weakly-supervised technique on a variant of GraphSAGE with self-attention pooling (SAGPool). Cell-Graph Signature (<italic>CG</italic><sub><italic>signature</italic></sub>) (Wang et al., <xref ref-type="bibr" rid="B170">2022</xref>) predicted patient survival in gastric cancer using cell-graphs of multiplexed immunohistochemistry images processed through two types of GNNs (GCNs and GINs) with two types of pooling (SAGPool, TopKPool). 
Besides the above-mentioned cell graphs, there is an elaborate review of GNN-based tissue graphs or patch-graphs methods implemented on unimodal pathology cancer data given in Ahmedt-Aristizabal et al. (<xref ref-type="bibr" rid="B4">2022</xref>). Instead of individual cell- and tissue-graphs, a combination of the multilevel information in histology slides can help understand the intrinsic features of the disease.</p>


<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p><bold>(A)</bold> Data processing pipeline for histopathology images using GNNs (Chen et al., <xref ref-type="bibr" rid="B26">2020</xref>). <bold>(B)</bold> Graph processing pipeline on radiology data. Adapted from Singh et al. (<xref ref-type="bibr" rid="B140">2021</xref>).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1408843-g0006.tif"/>
</fig>

</sec>
<sec>
<title>3.3.2 Radiology datasets</title>
<p>GNNs have been used in radiology-based cancer data for segmentation, classification, and prediction tasks, especially on X-rays, mammograms, MRI, PET, and CT scans. <xref ref-type="fig" rid="F6">Figure 6B</xref> illustrates a general pipeline of using radiology-based data to train GNNs. Here we give a non-exhaustive review of GNNs-based works on radiological oncology data as a single modality input. Mo et al. (<xref ref-type="bibr" rid="B103">2020</xref>) proposed a framework that improved the liver cancer lesion segmentation in the MRI-T1WI scans through guided learning of MRI-T2WI modality priors. Learned embeddings from fully convolutional networks on separate MRI modalities are projected into the graph domain for learning by GCNs through the co-attention mechanism and finally to get the refined segmentation by re-projection. Radiologists usually review radiology images by zooming into the region of interest (ROIs) on high-resolution monitors. Du et al. (<xref ref-type="bibr" rid="B38">2019</xref>) used a hierarchical GNN framework to automatically zoom into the abnormal lesion region of the mammograms and classify breast cancer. The pre-trained CNN model extracts image features, whereas a GAT model is used to classify the nodes for deciding whether to zoom in or not based on whether it is benign or malignant. Based on the established knowledge that lymph nodes (LNs) have connected lymphatic system and LNs cancer cells spread on certain pathways, Chao et al. (<xref ref-type="bibr" rid="B23">2020</xref>) proposed a lymph node gross tumor volume learning framework. The framework was able to delineate the LN appearance as well as the inter-LN relationship. The end-to-end learning framework was superior to the state-of-the-art on esophageal cancer radiotherapy dataset. Tian et al. (<xref ref-type="bibr" rid="B153">2020</xref>) suggested interactive segmentation of MRI scans of prostate cancer patients through a combination of CNN and two GCNs. 
The CNN model outputs a segmentation feature map of MRI, and the GCNs predict the prostate contour from this feature map. Saueressig et al. (<xref ref-type="bibr" rid="B131">2021</xref>) used GNNs to segment brain tumors in 3D MRI images, formed by stacking different modalities of MRI (T1, T2, T1-CE, FLAIR) and representing them as a supervoxel graph. The authors reported that GraphSAGE-pool was best for segmenting brain tumors. Besides radiology, a parallel field of radiomics has recently gained traction. Radiomics is the automated extraction of quantitative features from radiology scans. A survey of radiomics and radiogenomic analysis on brain tumors is presented by Singh et al. (<xref ref-type="bibr" rid="B140">2021</xref>).</p></sec>
<sec>
<title>3.3.3 Molecular datasets</title>
<p>Graphs are a natural choice for representing molecular data such as omic-centric (DNA, RNA, or proteins) or single-cell centric. Individual modalities are processed separately to generate graph representations that are then processed through GNNs followed by the classifier to predict the downstream task, as illustrated in <xref ref-type="fig" rid="F7">Figure 7</xref>. One method of representing proteins as graphs is to depict the amino acid residue in the protein as the node and the relationship between residues denoted by edge (Fout et al., <xref ref-type="bibr" rid="B43">2017</xref>). The residue information is depicted as node embedding, whereas the relational information between two residues is represented as the edge feature vector. Fout et al. (<xref ref-type="bibr" rid="B43">2017</xref>) used spatial ConvGNNs to predict interfaces between proteins which is important in drug discovery problems. Deep predictor of drug-drug interactions (DPDDI) predicted the drug-drug interactions using GCN followed by a 5-layer classical neural network (Feng et al., <xref ref-type="bibr" rid="B42">2020</xref>). Molecular pre-training graph net (MPG) is a powerful framework based on GNN and Bidirectional Encoder Representations from Transformers (BERT) to learn drug-drug and drug-target interactions (Li et al., <xref ref-type="bibr" rid="B86">2021b</xref>). Graph-based Attention Model (GRAM) handled the data inefficiency by supplementing EHRs with hierarchical knowledge in the medical ontology (Choi et al., <xref ref-type="bibr" rid="B28">2017</xref>). A few recent works have applied GNNs to single-cell data. scGCN is a knowledge transfer framework in single-cell omics data such as mRNA or DNA (Song Q. et al., <xref ref-type="bibr" rid="B143">2021</xref>). scGNN processed cell-cell relations through GNNs for the task of missing-data imputation and cell clustering on single-cell RNA sequencing (scRNA-seq) data (Wang J. 
et al., <xref ref-type="bibr" rid="B167">2021</xref>).</p>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>Graph data processing pipeline for non-imagery data, including molecular and textual data. Adapted from Wang T. et al. (<xref ref-type="bibr" rid="B169">2021</xref>). GNN, Graph Neural Network; FC, Fully-Connected; MLP, Multi-Layer Perceptron.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1408843-g0007.tif"/>
</fig>

</sec></sec>
<sec>
<title>3.4 MML&#x02014;Data fusion at the pre-learning stage</title>
<p>The first and most primitive form of MML is the pre-learning fusion (see <xref ref-type="fig" rid="F3">Figure 3</xref>), where features extracted from individual modalities of data are merged, and the fused representations are then used for training the multimodal primary learner model. In the context of GNNs being the primary learning model, the extraction step of individual modality representations can be hand-engineered (e.g., dimensionality reduction) or learned by DL models (e.g., CNNs, Transformers). Cui et al. (<xref ref-type="bibr" rid="B29">2021</xref>) proposed a GNN-based early fusion framework to learn latent representations from radiological and clinical modalities for Lymph node metastasis (LNM) prediction in esophageal squamous cell carcinoma (ESCC). The extracted features from the two modalities using UNet and CNN-based encoders were fused together with category-wise attention as node representation. The message passing from conventional GAT and correlation-based GAT learned the neighborhood weights. The attention attributes were used to update the final node features before classification by a 3-layer fully connected network. For Autism spectrum disorder, Alzheimer&#x00027;s disease, and ocular diseases, a multimodal learning framework called Edge-Variational GCN (EV-GCN) fuses the radiology features extracted from fMRI images with clinical feature vectors for each patient (Huang and Chung, <xref ref-type="bibr" rid="B58">2020</xref>). An MLP-based pairwise association encoder is used to fuse the input feature vectors and to generate the edge weights of the population graph. The partially labeled population graph is then processed through GCN layers to generate the diagnostic graph of patients.</p></sec>
<sec>
<title>3.5 MML&#x02014;Data fusion using cross-modality learning</title>
<p>Cross-MML involves intermediate fusion and/or cross-links among the models being trained on individual modalities (see <xref ref-type="fig" rid="F3">Figure 3</xref>). For this survey, we consider the GNN-based hierarchical learning mechanisms as the cross-MML methods. Hierarchical frameworks involve learning for one modality and using the learned latent embeddings in tandem with other data modalities sequentially to get the final desired low-dimensional representations. Lian et al. (<xref ref-type="bibr" rid="B89">2022</xref>) used a sequential learning framework where tumor features learned from CT images using the ViT model were used as node features of the patient population graph for subsequent processing by the GraphSAGE model. The hierarchical learning from radiological and clinical data using Transformer-GNN outperformed the ResNet-Graph framework in survival prediction of early-stage NSCLC. scMoGNN is the first method to apply GNNs in multimodal single-cell data integration using a cross-learning fusion-based GNN framework (Wen et al., <xref ref-type="bibr" rid="B178">2022</xref>). Officially winning first place in modality prediction task at the NeurIPS 2021 competition, scMoGNN showed superior performance on various tasks by using paired data to generate cell-feature graphs. Hierarchical cell-to-tissue-graph network (HACT-Net) combined the low-level cell-graph features with the high-level tissue-graph features through two hierarchical GINs on breast cancer multi-class prediction (Pati et al., <xref ref-type="bibr" rid="B114">2020</xref>). Data imputation, a method of populating the missing values or false zero counts in single-cell data mostly done using DL autoencoders (AE) architecture, has recently been accomplished using GNNs. scGNN (Wang J. 
et al., <xref ref-type="bibr" rid="B167">2021</xref>) used imputation AE and graph AE in an iterative manner for imputation, and GraphSCI (Rao et al., <xref ref-type="bibr" rid="B122">2021</xref>) used GCN with AE to impute the single-cell RNA-seq data using the cross-learning fusion between the GCN and the AE networks. Clustering is a method of characterizing cell types within a tissue sample. Graph-SCC clustered cells based on scRNA-seq data through self-supervised cross-learning between GCN and a denoising AE network (Zeng et al., <xref ref-type="bibr" rid="B194">2020</xref>). Recently, a multilayer GNN framework, Explainable Multilayer GNN (EMGNN), has been proposed for cancer gene prediction tasks using multi-omics data from 16 different cancer types (Chatzianastasis et al., <xref ref-type="bibr" rid="B24">2023</xref>).</p></sec>
<sec>
<title>3.6 MML&#x02014;Data fusion in post-learning regime</title>
<p>Post-learning fusion methods include processing individual data modalities and later fusing them for the downstream predictive task (Tortora et al., <xref ref-type="bibr" rid="B154">2023</xref>). In the post-learning fusion paradigm, the hand-crafted features perform better than the deep features when the dimensionality of input data is low, and vice versa (Tortora et al., <xref ref-type="bibr" rid="B154">2023</xref>). Many interesting GNN-based works involving the post-learning fusion mechanism have recently been published. Decagon used a multimodal approach on GCNs using proteins and drug interactions to predict exact side effects as a multi-relational link prediction task (Zitnik et al., <xref ref-type="bibr" rid="B205">2018</xref>). Drug-target affinity (DTA) experimented with four different flavors of GNNs (GCN, GAT, GIN, GAT-GCN) along with a CNN to fuse together molecular embeddings and protein sequences for predicting drug-target affinity (Nguyen et al., <xref ref-type="bibr" rid="B107">2021</xref>). PathomicFusion combined the morphological features extracted from image patches (using CNNs), cell-graph features from cell-graphs of histology images (GraphSAGE-based GCNs), and genomic features (using a feed-forward network) for survival prediction on glioma and clear cell renal cell carcinoma (Chen et al., <xref ref-type="bibr" rid="B26">2020</xref>). Shi et al. (<xref ref-type="bibr" rid="B136">2019</xref>) proposed a late-fusion technique to study screening of cervical cancer at early stages by using CNNs to extract features from histology images, followed by K-means clustering to generate graphs which are processed through two-layer GCN. BDR-CNN-GCN (batch normalized, dropout, rank-based pooling) used the same mammographic images to extract image-level features using CNN and relation-aware features using GCN (Zhang et al., <xref ref-type="bibr" rid="B197">2021</xref>). 
The two feature sets are fused using a dot product followed by a trainable linear projection for breast cancer classification. Under the umbrella of multi-omics data, many GNN-based frameworks have been proposed recently. Molecular omics network (MOOMIN), a multi-modal heterogeneous GNN to predict oncology drug combinations, processed molecular structure, protein features, and cell lines through GCN-based encoders, followed by late-fusion using a bipartite drug-protein interaction graph (Rozemberczki et al., <xref ref-type="bibr" rid="B127">2022</xref>). Multi-omics graph convolutional networks (MOGONET) used a GCN-GAN late fusion technique for the classification of four different diseases, including three cancer types: breast, kidney, and glioma (Wang T. et al., <xref ref-type="bibr" rid="B169">2021</xref>). Leng et al. (<xref ref-type="bibr" rid="B82">2022</xref>) extended MOGONET to benchmark three multi-omics datasets on two different tasks using sixteen DL networks and concluded that GAT-based GNN had the best classification performance. Multi-Omics Graph Contrastive Learner (MOGCL) used graph structure and contrastive learning information to generate representations for improved downstream classification tasks on the breast cancer multi-omics dataset using late-fusion (Rajadhyaksha and Chitkara, <xref ref-type="bibr" rid="B121">2023</xref>). Similar to MOGCL, Park et al. (<xref ref-type="bibr" rid="B113">2022</xref>) developed a GNN-based multi-omics model that integrated mRNA expression, DNA methylation, and DNA sequencing data for NSCLC diagnosis.</p>
<p>The relevant works discussed in this section are summarized in <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>References discussed in Section 3.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Sections</bold></th>
<th/>
<th valign="top" align="left"><bold>References</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Graphs and GNNs</td>
<td/>
<td valign="top" align="left">Defferrard et al., <xref ref-type="bibr" rid="B32">2016</xref>; Kipf and Welling, <xref ref-type="bibr" rid="B79">2016</xref>; Gilmer et al., <xref ref-type="bibr" rid="B47">2017</xref>; Hamilton et al., <xref ref-type="bibr" rid="B52">2017</xref>; Veli&#x0010D;kovi&#x00107; et al., <xref ref-type="bibr" rid="B162">2017</xref>; Jiang et al., <xref ref-type="bibr" rid="B70">2018</xref>; Sankar et al., <xref ref-type="bibr" rid="B130">2018</xref>; Farooq et al., <xref ref-type="bibr" rid="B40">2019</xref>; Ma et al., <xref ref-type="bibr" rid="B97">2019</xref>; Wei et al., <xref ref-type="bibr" rid="B177">2019</xref>; Wu et al., <xref ref-type="bibr" rid="B179">2020</xref>; Bai et al., <xref ref-type="bibr" rid="B13">2021</xref>; Derrow-Pinion et al., <xref ref-type="bibr" rid="B35">2021</xref>; Ma and Tang, <xref ref-type="bibr" rid="B100">2021</xref>; Ma Y. et al., <xref ref-type="bibr" rid="B99">2021</xref>; Park et al., <xref ref-type="bibr" rid="B112">2021</xref>; Yang T. et al., <xref ref-type="bibr" rid="B186">2021</xref>; Almasan et al., <xref ref-type="bibr" rid="B9">2022</xref>; Jiao et al., <xref ref-type="bibr" rid="B72">2022</xref>; Jin et al., <xref ref-type="bibr" rid="B73">2022</xref>; Li M. et al., <xref ref-type="bibr" rid="B84">2022</xref>; Waikhom and Patgiri, <xref ref-type="bibr" rid="B164">2022</xref>; Waqas et al., <xref ref-type="bibr" rid="B173">2022</xref>; Yi et al., <xref ref-type="bibr" rid="B188">2022</xref>; Ektefaie et al., <xref ref-type="bibr" rid="B39">2023</xref>; Fathinezhad et al., <xref ref-type="bibr" rid="B41">2023</xref>; Nie et al., <xref ref-type="bibr" rid="B108">2023</xref></td>
</tr> <tr>
<td valign="top" align="left" rowspan="3">GNNs and ML using Unimodal Oncology Datasets</td>
<td valign="top" align="left">Pathology</td>
<td valign="top" align="left">Anand et al., <xref ref-type="bibr" rid="B10">2020</xref>; Wang et al., <xref ref-type="bibr" rid="B166">2020b</xref>, <xref ref-type="bibr" rid="B170">2022</xref>; Ahmedt-Aristizabal et al., <xref ref-type="bibr" rid="B4">2022</xref>; Iqbal et al., <xref ref-type="bibr" rid="B62">2022</xref></td>
</tr>
 <tr>
<td valign="top" align="left">Radiology</td>
<td valign="top" align="left">Du et al., <xref ref-type="bibr" rid="B38">2019</xref>; Chao et al., <xref ref-type="bibr" rid="B23">2020</xref>; Mo et al., <xref ref-type="bibr" rid="B103">2020</xref>; Tian et al., <xref ref-type="bibr" rid="B153">2020</xref>; Saueressig et al., <xref ref-type="bibr" rid="B131">2021</xref>; Singh et al., <xref ref-type="bibr" rid="B140">2021</xref></td>
</tr>
 <tr>
<td valign="top" align="left">Molecular</td>
<td valign="top" align="left">Choi et al., <xref ref-type="bibr" rid="B28">2017</xref>; Fout et al., <xref ref-type="bibr" rid="B43">2017</xref>; Feng et al., <xref ref-type="bibr" rid="B42">2020</xref>; Li et al., <xref ref-type="bibr" rid="B86">2021b</xref>; Song Q. et al., <xref ref-type="bibr" rid="B143">2021</xref>; Wang J. et al., <xref ref-type="bibr" rid="B167">2021</xref></td>
</tr> <tr>
<td valign="top" align="left">MML data fusion stages</td>
<td/>
<td valign="top" align="left">Zitnik et al., <xref ref-type="bibr" rid="B205">2018</xref>; Shi et al., <xref ref-type="bibr" rid="B136">2019</xref>; Chen et al., <xref ref-type="bibr" rid="B26">2020</xref>; Huang and Chung, <xref ref-type="bibr" rid="B58">2020</xref>; Pati et al., <xref ref-type="bibr" rid="B114">2020</xref>; Zeng et al., <xref ref-type="bibr" rid="B194">2020</xref>; Cui et al., <xref ref-type="bibr" rid="B29">2021</xref>; Nguyen et al., <xref ref-type="bibr" rid="B107">2021</xref>; Rao et al., <xref ref-type="bibr" rid="B122">2021</xref>; Wang J. et al., <xref ref-type="bibr" rid="B167">2021</xref>; Wang T. et al., <xref ref-type="bibr" rid="B169">2021</xref>; Zhang et al., <xref ref-type="bibr" rid="B197">2021</xref>; Leng et al., <xref ref-type="bibr" rid="B82">2022</xref>; Lian et al., <xref ref-type="bibr" rid="B89">2022</xref>; Park et al., <xref ref-type="bibr" rid="B113">2022</xref>; Rozemberczki et al., <xref ref-type="bibr" rid="B127">2022</xref>; Wen et al., <xref ref-type="bibr" rid="B178">2022</xref>; Chatzianastasis et al., <xref ref-type="bibr" rid="B24">2023</xref>; Rajadhyaksha and Chitkara, <xref ref-type="bibr" rid="B121">2023</xref>; Tortora et al., <xref ref-type="bibr" rid="B154">2023</xref></td>
</tr></tbody>
</table>
</table-wrap>

</sec></sec>
<sec id="s4">
<title>4 Transformers in MML</title>
<p>Transformers are attention-based DNN models originally proposed for NLP (Vaswani et al., <xref ref-type="bibr" rid="B161">2017</xref>). Transformers implement scaled dot-product of the input with itself and can process various types of data in parallel (Vaswani et al., <xref ref-type="bibr" rid="B161">2017</xref>). Transformers can handle sequential data and learn long-range dependencies, making them well-suited for tasks such as language translation, language modeling, question answering, and many more (Otter et al., <xref ref-type="bibr" rid="B111">2021</xref>). Unlike Recurrent Neural Networks (RNNs) and CNNs, Transformers use self-attention operations to weigh the importance of different input tokens (or embeddings) at each time step. This allows them to handle sequences of arbitrary length and to capture dependencies between input tokens that are far apart in the sequence (Vaswani et al., <xref ref-type="bibr" rid="B161">2017</xref>). Transformers can be viewed as a type of GNN (Xu et al., <xref ref-type="bibr" rid="B182">2023</xref>). Transformers are used to process other data types, such as images (Dosovitskiy et al., <xref ref-type="bibr" rid="B37">2020</xref>), audio (Zhang, <xref ref-type="bibr" rid="B196">2020</xref>), and time-series analysis (Ahmed et al., <xref ref-type="bibr" rid="B3">2022b</xref>), resulting in a new wave of multi-modal applications. Transformers can handle input sequences of different modalities in a unified way, using the same self-attention mechanism, which processes the inputs as a fully connected graph (Xu et al., <xref ref-type="bibr" rid="B182">2023</xref>). This allows Transformers to capture complex dependencies between different modalities, such as visual and textual information in visual question-answering (VQA) tasks (Ma J. et al., <xref ref-type="bibr" rid="B96">2021</xref>).</p>
<p>Pre-training Transformers on large amounts of data, using unsupervised or self-supervised learning, and then fine-tuning for specific downstream tasks, has led to the development of foundation models (Boehm et al., <xref ref-type="bibr" rid="B17">2021</xref>), such as BERT (Devlin et al., <xref ref-type="bibr" rid="B36">2019</xref>), GPT (Radford et al., <xref ref-type="bibr" rid="B119">2018</xref>), RoBERTa (Zhuang et al., <xref ref-type="bibr" rid="B204">2021</xref>), CLIP (Radford et al., <xref ref-type="bibr" rid="B118">2021</xref>), T5 (Raffel et al., <xref ref-type="bibr" rid="B120">2020</xref>), BART (Lewis et al., <xref ref-type="bibr" rid="B83">2019</xref>), BLOOM (Scao et al., <xref ref-type="bibr" rid="B132">2022</xref>), ALIGN (Jia et al., <xref ref-type="bibr" rid="B69">2021</xref>), CoCa (Yu et al., <xref ref-type="bibr" rid="B192">2022</xref>) and more. Multimodal Transformers are a recent development in the field of MML, which extends the capabilities of traditional Transformers to handle multiple data modalities. The inter-modality dependencies are captured by the cross-attention mechanism in multimodal Transformers, allowing the model to jointly reason and extract rich data representations. There are various types of multimodal Transformers, such as Unified Transformer (UniT) (Hu and Singh, <xref ref-type="bibr" rid="B57">2021</xref>), Multi-way Multimodal Transformer (MMT) (Tang et al., <xref ref-type="bibr" rid="B150">2022</xref>), CLIP (Radford et al., <xref ref-type="bibr" rid="B118">2021</xref>), Flamingo (Alayrac et al., <xref ref-type="bibr" rid="B6">2022</xref>), CoCa (Yu et al., <xref ref-type="bibr" rid="B192">2022</xref>), Perceiver IO (Jaegle et al., <xref ref-type="bibr" rid="B65">2021</xref>), and GPT-4 (Achiam et al., <xref ref-type="bibr" rid="B1">2023</xref>).</p>
<sec>
<title>4.1 Model architecture</title>
<p>The original Transformer (<xref ref-type="fig" rid="F8">Figure 8</xref>) was composed of multiple encoder and decoder blocks, each made up of several layers of self-attention and feed-forward neural networks. The encoder takes the input sequence and generates hidden representations, which are then fed to the decoder. The decoder generates the output sequence by attending to the encoder&#x00027;s hidden representations and the previous tokens (i.e., auto-regressive). The self-attention operation (or scaled dot-product) is a crucial component of the Transformer. It determines the significance of each element in the input sequence with respect to the whole input. Self-attention operates by computing a weighted sum of the input sequence&#x00027;s hidden representations, where the weights are determined by the dot product between the <italic>query</italic> vector and the <italic>key</italic> vector, followed by a scaling operation to stabilize the gradients. The resulting weighted sum is multiplied by a <italic>value</italic> vector to obtain the output of the self-attention operation. There has been a tremendous amount of work on various facets of Transformer architecture. The readers are referred to relevant review papers (Galassi et al., <xref ref-type="bibr" rid="B45">2021</xref>; Otter et al., <xref ref-type="bibr" rid="B111">2021</xref>; Han et al., <xref ref-type="bibr" rid="B54">2023</xref>; Xu et al., <xref ref-type="bibr" rid="B182">2023</xref>).</p>
<fig id="F8" position="float">
<label>Figure 8</label>
<caption><p>The original Transformer architecture is presented (Vaswani et al., <xref ref-type="bibr" rid="B161">2017</xref>). A Transformer can have multiple encoder and decoder blocks, as well as some additional layers.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1408843-g0008.tif"/>
</fig>

</sec>
<sec>
<title>4.2 Multimodal transformers</title>
<p>Self-attention allows a Transformer model to process each input as a fully connected graph and attend to (or equivalently learn from) the global patterns present in the input. This makes Transformers compatible with various data modalities by treating each token (or its embedding) as a node in the graph. To use Transformers for a data modality, we need to tokenize the input and select an embedding space for the tokens. Tokenization and embedding selections are flexible and can be done at multiple granularity levels, such as using raw features, ML-extracted features, patches from the input image, or graph nodes. <xref ref-type="table" rid="T3">Table 3</xref> summarizes some common practices used for various types of data in cancer data sets. Handling inter-modality interactions is the main challenge in developing multimodal Transformer models. Usually, it is done through one of these fusion methods: <italic>early fusion</italic> of data modalities, <italic>cross-attention, hierarchical attention</italic>, and <italic>late fusion</italic>, as illustrated in <xref ref-type="fig" rid="F9">Figure 9</xref>. In the following, we present and compare data processing steps for these four methods using two data modalities as an example. The same analysis can be extended to multiple modalities.</p>

<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Oncology data modalities and their respective tokenization and embeddings selection techniques.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Data modalities</bold></th>
<th valign="top" align="left"><bold>Tokenization level</bold></th>
<th valign="top" align="left"><bold>Token embeddings model</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Pathology images</td>
<td valign="top" align="left">Patch</td>
<td valign="top" align="left">CNNs (Chen et al., <xref ref-type="bibr" rid="B27">2021</xref>)</td>
</tr> <tr>
<td valign="top" align="left">Radiology images</td>
<td valign="top" align="left">Patch</td>
<td valign="top" align="left">CNNs (Xie et al., <xref ref-type="bibr" rid="B181">2021</xref>)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">EHR data</td>
<td valign="top" align="left" rowspan="2">ICD code</td>
<td valign="top" align="left">GNNs (Shang et al., <xref ref-type="bibr" rid="B134">2019</xref>),</td>
</tr>
 <tr>
<td valign="top" align="left">ML models (Rasmy et al., <xref ref-type="bibr" rid="B123">2021</xref>)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="2">-Omics</td>
<td valign="top" align="left">Graphs</td>
<td valign="top" align="left">GNNs (Kaczmarek et al., <xref ref-type="bibr" rid="B75">2021</xref>)</td>
</tr>
 <tr>
<td valign="top" align="left">K-mers</td>
<td valign="top" align="left">ML model (Ji et al., <xref ref-type="bibr" rid="B68">2020</xref>)</td>
</tr> <tr>
<td valign="top" align="left" rowspan="3">Clinical notes</td>
<td valign="top" align="left" rowspan="3">Word</td>
<td valign="top" align="left">BERT (Devlin et al., <xref ref-type="bibr" rid="B36">2019</xref>)</td>
</tr>
 <tr>
<td valign="top" align="left">RoBERTa (Zhuang et al., <xref ref-type="bibr" rid="B204">2021</xref>)</td>
</tr>
 <tr>
<td valign="top" align="left">BioBERT (Lee et al., <xref ref-type="bibr" rid="B81">2019</xref>)</td>
</tr></tbody>
</table>
</table-wrap>

<fig id="F9" position="float">
<label>Figure 9</label>
<caption><p>Four different strategies of fusing information from various data modalities in multimodal Transformers are presented. <bold>(A)</bold> Early Fusion. <bold>(B)</bold> Late Fusion. <bold>(C)</bold> Hierarchical Attention. <bold>(D)</bold> Cross Attention.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-07-1408843-g0009.tif"/>
</fig>



<sec>
<title>4.2.1 Early fusion</title>
<p>Early fusion is the simplest way to combine data from multiple modalities. The data from different modalities are concatenated to a single input before being fed to the Transformer model, which processes the input as a single entity. Mathematically, the concatenation operation is represented as <italic>x</italic><sub><italic>cat</italic></sub>=[<italic>x</italic><sub>1</sub>, <italic>x</italic><sub>2</sub>], where <italic>x</italic><sub>1</sub> and <italic>x</italic><sub>2</sub> are the inputs from two data modalities, and <italic>x</italic><sub><italic>cat</italic></sub> is the concatenated input to the model. Early fusion is simple and efficient. However, it assumes that all modalities are equally important and relevant for the task at hand (Kalfaoglu et al., <xref ref-type="bibr" rid="B76">2020</xref>), which may not always be practically true (Zhong et al., <xref ref-type="bibr" rid="B202">2023</xref>).</p></sec>
<sec>
<title>4.2.2 Cross-attention fusion</title>
<p>Cross-attention is a relatively more flexible approach to modeling the interactions between data modalities and learning their joint representations. The self-attention layers attend to different modalities at different stages of data processing. Cross-attention allows the model to selectively attend to different modalities based on their relevance to the task (Li et al., <xref ref-type="bibr" rid="B85">2021a</xref>) and capture complex interactions between the modalities (Rombach et al., <xref ref-type="bibr" rid="B125">2022</xref>).</p></sec>
<sec>
<title>4.2.3 Hierarchical fusion</title>
<p>Hierarchical fusion is a complex approach to combining multiple modalities. For instance, the Depth-supervised Fusion Transformer for Salient Object Detection (DFTR) employs hierarchical feature extraction to improve salient object detection performance by fusing low-level spatial features and high-level semantic features from different scales (Zhu et al., <xref ref-type="bibr" rid="B203">2022</xref>). Yang et al. (<xref ref-type="bibr" rid="B185">2020</xref>) introduced a hierarchical approach to fine-grained classification using a fusion Transformer. Furthermore, the Hierarchical Multimodal Transformer (HMT) for video summarization can capture global dependencies and multi-hop relationships among video frames (Zhao et al., <xref ref-type="bibr" rid="B199">2022</xref>).</p></sec>
<sec>
<title>4.2.4 Late fusion</title>
<p>In late fusion, each data modality is processed independently by its own Transformer model, and the branch outputs are concatenated and passed through the final classifier. Late fusion allows the model to capture the unique features of each modality while still learning their joint representation. Sun et al. (<xref ref-type="bibr" rid="B146">2021</xref>) proposed a multi-modal adaptive late fusion Transformer network for estimating the levels of depression. Their model extracts long-term temporal information from audio and visual data independently and then fuses weights at the end to learn a joint representation of data.</p></sec></sec>
<sec>
<title>4.3 Transformers for processing oncology datasets</title>
<p>Transformers have been successfully applied to various tasks in oncology, including cancer screening, diagnosis, prognosis, treatment selection, and prediction of clinical variables (Boehm et al., <xref ref-type="bibr" rid="B17">2021</xref>; Chen et al., <xref ref-type="bibr" rid="B27">2021</xref>; Shao et al., <xref ref-type="bibr" rid="B135">2021</xref>; Lian et al., <xref ref-type="bibr" rid="B89">2022</xref>; Liang J. et al., <xref ref-type="bibr" rid="B90">2022</xref>). For instance, a Transformer-based model was used to predict the presence and grade of breast cancer using a combination of imaging and genomics data (Boehm et al., <xref ref-type="bibr" rid="B17">2021</xref>). TransMIL (Shao et al., <xref ref-type="bibr" rid="B135">2021</xref>), a Transformer model, was proposed to process histopathology images using self-attention to learn and classify histopathology slides by overcoming the challenges faced by multi-instance learning (MIL). Recently, a Transformer and convolution parallel network, TransConv (Liang J. et al., <xref ref-type="bibr" rid="B90">2022</xref>), was proposed for automatic brain tumor segmentation using MRI data. Transformers and GNNs have also been combined in MML for early-stage NSCLC prognostic prediction using the patient&#x00027;s clinical and pathological features and by modeling the patient&#x00027;s physiological network (Lian et al., <xref ref-type="bibr" rid="B89">2022</xref>). Similarly, a multimodal co-attention Transformer was proposed for survival prediction using WSIs and genomic sequences (Chen et al., <xref ref-type="bibr" rid="B27">2021</xref>). The authors used a co-attention mechanism to learn the interactions between the two data modalities.</p>
<p>Reinforcement learning with human feedback (RLHF) has emerged as a promising technique to infuse large language models with domain knowledge and human preferences for healthcare applications. Sun et al. (<xref ref-type="bibr" rid="B147">2023</xref>) proposed an approach to continuously improve a conversational agent for behavioral interventions by integrating few-shot generation, prompt engineering, and RLHF to leverage human feedback from therapists and clients. Giuffr&#x000E8; et al. (<xref ref-type="bibr" rid="B48">2024</xref>) discussed strategies to optimize large language models for digestive disease by using RLHF to infuse domain knowledge through supervised fine-tuning. Basit et al. (<xref ref-type="bibr" rid="B16">2024</xref>) introduced MedAide, an on-premise healthcare chatbot that employs RLHF during training to enhance its medical diagnostic capabilities on edge devices. Dai et al. (<xref ref-type="bibr" rid="B30">2023</xref>) presented Safe RLHF, a novel algorithm that decouples human preferences for helpfulness and harmlessness during RLHF to improve the safety and value alignment of large language models in sensitive healthcare domains.</p>
<p>The relevant works discussed in this section are summarized in <xref ref-type="table" rid="T4">Table 4</xref>.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>References discussed in Section 4.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Sections</bold></th>
<th valign="top" align="left"><bold>References</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Multimodal transformers</td>
<td valign="top" align="left">Vaswani et al., <xref ref-type="bibr" rid="B161">2017</xref>; Radford et al., <xref ref-type="bibr" rid="B119">2018</xref>, <xref ref-type="bibr" rid="B118">2021</xref>; Devlin et al., <xref ref-type="bibr" rid="B36">2019</xref>; Lewis et al., <xref ref-type="bibr" rid="B83">2019</xref>; Dosovitskiy et al., <xref ref-type="bibr" rid="B37">2020</xref>; Raffel et al., <xref ref-type="bibr" rid="B120">2020</xref>; Zhang, <xref ref-type="bibr" rid="B196">2020</xref>; Boehm et al., <xref ref-type="bibr" rid="B17">2021</xref>; Galassi et al., <xref ref-type="bibr" rid="B45">2021</xref>; Hu and Singh, <xref ref-type="bibr" rid="B57">2021</xref>; Jaegle et al., <xref ref-type="bibr" rid="B65">2021</xref>; Jia et al., <xref ref-type="bibr" rid="B69">2021</xref>; Ma J. et al., <xref ref-type="bibr" rid="B96">2021</xref>; Otter et al., <xref ref-type="bibr" rid="B111">2021</xref>; Zhuang et al., <xref ref-type="bibr" rid="B204">2021</xref>; Ahmed et al., <xref ref-type="bibr" rid="B3">2022b</xref>; Alayrac et al., <xref ref-type="bibr" rid="B6">2022</xref>; Scao et al., <xref ref-type="bibr" rid="B132">2022</xref>; Tang et al., <xref ref-type="bibr" rid="B150">2022</xref>; Yu et al., <xref ref-type="bibr" rid="B192">2022</xref>; Achiam et al., <xref ref-type="bibr" rid="B1">2023</xref>; Han et al., <xref ref-type="bibr" rid="B54">2023</xref>; Xu et al., <xref ref-type="bibr" rid="B182">2023</xref></td>
</tr> <tr>
<td valign="top" align="left">MML data fusion stages</td>
<td valign="top" align="left">Kalfaoglu et al., <xref ref-type="bibr" rid="B76">2020</xref>; Yang et al., <xref ref-type="bibr" rid="B185">2020</xref>; Li et al., <xref ref-type="bibr" rid="B85">2021a</xref>; Sun et al., <xref ref-type="bibr" rid="B146">2021</xref>; Rombach et al., <xref ref-type="bibr" rid="B125">2022</xref>; Zhao et al., <xref ref-type="bibr" rid="B199">2022</xref>; Zhu et al., <xref ref-type="bibr" rid="B203">2022</xref>; Zhong et al., <xref ref-type="bibr" rid="B202">2023</xref></td>
</tr> <tr>
<td valign="top" align="left">Transformers for oncology datasets</td>
<td valign="top" align="left">Boehm et al., <xref ref-type="bibr" rid="B17">2021</xref>; Chen et al., <xref ref-type="bibr" rid="B27">2021</xref>; Shao et al., <xref ref-type="bibr" rid="B135">2021</xref>; Lian et al., <xref ref-type="bibr" rid="B89">2022</xref>; Liang J. et al., <xref ref-type="bibr" rid="B90">2022</xref>; Dai et al., <xref ref-type="bibr" rid="B30">2023</xref>; Sun et al., <xref ref-type="bibr" rid="B147">2023</xref>; Basit et al., <xref ref-type="bibr" rid="B16">2024</xref>; Giuffr&#x000E8; et al., <xref ref-type="bibr" rid="B48">2024</xref></td>
</tr></tbody>
</table>
</table-wrap>

</sec></sec>
<sec id="s5">
<title>5 MML&#x02014;Challenges and opportunities</title>
<p>Learning from multimodal oncology data is a complex and rapidly growing field that presents both challenges and opportunities. While MML has shown significant promise, there are many challenges owing to the inductive biases of the ML models (Ektefaie et al., <xref ref-type="bibr" rid="B39">2023</xref>). In this context, we present major challenges of MML in oncology settings that, if addressed, could unlock the full potential of this emerging field.</p>
<sec>
<title>5.1 Large amounts of high-quality data</title>
<p>DL models are traditionally trained on large datasets with enough samples for training, validation, and testing, such as JFT-300M (Sun et al., <xref ref-type="bibr" rid="B145">2017</xref>) and YFCC100M (Thomee et al., <xref ref-type="bibr" rid="B152">2016</xref>), which are not available in the cancer domain. For example, the largest genomics data repository, the Gene Expression Omnibus (GEO) database, has approximately 1.1 million samples with the keyword &#x02018;cancer&#x02019; compared to 3 billion images in JFT-300M (Jiang et al., <xref ref-type="bibr" rid="B71">2022</xref>). Annotating medical and oncology data is a time-consuming and manual process that requires significant expertise in many different areas of medical sciences. Factors like heterogeneity of the disease, noise in data recording, background, and training of medical professionals leading to inter- and intra-operator variability cause lack of reproducibility and inconsistent clinical outcomes (Lipkova et al., <xref ref-type="bibr" rid="B92">2022</xref>).</p></sec>
<sec>
<title>5.2 Data registration and alignment</title>
<p>Data alignment and registration refer to the process of combining and aligning data from different modalities in a useful manner (Zhao et al., <xref ref-type="bibr" rid="B201">2023</xref>). In multimodal oncology data, this process involves aligning data from multiple modalities such as CT, MRI, PET, and WSIs, as well as genomics, transcriptomics, and clinical records. Data registration involves aligning the data modalities to a common reference frame and may involve identifying common landmarks or fiducial markers. If the data is not registered or aligned correctly, it may be difficult to fuse the information from different modalities (Liang P. P. et al., <xref ref-type="bibr" rid="B91">2022</xref>).</p></sec>
<sec>
<title>5.3 Pan-cancer generalization and transference</title>
<p>Transference in MML aims to transfer knowledge between modalities and their representations to improve the performance of a model trained on a primary modality (Liang P. P. et al., <xref ref-type="bibr" rid="B91">2022</xref>). Because of the unique characteristics of each cancer type and site, it is challenging to develop models that can generalize across different cancer sites. Furthermore, models trained on a specific modality, such as radiology images, will not perform well with other imaging modalities, such as histopathology slides. Fine-tuning the model on a secondary modality, multimodal co-learning, and model induction are techniques to achieve transference and generalization (Wei et al., <xref ref-type="bibr" rid="B176">2020</xref>). To overcome this challenge, mechanisms for improved universality of ML models need to be devised.</p></sec>
<sec>
<title>5.4 Missing data samples and modalities</title>
<p>The unavailability of one or more modalities or the absence of samples in a modality affects the model learning, as most of the existing DL models cannot process the &#x0201C;missing information&#x0201D;. This requirement, in turn, constrains the already insufficient size of datasets in oncology. Almost all publicly available oncology datasets have missing data for a large number of samples (Jiang et al., <xref ref-type="bibr" rid="B71">2022</xref>). Various approaches for handling missing data samples and modalities are gradually gaining traction. However, this is still an open challenge (Mirza et al., <xref ref-type="bibr" rid="B102">2019</xref>).</p></sec>
<sec>
<title>5.5 Imbalanced data</title>
<p>Class imbalance refers to the phenomenon when one class (e.g., cancer negative/positive) is represented significantly more in the data than another class. Class imbalance is common in oncology data (Mirza et al., <xref ref-type="bibr" rid="B102">2019</xref>). DL models struggle to classify underrepresented classes accurately. Techniques such as data augmentation, ensemble, continual learning, and transfer learning are used to counter the class imbalance challenge (Mirza et al., <xref ref-type="bibr" rid="B102">2019</xref>).</p></sec>
<sec>
<title>5.6 Explainability and trustworthiness</title>
<p>The explainability in DL, e.g., how GNNs and Transformers make a specific decision, is still an area of active research (Li P. et al., <xref ref-type="bibr" rid="B87">2022</xref>; Nielsen et al., <xref ref-type="bibr" rid="B109">2022</xref>). GNNExplainer (Ying et al., <xref ref-type="bibr" rid="B189">2019</xref>), PGM-Explainer (Vu and Thai, <xref ref-type="bibr" rid="B163">2020</xref>), and SubgraphX (Yuan et al., <xref ref-type="bibr" rid="B193">2021</xref>) are some attempts to explain the decision-making process of GNNs. The explainability methods for Transformers have been analyzed in Remmer (<xref ref-type="bibr" rid="B124">2022</xref>). Existing efforts and a roadmap to improve the trustworthiness of GNNs have been presented in the latest survey (Zhang H. et al., <xref ref-type="bibr" rid="B195">2022</xref>). However, the explainability and trustworthiness of multimodal GNNs and Transformers is an open challenge.</p></sec>
<sec>
<title>5.7 Over-smoothing in GNNs</title>
<p>One particular challenge in using GNNs is over-smoothing, which occurs when the GNN is trained for too long, causing the node representations to become almost similar (Wu et al., <xref ref-type="bibr" rid="B179">2020</xref>). This leads to a loss of information, a decrease in the model&#x00027;s performance, and a lack of generalization (Valsesia et al., <xref ref-type="bibr" rid="B159">2021</xref>). Regularization techniques such as dropout, weight decay, skip-connection, and incorporating higher-order structures, such as motifs and graphlets, have been proposed. However, building deep architectures that can scale and adapt to varying structural patterns of graphs is still an open challenge.</p></sec>
<sec>
<title>5.8 Modality collapse</title>
<p>Modality collapse is a phenomenon that occurs in MML, where a model trained on multiple modalities may become over-reliant on a single modality, to the point where it ignores or neglects the other modalities (Javaloy et al., <xref ref-type="bibr" rid="B67">2022</xref>). Recent work explored the reasons and theoretical understanding of modality collapse (Huang et al., <xref ref-type="bibr" rid="B60">2022</xref>). However, the counter-actions needed to balance model dependence on data modalities require active investigation by the ML community.</p></sec>
<sec>
<title>5.9 Dynamic and temporal data</title>
<p>Dynamic and temporal data refers to the data that changes over time (Wu et al., <xref ref-type="bibr" rid="B179">2020</xref>). Tumor surveillance is a well-known technique to study longitudinal cancer growth over multiple data modalities (Waqas et al., <xref ref-type="bibr" rid="B172">2021</xref>). Spatio-temporal methods such as multiple instance learning, GNNs, and hybrids of multiple models can capture complex changes in the data relationships over time; however, learning from multimodal dynamic data is very challenging and an active area of research (Fritz et al., <xref ref-type="bibr" rid="B44">2022</xref>).</p></sec>
<sec>
<title>5.10 Data privacy</title>
<p>Given the sensitive nature of medical data, privacy and security are critical considerations in the development and deployment of MML models for oncology applications. With the increased adoption of MML in healthcare settings, it is essential to adapt these techniques to enable local data processing and protect patient privacy while fostering collaborative research and analysis across different sites and institutions. Federated learning (FL) has emerged as a promising approach to train large multimodal models across various sites without the need for direct data sharing (Pati et al., <xref ref-type="bibr" rid="B115">2022</xref>). In an FL setup, each participating site trains a local model on its own data and shares only the model updates with a central server, which aggregates the updates and sends the updated global model back to the sites. This allows for collaborative model development while keeping the raw data securely within each site&#x00027;s premises.</p>
<p>To further enhance privacy protection in FL and other distributed learning scenarios, differential privacy (DP) can be integrated into the model training process. DP is a rigorous mathematical framework that involves adding carefully calibrated noise to data or model updates before sharing, in order to protect individual privacy while preserving the utility of the data for analysis (Akter et al., <xref ref-type="bibr" rid="B5">2022</xref>; Islam et al., <xref ref-type="bibr" rid="B64">2022</xref>; Nampalle et al., <xref ref-type="bibr" rid="B106">2023</xref>). Secure multi-party computation (SMPC) is another powerful technique for enabling joint analysis and model training on private datasets held by different healthcare providers or research institutions, without revealing the raw data to each other (&#x0015E;ahinba&#x0015F; and Catak, <xref ref-type="bibr" rid="B128">2021</xref>; Alghamdi et al., <xref ref-type="bibr" rid="B7">2023</xref>; Yogi and Mundru, <xref ref-type="bibr" rid="B190">2024</xref>). SMPC protocols leverage advanced cryptographic techniques to allow multiple parties to compute a function over their combined data inputs securely, such that each party learns only the output of the computation and nothing about the other parties&#x00027; inputs. In addition to these solutions, implementing appropriate access control and authentication mechanisms is crucial for restricting access to sensitive healthcare data to only authorized individuals and entities (Orii et al., <xref ref-type="bibr" rid="B110">2024</xref>). This involves defining and enforcing strict policies and procedures for granting, managing, and revoking access privileges based on the principle of least privilege and the need-to-know basis. Regular security risk assessments should also be conducted to identify and mitigate potential vulnerabilities proactively, ensuring the ongoing protection of patient data.</p></sec>
<sec>
<title>5.11 Other challenges</title>
<p>MML requires extensive computational resources to train models on a variety of datasets and tasks. Robustness and failure detection (Ahmed et al., <xref ref-type="bibr" rid="B2">2022a</xref>) are critical aspects of MML, particularly in applications such as oncology. Uncertainty quantification techniques, such as Bayesian neural networks (Dera et al., <xref ref-type="bibr" rid="B33">2021</xref>), are still under-explored avenues in the MML. By addressing these challenges, it is possible to develop MML models that are able to surpass the performance offered by single-modality models.</p></sec>
<sec>
<title>5.12 Potential future directions</title>
<p>The future of MML in oncology holds immense potential. A critical direction is the integration of large amounts of high-quality data from diverse modalities, such as imaging, genomic, and clinical data, to enhance the accuracy and comprehensiveness of cancer diagnostics and treatment predictions in an end-to-end manner. Overcoming challenges in data registration and alignment is crucial to ensure seamless integration and accurate interpretation of multimodal data. Developing robust models capable of pan-cancer generalization and transference can enable more universal applications across different cancer types. Addressing issues of missing data samples and modalities, and tackling imbalanced datasets, will be essential to improve model robustness and fairness. Enhancing explainability and trustworthiness in these models is vital for clinical adoption, necessitating transparent and interpretable AI systems. Preventing modality collapse is important for maintaining the distinct contributions of each data modality. Moreover, leveraging dynamic and temporal data can offer deeper insights into cancer progression and treatment responses. Ensuring data privacy and ethical considerations will be paramount as the field advances, balancing innovation with the protection of patient information. Lastly, implementing MML applications in clinical settings is crucial to fully realize the benefits of MML in cancer research.</p></sec>
<sec>
<title>5.13 Limitations of the study</title>
<p>MML is a broad research field that has recently gained traction. In this review, we have focused on the application of MML on oncology data. However, MML is being widely adopted in applications such as autonomous vehicles, education, earth science, climate change, and space exploration (Xiao et al., <xref ref-type="bibr" rid="B180">2020</xref>; Sanders et al., <xref ref-type="bibr" rid="B129">2023</xref>; Hadid et al., <xref ref-type="bibr" rid="B51">2024</xref>; Li et al., <xref ref-type="bibr" rid="B88">2024</xref>). Moreover, beyond GNNs and Transformers, MML has been explored using encoder-decoder methods, constraint-based methods, canonical correlations, Restricted Boltzmann Machines (RBMs), and many more (Qi et al., <xref ref-type="bibr" rid="B116">2020</xref>; Zhao et al., <xref ref-type="bibr" rid="B200">2024</xref>). Each of these topics requires an extensive review of the literature in the form of separate articles.</p>
<p>The relevant works discussed in this section are summarized in <xref ref-type="table" rid="T5">Table 5</xref>.</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>References discussed in Section 5.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Sections</bold></th>
<th valign="top" align="left"><bold>References</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Large amounts of high-quality data</td>
<td valign="top" align="left">Thomee et al., <xref ref-type="bibr" rid="B152">2016</xref>; Sun et al., <xref ref-type="bibr" rid="B145">2017</xref>; Lipkova et al., <xref ref-type="bibr" rid="B92">2022</xref>; Ektefaie et al., <xref ref-type="bibr" rid="B39">2023</xref></td>
</tr> <tr>
<td valign="top" align="left">Data registration and alignment</td>
<td valign="top" align="left">Liang P. P. et al., <xref ref-type="bibr" rid="B91">2022</xref>; Zhao et al., <xref ref-type="bibr" rid="B201">2023</xref></td>
</tr> <tr>
<td valign="top" align="left">Pan-cancer generalization and transference</td>
<td valign="top" align="left">Wei et al., <xref ref-type="bibr" rid="B176">2020</xref>; Liang P. P. et al., <xref ref-type="bibr" rid="B91">2022</xref></td>
</tr> <tr>
<td valign="top" align="left">Missing data samples and modalities</td>
<td valign="top" align="left">Mirza et al., <xref ref-type="bibr" rid="B102">2019</xref>; Jiang et al., <xref ref-type="bibr" rid="B71">2022</xref></td>
</tr> <tr>
<td valign="top" align="left">Imbalanced Data</td>
<td valign="top" align="left">Mirza et al., <xref ref-type="bibr" rid="B102">2019</xref></td>
</tr> <tr>
<td valign="top" align="left">Explainability and trustworthiness</td>
<td valign="top" align="left">Ying et al., <xref ref-type="bibr" rid="B189">2019</xref>; Vu and Thai, <xref ref-type="bibr" rid="B163">2020</xref>; Yuan et al., <xref ref-type="bibr" rid="B193">2021</xref>; Li P. et al., <xref ref-type="bibr" rid="B87">2022</xref>; Nielsen et al., <xref ref-type="bibr" rid="B109">2022</xref>; Remmer, <xref ref-type="bibr" rid="B124">2022</xref>; Zhang H. et al., <xref ref-type="bibr" rid="B195">2022</xref></td>
</tr> <tr>
<td valign="top" align="left">Over-smoothing in GNNs</td>
<td valign="top" align="left">Wu et al., <xref ref-type="bibr" rid="B179">2020</xref>; Valsesia et al., <xref ref-type="bibr" rid="B159">2021</xref></td>
</tr> <tr>
<td valign="top" align="left">Modality Collapse</td>
<td valign="top" align="left">Huang et al., <xref ref-type="bibr" rid="B60">2022</xref>; Javaloy et al., <xref ref-type="bibr" rid="B67">2022</xref></td>
</tr> <tr>
<td valign="top" align="left">Dynamic and Temporal Data</td>
<td valign="top" align="left">Wu et al., <xref ref-type="bibr" rid="B179">2020</xref>; Waqas et al., <xref ref-type="bibr" rid="B172">2021</xref>; Fritz et al., <xref ref-type="bibr" rid="B44">2022</xref></td>
</tr> <tr>
<td valign="top" align="left">Data Privacy</td>
<td valign="top" align="left">&#x0015E;ahinba&#x0015F; and Catak, <xref ref-type="bibr" rid="B128">2021</xref>; Akter et al., <xref ref-type="bibr" rid="B5">2022</xref>; Islam et al., <xref ref-type="bibr" rid="B64">2022</xref>; Pati et al., <xref ref-type="bibr" rid="B115">2022</xref>; Alghamdi et al., <xref ref-type="bibr" rid="B7">2023</xref>; Nampalle et al., <xref ref-type="bibr" rid="B106">2023</xref>; Orii et al., <xref ref-type="bibr" rid="B110">2024</xref>; Yogi and Mundru, <xref ref-type="bibr" rid="B190">2024</xref></td>
</tr> <tr>
<td valign="top" align="left">Other Challenges</td>
<td valign="top" align="left">Dera et al., <xref ref-type="bibr" rid="B33">2021</xref>; Ahmed et al., <xref ref-type="bibr" rid="B2">2022a</xref></td>
</tr> <tr>
<td valign="top" align="left">Limitations of the Study</td>
<td valign="top" align="left">Qi et al., <xref ref-type="bibr" rid="B116">2020</xref>; Xiao et al., <xref ref-type="bibr" rid="B180">2020</xref>; Sanders et al., <xref ref-type="bibr" rid="B129">2023</xref>; Hadid et al., <xref ref-type="bibr" rid="B51">2024</xref>; Li et al., <xref ref-type="bibr" rid="B88">2024</xref>; Zhao et al., <xref ref-type="bibr" rid="B200">2024</xref></td>
</tr></tbody>
</table>
</table-wrap>

</sec></sec>
<sec id="s6">
<title>6 Multimodal oncology data sources</title>
<p>Unifying the various collections of oncology data into central archives necessitates a focused effort. We have assembled a list of datasets from data portals maintained by the National Institute of Health and other organizations, although this list is not exhaustive. The goal of this compilation is to offer machine learning researchers in oncology a consolidated data resource. The collection, which is updated regularly, can be accessed at <ext-link ext-link-type="uri" xlink:href="https://lab-rasool.github.io/pan-cancer-dataset-sources/">https://lab-rasool.github.io/pan-cancer-dataset-sources/</ext-link> (Tripathi et al., <xref ref-type="bibr" rid="B155">2024a</xref>). The compilation of pan-cancer datasets from sources such as The Cancer Imaging Archive (TCIA), Genomic Data Commons (GDC), and Proteomic Data Commons (PDC) serves as a valuable resource for cancer research. By providing a unified view of multimodal data that includes imaging, genomics, proteomics, and clinical records, this compilation facilitates the development of adaptable and scalable datasets specifically designed for machine learning applications in oncology (Tripathi et al., <xref ref-type="bibr" rid="B155">2024a</xref>). The compiled datasets encompass a broad spectrum of data modalities, such as radiology images (CT, MRI, PET), pathology slides, genomic data (DNA, RNA), proteomics, and clinical records. This multimodal nature enables the integration of different data types to capture the intricacies of cancer. Moreover, the compilation covers 32 cancer types, ranging from prevalent cancers like breast, lung, and colorectal to less common forms such as mesothelioma and uveal melanoma. The inclusion of hundreds to thousands of cases for each cancer type provides a substantial resource for training machine learning models, especially deep learning algorithms.</p>
<p>Standardizing the diverse data formats, annotations, and metadata across different sources is essential for creating datasets that are suitable for machine learning. The HoneyBee framework, a modular system designed to streamline the creation of machine learning-ready multimodal oncology datasets from diverse sources, can help address this challenge (Tripathi et al., <xref ref-type="bibr" rid="B156">2024b</xref>). HoneyBee supports data ingestion from various sources, handles different data formats and modalities, and ensures consistent data representation. It also facilitates the integration of multimodal data, enabling the creation of datasets that combine imaging, genomics, proteomics, and clinical data for a holistic view of each patient case. Furthermore, HoneyBee incorporates pre-trained foundational embedding models for different data modalities, such as image encoders, genomic sequence embedders, and clinical text encoders. These embeddings can serve as input features for downstream machine learning models, leveraging transfer learning and reducing the need for extensive labeled data. The framework&#x00027;s scalable and modular architecture allows for efficient processing of large-scale datasets and easy integration of new data sources, preprocessing techniques, and embedding models. By utilizing the HoneyBee framework, researchers can create high-quality, multimodal oncology datasets tailored to their specific research objectives, promoting collaboration and advancing machine learning applications in cancer research.</p></sec>
<sec sec-type="conclusions" id="s7">
<title>7 Conclusion</title>
<p>Existing research into the integration of data across various modalities has already yielded promising outcomes, highlighting the potential for significant advancements in cancer research. However, the lack of a comprehensive framework capable of encompassing the full spectrum of cancer dataset modalities presents a notable challenge. The synergy between diverse methodologies and data across different scales could unlock deeper insights into cancer, potentially leading to more accurate prognostic and predictive models than what is possible through single data modalities alone. In our survey, we have explored the landscape of multimodal learning applied to oncology datasets and the specific tasks they can address. Looking ahead, the key to advancing this field lies in the development of robust, deployment-ready MML frameworks. These frameworks must not only scale efficiently across all modalities of cancer data but also incorporate capabilities for uncertainty quantification, interpretability, and generalizability. Such advancements will be critical for effectively integrating oncology data across multiple scales, modalities, and resolutions. The journey toward achieving these goals is complex, yet essential for the next leaps in cancer research. By focusing on these areas, future research has the potential to significantly enhance our understanding of cancer, leading to improved outcomes for patients through more informed and personalized treatment strategies.</p></sec>
<sec sec-type="author-contributions" id="s8">
<title>Author contributions</title>
<p>AW: Conceptualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. AT: Conceptualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. RR: Conceptualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. PS: Conceptualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. GR: Conceptualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing.</p></sec>
</body>
<back>
<sec sec-type="funding-information" id="s9">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This work was partly supported by the National Science Foundation awards 1903466, 2008690, 2234836, and 2234468, and partly by the Biostatistics and Bioinformatics Shared Resource at the H. Lee Moffitt Cancer Center &#x00026; Research Institute, an NCI designated Comprehensive Cancer Center (P30-CA076292).</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>

<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Achiam</surname> <given-names>J.</given-names></name> <name><surname>Adler</surname> <given-names>S.</given-names></name> <name><surname>Agarwal</surname> <given-names>S.</given-names></name> <name><surname>Ahmad</surname> <given-names>L.</given-names></name> <name><surname>Akkaya</surname> <given-names>I.</given-names></name> <name><surname>Aleman</surname> <given-names>F. L.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Gpt-4 technical report</article-title>. <source>arXiv</source> [preprint] arXiv:2303.08774. <pub-id pub-id-type="doi">10.48550/arXiv.2303.08774</pub-id></citation>
</ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ahmed</surname> <given-names>S.</given-names></name> <name><surname>Dera</surname> <given-names>D.</given-names></name> <name><surname>Hassan</surname> <given-names>S. U.</given-names></name> <name><surname>Bouaynaya</surname> <given-names>N.</given-names></name> <name><surname>Rasool</surname> <given-names>G.</given-names></name></person-group> (<year>2022a</year>). <article-title>Failure detection in deep neural networks for medical imaging</article-title>. <source>Front. Med. Technol</source>. <volume>4</volume>:<fpage>919046</fpage>. <pub-id pub-id-type="doi">10.3389/fmedt.2022.919046</pub-id><pub-id pub-id-type="pmid">35958121</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ahmed</surname> <given-names>S.</given-names></name> <name><surname>Nielsen</surname> <given-names>I. E.</given-names></name> <name><surname>Tripathi</surname> <given-names>A.</given-names></name> <name><surname>Siddiqui</surname> <given-names>S.</given-names></name> <name><surname>Rasool</surname> <given-names>G.</given-names></name> <name><surname>Ramachandran</surname> <given-names>R. P.</given-names></name></person-group> (<year>2022b</year>). <article-title>Transformers in time-series analysis: a tutorial</article-title>. <source>arXiv</source> [preprint] arXiv:2205.01138. <pub-id pub-id-type="doi">10.1007/s00034-023-02454-8</pub-id></citation>
</ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ahmedt-Aristizabal</surname> <given-names>D.</given-names></name> <name><surname>Armin</surname> <given-names>M. A.</given-names></name> <name><surname>Denman</surname> <given-names>S.</given-names></name> <name><surname>Fookes</surname> <given-names>C.</given-names></name> <name><surname>Petersson</surname> <given-names>L.</given-names></name></person-group> (<year>2022</year>). <article-title>A survey on graph-based deep learning for computational histopathology</article-title>. <source>Comp. Med. Imag. Graph</source>. <volume>95</volume>:<fpage>102027</fpage>. <pub-id pub-id-type="doi">10.1016/j.compmedimag.2021.102027</pub-id><pub-id pub-id-type="pmid">34959100</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Akter</surname> <given-names>M.</given-names></name> <name><surname>Moustafa</surname> <given-names>N.</given-names></name> <name><surname>Lynar</surname> <given-names>T.</given-names></name></person-group> (<year>2022</year>). &#x0201C;Edge intelligence-based privacy protection framework for iot-based smart healthcare systems,&#x0201D; in <italic>IEEE INFOCOM 2022-IEEE Conference on Computer Communications Workshops (INFOCOM WKSHPS)</italic> (New York, NY: IEEE), <fpage>1</fpage>&#x02013;<lpage>8</lpage>.</citation>
</ref>
<ref id="B6">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Alayrac</surname> <given-names>J.-B.</given-names></name> <name><surname>Donahue</surname> <given-names>J.</given-names></name> <name><surname>Luc</surname> <given-names>P.</given-names></name> <name><surname>Miech</surname> <given-names>A.</given-names></name> <name><surname>Barr</surname> <given-names>I.</given-names></name> <name><surname>Hasson</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Flamingo: a visual language model for few-shot learning</article-title>. <source>Adv. Neural Inf. Process. Syst</source>. <volume>35</volume>, <fpage>23716</fpage>&#x02013;<lpage>23736</lpage>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://proceedings.neurips.cc/paper_files/paper/2022/hash/960a172bc7fbf0177ccccbb411a7d800-Abstract-Conference.html">https://proceedings.neurips.cc/paper_files/paper/2022/hash/960a172bc7fbf0177ccccbb411a7d800-Abstract-Conference.html</ext-link></citation>
</ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alghamdi</surname> <given-names>W.</given-names></name> <name><surname>Salama</surname> <given-names>R.</given-names></name> <name><surname>Sirija</surname> <given-names>M.</given-names></name> <name><surname>Abbas</surname> <given-names>A. R.</given-names></name> <name><surname>Dilnoza</surname> <given-names>K.</given-names></name></person-group> (<year>2023</year>). &#x0201C;Secure multi-party computation for collaborative data analysis,&#x0201D; in <italic>E3S Web of Conferences</italic> (Les Ulis: EDP Sciences), <fpage>04034</fpage>.</citation>
</ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Al-jabery</surname> <given-names>K. K.</given-names></name> <name><surname>Obafemi-Ajayi</surname> <given-names>T.</given-names></name> <name><surname>Olbricht</surname> <given-names>G. R.</given-names></name> <name><surname>Wunsch</surname> <given-names>I. I. D. C</given-names></name></person-group>. (<year>2020</year>). &#x0201C;Data preprocessing,&#x0201D; in K. K. Al-jabery, T. Obafemi-Ajayi, G. R. Olbricht and D. C. Wunsch <italic>Computational Learning Approaches to Data Analytics in Biomedical Applications</italic> (Cambridge, MA: Academic Press), <fpage>7</fpage>&#x02013;<lpage>27</lpage>.</citation>
</ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Almasan</surname> <given-names>P.</given-names></name> <name><surname>Su&#x000E1;rez-Varela</surname> <given-names>J.</given-names></name> <name><surname>Rusek</surname> <given-names>K.</given-names></name> <name><surname>Barlet-Ros</surname> <given-names>P.</given-names></name> <name><surname>Cabellos-Aparicio</surname> <given-names>A.</given-names></name></person-group> (<year>2022</year>). <article-title>Deep reinforcement learning meets graph neural networks: Exploring a routing optimization use case</article-title>. <source>Comput. Commun</source>. <volume>196</volume>, <fpage>184</fpage>&#x02013;<lpage>194</lpage>. <pub-id pub-id-type="doi">10.1016/j.comcom.2022.09.029</pub-id></citation>
</ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Anand</surname> <given-names>D.</given-names></name> <name><surname>Gadiya</surname> <given-names>S.</given-names></name> <name><surname>Sethi</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). &#x0201C;Histographs: graphs in histopathology,&#x0201D; in <italic>Medical Imaging 2020: Digital Pathology</italic> (California: SPIE), <fpage>150</fpage>&#x02013;<lpage>155</lpage>.</citation>
</ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Angermueller</surname> <given-names>C.</given-names></name> <name><surname>Lee</surname> <given-names>H. J.</given-names></name> <name><surname>Reik</surname> <given-names>W.</given-names></name> <name><surname>Stegle</surname> <given-names>O.</given-names></name></person-group> (<year>2017</year>). <article-title>DeepCpG: accurate prediction of single-cell DNA methylation states using deep learning</article-title>. <source>Genome Biol</source>. <volume>18</volume>, <fpage>1</fpage>&#x02013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1186/s13059-017-1189-z</pub-id><pub-id pub-id-type="pmid">28395661</pub-id></citation></ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Asan</surname> <given-names>O.</given-names></name> <name><surname>Nattinger</surname> <given-names>A. B.</given-names></name> <name><surname>Gurses</surname> <given-names>A. P.</given-names></name> <name><surname>Tyszka</surname> <given-names>J. T.</given-names></name> <name><surname>Yen</surname> <given-names>T. W.</given-names></name></person-group> (<year>2018</year>). <article-title>Oncologists&#x00027; views regarding the role of electronic health records in care coordination</article-title>. <source>JCO Clini. Cancer Inform</source>. <volume>2</volume>, <fpage>1</fpage>&#x02013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1200/CCI.17.00118</pub-id><pub-id pub-id-type="pmid">30652555</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bai</surname> <given-names>S.</given-names></name> <name><surname>Zhang</surname> <given-names>F.</given-names></name> <name><surname>Torr</surname> <given-names>P. H.</given-names></name></person-group> (<year>2021</year>). <article-title>Hypergraph convolution and hypergraph attention</article-title>. <source>Pattern Recognit</source>. <volume>110</volume>:<fpage>107637</fpage>. <pub-id pub-id-type="doi">10.1016/j.patcog.2020.107637</pub-id></citation>
</ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Baltru&#x00161;aitis</surname> <given-names>T.</given-names></name> <name><surname>Ahuja</surname> <given-names>C.</given-names></name> <name><surname>Morency</surname> <given-names>L.-P.</given-names></name></person-group> (<year>2018</year>). <article-title>Multimodal machine learning: a survey and taxonomy</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell</source>. <volume>41</volume>, <fpage>423</fpage>&#x02013;<lpage>443</lpage>. <pub-id pub-id-type="doi">10.1109/TPAMI.2018.2798607</pub-id><pub-id pub-id-type="pmid">29994351</pub-id></citation></ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Barhoumi</surname> <given-names>Y.</given-names></name> <name><surname>Bouaynaya</surname> <given-names>N. C.</given-names></name> <name><surname>Rasool</surname> <given-names>G.</given-names></name></person-group> (<year>2023</year>). <article-title>Efficient scopeformer: towards scalable and rich feature extraction for intracranial hemorrhage detection</article-title>. <source>IEEE Access</source>. <volume>11</volume>, <fpage>81656</fpage>&#x02013;<lpage>81671</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2023.3301160</pub-id></citation>
</ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Basit</surname> <given-names>A.</given-names></name> <name><surname>Hussain</surname> <given-names>K.</given-names></name> <name><surname>Hanif</surname> <given-names>M. A.</given-names></name> <name><surname>Shafique</surname> <given-names>M.</given-names></name></person-group> (<year>2024</year>). <article-title>Medaide: Leveraging large language models for on-premise medical assistance on edge devices</article-title>. <source>arXiv</source> [preprint] arXiv:2403.00830. <pub-id pub-id-type="doi">10.48550/arXiv.2403.00830</pub-id></citation>
</ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Boehm</surname> <given-names>K. M.</given-names></name> <name><surname>Khosravi</surname> <given-names>P.</given-names></name> <name><surname>Vanguri</surname> <given-names>R.</given-names></name> <name><surname>Gao</surname> <given-names>J.</given-names></name> <name><surname>Shah</surname> <given-names>S. P.</given-names></name></person-group> (<year>2021</year>). <article-title>Harnessing multimodal data integration to advance precision oncology</article-title>. <source>Nat. Rev. Cancer</source> <volume>22</volume>, <fpage>114</fpage>&#x02013;<lpage>126</lpage>. <pub-id pub-id-type="doi">10.1038/s41568-021-00408-3</pub-id><pub-id pub-id-type="pmid">34663944</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bommasani</surname> <given-names>R.</given-names></name> <name><surname>Hudson</surname> <given-names>D. A.</given-names></name> <name><surname>Adeli</surname> <given-names>E.</given-names></name> <name><surname>Altman</surname> <given-names>R.</given-names></name> <name><surname>Arora</surname> <given-names>S.</given-names></name> <name><surname>von Arx</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2022</year>). <source>On the Opportunities and Risks of Foundation Models</source>. arXiv preprint.</citation>
</ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Borisov</surname> <given-names>V.</given-names></name> <name><surname>Leemann</surname> <given-names>T.</given-names></name> <name><surname>Se&#x000DF;ler</surname> <given-names>K.</given-names></name> <name><surname>Haug</surname> <given-names>J.</given-names></name> <name><surname>Pawelczyk</surname> <given-names>M.</given-names></name> <name><surname>Kasneci</surname> <given-names>G.</given-names></name></person-group> (<year>2022</year>). <article-title>Deep neural networks and tabular data: a survey</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst</source>. <volume>35</volume>, <fpage>1</fpage>&#x02013;<lpage>21</lpage>. <pub-id pub-id-type="doi">10.1109/TNNLS.2022.3229161</pub-id><pub-id pub-id-type="pmid">37015381</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>&#x000C7;al&#x00131;&#x0015F;kan</surname> <given-names>M.</given-names></name> <name><surname>Tazaki</surname> <given-names>K.</given-names></name></person-group> (<year>2023</year>). <article-title>Ai/ml advances in non-small cell lung cancer biomarker discovery</article-title>. <source>Front. Oncol</source>. <volume>13</volume>:<fpage>1260374</fpage>. <pub-id pub-id-type="doi">10.3389/fonc.2023.1260374</pub-id><pub-id pub-id-type="pmid">38148837</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cao</surname> <given-names>Z.-J.</given-names></name> <name><surname>Gao</surname> <given-names>G.</given-names></name></person-group> (<year>2022</year>). <article-title>Multi-omics single-cell data integration and regulatory inference with graph-linked embedding</article-title>. <source>Nat. Biotechnol</source>. <volume>40</volume>, <fpage>1458</fpage>&#x02013;<lpage>1466</lpage>. <pub-id pub-id-type="doi">10.1038/s41587-022-01284-4</pub-id><pub-id pub-id-type="pmid">35501393</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chan</surname> <given-names>H.-P.</given-names></name> <name><surname>Hadjiiski</surname> <given-names>L. M.</given-names></name> <name><surname>Samala</surname> <given-names>R. K.</given-names></name></person-group> (<year>2020</year>). <article-title>Computer-aided diagnosis in the era of deep learning</article-title>. <source>Med. Phys</source>. <volume>47</volume>, <fpage>e218</fpage>&#x02013;<lpage>e227</lpage>. <pub-id pub-id-type="doi">10.1002/mp.13764</pub-id><pub-id pub-id-type="pmid">32418340</pub-id></citation></ref>
<ref id="B23">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Chao</surname> <given-names>C.-H.</given-names></name> <name><surname>Zhu</surname> <given-names>Z.</given-names></name> <name><surname>Guo</surname> <given-names>D.</given-names></name> <name><surname>Yan</surname> <given-names>K.</given-names></name> <name><surname>Ho</surname> <given-names>T.-Y.</given-names></name> <name><surname>Cai</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>&#x0201C;Lymph node gross tumor volume detection in oncology imaging via relationship learning using graph neural network,&#x0201D;</article-title> in <source>Medical Image Computing and Computer Assisted Intervention-MICCAI 2020: 23rd International Conference, Lima, Peru, October 4-8, 2020, Proceedings, Part VII 23</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>772</fpage>&#x02013;<lpage>782</lpage>.</citation>
</ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chatzianastasis</surname> <given-names>M.</given-names></name> <name><surname>Vazirgiannis</surname> <given-names>M.</given-names></name> <name><surname>Zhang</surname> <given-names>Z.</given-names></name></person-group> (<year>2023</year>). <article-title>Explainable multilayer graph neural network for cancer gene prediction</article-title>. <source>arXiv</source> [preprint] arXiv:2301.08831. <pub-id pub-id-type="doi">10.1093/bioinformatics/btad643</pub-id><pub-id pub-id-type="pmid">37862225</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>B.</given-names></name> <name><surname>Jin</surname> <given-names>J.</given-names></name> <name><surname>Liu</surname> <given-names>H.</given-names></name> <name><surname>Yang</surname> <given-names>Z.</given-names></name> <name><surname>Zhu</surname> <given-names>H.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Trends and hotspots in research on medical images with deep learning: a bibliometric analysis from 2013 to 2023</article-title>. <source>Front. Artif. Intell</source>. <volume>6</volume>:<fpage>1289669</fpage>. <pub-id pub-id-type="doi">10.3389/frai.2023.1289669</pub-id><pub-id pub-id-type="pmid">38028662</pub-id></citation></ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>R. J.</given-names></name> <name><surname>Lu</surname> <given-names>M. Y.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Williamson</surname> <given-names>D. F.</given-names></name> <name><surname>Rodig</surname> <given-names>S. J.</given-names></name> <name><surname>Lindeman</surname> <given-names>N. I.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Pathomic Fusion: an integrated framework for fusing histopathology and genomic features for cancer diagnosis and prognosis</article-title>. <source>IEEE Trans. Med. Imaging</source> <volume>41</volume>, <fpage>757</fpage>&#x02013;<lpage>770</lpage>. <pub-id pub-id-type="doi">10.1109/TMI.2020.3021387</pub-id><pub-id pub-id-type="pmid">32881682</pub-id></citation></ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>R. J.</given-names></name> <name><surname>Lu</surname> <given-names>M. Y.</given-names></name> <name><surname>Weng</surname> <given-names>W.-H.</given-names></name> <name><surname>Chen</surname> <given-names>T. Y.</given-names></name> <name><surname>Williamson</surname> <given-names>D. F.</given-names></name> <name><surname>Manz</surname> <given-names>T.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>&#x0201C;Multimodal co-attention transformer for survival prediction in gigapixel whole slide images,&#x0201D;</article-title> in <source>2021 IEEE/CVF International Conference on Computer Vision (ICCV)</source>, (Montreal, QC: IEEE), <fpage>3995</fpage>&#x02013;<lpage>4005</lpage>.<pub-id pub-id-type="pmid">37030860</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Choi</surname> <given-names>E.</given-names></name> <name><surname>Bahadori</surname> <given-names>M. T.</given-names></name> <name><surname>Song</surname> <given-names>L.</given-names></name> <name><surname>Stewart</surname> <given-names>W. F.</given-names></name> <name><surname>Sun</surname> <given-names>J.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;GRAM: graph-based attention model for healthcare representation learning,&#x0201D;</article-title> in <source>Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source> (<publisher-loc>ACM</publisher-loc>), <fpage>787</fpage>&#x02013;<lpage>795</lpage>.<pub-id pub-id-type="pmid">33717639</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Cui</surname> <given-names>H.</given-names></name> <name><surname>Xuan</surname> <given-names>P.</given-names></name> <name><surname>Jin</surname> <given-names>Q.</given-names></name> <name><surname>Ding</surname> <given-names>M.</given-names></name> <name><surname>Li</surname> <given-names>B.</given-names></name> <name><surname>Zou</surname> <given-names>B.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>&#x0201C;Co-graph attention reasoning based imaging and clinical features integration for lymph node metastasis prediction,&#x0201D;</article-title> in <source>Medical Image Computing and Computer Assisted Intervention-MICCAI 2021: 24th International Conference, Strasbourg, France, September 27-October 1, 2021, Proceedings, Part V 24</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>657</fpage>&#x02013;<lpage>666</lpage>.</citation>
</ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dai</surname> <given-names>J.</given-names></name> <name><surname>Pan</surname> <given-names>X.</given-names></name> <name><surname>Sun</surname> <given-names>R.</given-names></name> <name><surname>Ji</surname> <given-names>J.</given-names></name> <name><surname>Xu</surname> <given-names>X.</given-names></name> <name><surname>Liu</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Safe rlhf: Safe reinforcement learning from human feedback</article-title>. <source>arXiv</source> [preprint] arXiv:2310.12773.<pub-id pub-id-type="pmid">38819632</pub-id></citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dara</surname> <given-names>S.</given-names></name> <name><surname>Tumma</surname> <given-names>P.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;Feature extraction by using deep learning: a survey,&#x0201D;</article-title> in <source>2018 Second International Conference on Electronics, Communication and Aerospace Technology (ICECA</source>) (Coimbatore: IEEE).</citation>
</ref>
<ref id="B32">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Defferrard</surname> <given-names>M.</given-names></name> <name><surname>Bresson</surname> <given-names>X.</given-names></name> <name><surname>Vandergheynst</surname> <given-names>P.</given-names></name></person-group> (<year>2016</year>). <article-title>Convolutional neural networks on graphs with fast localized spectral filtering</article-title>. <source>Adv. Neural Inf. Process. Syst</source>. <volume>29</volume>, <fpage>3844</fpage>&#x02013;<lpage>3852</lpage>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://proceedings.neurips.cc/paper_files/paper/2016/file/04df4d434d481c5bb723be1b6df1ee65-Paper.pdf">https://proceedings.neurips.cc/paper_files/paper/2016/file/04df4d434d481c5bb723be1b6df1ee65-Paper.pdf</ext-link></citation>
</ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dera</surname> <given-names>D.</given-names></name> <name><surname>Bouaynaya</surname> <given-names>N. C.</given-names></name> <name><surname>Rasool</surname> <given-names>G.</given-names></name> <name><surname>Shterenberg</surname> <given-names>R.</given-names></name> <name><surname>Fathallah-Shaykh</surname> <given-names>H. M.</given-names></name></person-group> (<year>2021</year>). <article-title>PremiUm-CNN: propagating uncertainty towards robust convolutional neural networks</article-title>. <source>IEEE Trans. Signal Proc</source>. <volume>69</volume>, <fpage>4669</fpage>&#x02013;<lpage>4684</lpage>. <pub-id pub-id-type="doi">10.1109/TSP.2021.3096804</pub-id></citation>
</ref>
<ref id="B34">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Dera</surname> <given-names>D.</given-names></name> <name><surname>Rasool</surname> <given-names>G.</given-names></name> <name><surname>Bouaynaya</surname> <given-names>N.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Extended variational inference for propagating uncertainty in convolutional neural networks,&#x0201D;</article-title> in <source>2019 IEEE 29th International Workshop on Machine Learning for Signal Processing (MLSP)</source> (<publisher-loc>Pittsburgh, PA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x02013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Derrow-Pinion</surname> <given-names>A.</given-names></name> <name><surname>She</surname> <given-names>J.</given-names></name> <name><surname>Wong</surname> <given-names>D.</given-names></name> <name><surname>Lange</surname> <given-names>O.</given-names></name> <name><surname>Hester</surname> <given-names>T.</given-names></name> <name><surname>Perez</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>&#x0201C;ETA prediction with graph neural networks in google maps,&#x0201D;</article-title> in <source>Proceedings of the 30th ACM International Conference on Information &#x00026; Knowledge Management</source> (New York, NY: ACM), <fpage>3767</fpage>&#x02013;<lpage>3776</lpage>.</citation>
</ref>
<ref id="B36">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Devlin</surname> <given-names>J.</given-names></name> <name><surname>Chang</surname> <given-names>M.-W.</given-names></name> <name><surname>Lee</surname> <given-names>K.</given-names></name> <name><surname>Toutanova</surname> <given-names>K.</given-names></name></person-group> (<year>2019</year>). <source>Bert: Pre-Training of Deep Bidirectional Transformers for Language Understanding</source>. <publisher-loc>Minneapolis</publisher-loc>: <publisher-name>North American Chapter of the Association for Computational Linguistics</publisher-name>.</citation>
</ref>
<ref id="B37">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Dosovitskiy</surname> <given-names>A.</given-names></name> <name><surname>Beyer</surname> <given-names>L.</given-names></name> <name><surname>Kolesnikov</surname> <given-names>A.</given-names></name> <name><surname>Weissenborn</surname> <given-names>D.</given-names></name> <name><surname>Zhai</surname> <given-names>X.</given-names></name> <name><surname>Unterthiner</surname> <given-names>T.</given-names></name> <etal/></person-group>. (<year>2020</year>). <source>An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale</source>. <publisher-loc>Vienna</publisher-loc>: <publisher-name>ICLR</publisher-name>.</citation>
</ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Du</surname> <given-names>H.</given-names></name> <name><surname>Feng</surname> <given-names>J.</given-names></name> <name><surname>Feng</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>Zoom in to where it matters: a hierarchical graph based model for mammogram analysis</article-title>. <source>arXiv</source> [preprint] arXiv:1912.07517. <pub-id pub-id-type="doi">10.48550/arXiv.1912.07517</pub-id></citation>
</ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ektefaie</surname> <given-names>Y.</given-names></name> <name><surname>Dasoulas</surname> <given-names>G.</given-names></name> <name><surname>Noori</surname> <given-names>A.</given-names></name> <name><surname>Farhat</surname> <given-names>M.</given-names></name> <name><surname>Zitnik</surname> <given-names>M.</given-names></name></person-group> (<year>2023</year>). <article-title>Multimodal learning with graphs</article-title>. <source>Nat. Mach. Intell</source>. <volume>5</volume>, <fpage>340</fpage>&#x02013;<lpage>350</lpage>. <pub-id pub-id-type="doi">10.1038/s42256-023-00624-6</pub-id><pub-id pub-id-type="pmid">38076673</pub-id></citation></ref>
<ref id="B40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Farooq</surname> <given-names>H.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name> <name><surname>Georgiou</surname> <given-names>T. T.</given-names></name> <name><surname>Tannenbaum</surname> <given-names>A.</given-names></name> <name><surname>Lenglet</surname> <given-names>C.</given-names></name></person-group> (<year>2019</year>). <article-title>Network curvature as a hallmark of brain structural connectivity</article-title>. <source>Nat. Commun</source>. <volume>10</volume>, <fpage>1</fpage>&#x02013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.1038/s41467-019-12915-x</pub-id><pub-id pub-id-type="pmid">31666510</pub-id></citation></ref>
<ref id="B41">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Fathinezhad</surname> <given-names>F.</given-names></name> <name><surname>Adibi</surname> <given-names>P.</given-names></name> <name><surname>Shoushtarian</surname> <given-names>B.</given-names></name> <name><surname>Chanussot</surname> <given-names>J.</given-names></name></person-group> (<year>2023</year>). <source>Graph Neural Networks and Reinforcement Learning: A Survey</source>. <publisher-loc>London</publisher-loc>: <publisher-name>IntechOpen</publisher-name>.</citation>
</ref>
<ref id="B42">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Feng</surname> <given-names>Y.-H.</given-names></name> <name><surname>Zhang</surname> <given-names>S.-W.</given-names></name> <name><surname>Shi</surname> <given-names>J.-Y.</given-names></name></person-group> (<year>2020</year>). <article-title>DPDDI: a deep predictor for drug-drug interactions</article-title>. <source>BMC Bioinformat</source>. <volume>21</volume>, <fpage>1</fpage>&#x02013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1186/s12859-020-03724-x</pub-id><pub-id pub-id-type="pmid">32972364</pub-id></citation></ref>
<ref id="B43">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Fout</surname> <given-names>A.</given-names></name> <name><surname>Byrd</surname> <given-names>J.</given-names></name> <name><surname>Shariat</surname> <given-names>B.</given-names></name> <name><surname>Ben-Hur</surname> <given-names>A.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;Protein interface prediction using graph convolutional networks,&#x0201D;</article-title> in <source>Adv. Neural Inf. Process. Syst</source> (<publisher-loc>NeurIPS</publisher-loc>), <fpage>30</fpage>.</citation>
</ref>
<ref id="B44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fritz</surname> <given-names>C.</given-names></name> <name><surname>Dorigatti</surname> <given-names>E.</given-names></name> <name><surname>R&#x000FC;gamer</surname> <given-names>D.</given-names></name></person-group> (<year>2022</year>). <article-title>Combining graph neural networks and spatio-temporal disease models to improve the prediction of weekly covid-19 cases in germany</article-title>. <source>Sci. Rep</source>. <volume>12</volume>:<fpage>3930</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-022-07757-5</pub-id><pub-id pub-id-type="pmid">35273252</pub-id></citation></ref>
<ref id="B45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Galassi</surname> <given-names>A.</given-names></name> <name><surname>Lippi</surname> <given-names>M.</given-names></name> <name><surname>Torroni</surname> <given-names>P.</given-names></name></person-group> (<year>2021</year>). <article-title>Attention in natural language processing</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst</source>. <volume>32</volume>, <fpage>4291</fpage>&#x02013;<lpage>4308</lpage>. <pub-id pub-id-type="doi">10.1109/TNNLS.2020.3019893</pub-id><pub-id pub-id-type="pmid">32915750</pub-id></citation></ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ghaffari Laleh</surname> <given-names>N.</given-names></name> <name><surname>Ligero</surname> <given-names>M.</given-names></name> <name><surname>Perez-Lopez</surname> <given-names>R.</given-names></name> <name><surname>Kather</surname> <given-names>J. N.</given-names></name></person-group> (<year>2023</year>). <article-title>Facts and hopes on the use of artificial intelligence for predictive immunotherapy biomarkers in cancer</article-title>. <source>Clin. Cancer Res</source>. <volume>29</volume>, <fpage>316</fpage>&#x02013;<lpage>323</lpage>. <pub-id pub-id-type="doi">10.1158/1078-0432.CCR-22-0390</pub-id><pub-id pub-id-type="pmid">36083132</pub-id></citation></ref>
<ref id="B47">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Gilmer</surname> <given-names>J.</given-names></name> <name><surname>Schoenholz</surname> <given-names>S. S.</given-names></name> <name><surname>Riley</surname> <given-names>P. F.</given-names></name> <name><surname>Vinyals</surname> <given-names>O.</given-names></name> <name><surname>Dahl</surname> <given-names>G. E.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;Neural message passing for quantum chemistry,&#x0201D;</article-title> in <source>International Conference on Machine Learning</source> (<publisher-loc>New York</publisher-loc>: <publisher-name>PMLR</publisher-name>), <fpage>1263</fpage>&#x02013;<lpage>1272</lpage>.</citation>
</ref>
<ref id="B48">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Giuffr&#x000E8;</surname> <given-names>M.</given-names></name> <name><surname>Kresevic</surname> <given-names>S.</given-names></name> <name><surname>Pugliese</surname> <given-names>N.</given-names></name> <name><surname>You</surname> <given-names>K.</given-names></name> <name><surname>Shung</surname> <given-names>D. L.</given-names></name></person-group> (<year>2024</year>). <article-title>&#x0201C;Optimizing large language models in digestive disease: strategies and challenges to improve clinical outcomes,&#x0201D;</article-title> in <source>Liver International</source> (<publisher-loc>Wiley</publisher-loc>).</citation>
</ref>
<ref id="B49">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Gonzalez Zelaya</surname> <given-names>C. V.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Towards explaining the effects of data preprocessing on machine learning,&#x0201D;</article-title> in <source>IEEE 35th International Conference on Data Engineering (ICDE)</source> (<publisher-loc>Macao</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>2086</fpage>&#x02013;<lpage>2090</lpage>.</citation>
</ref>
<ref id="B50">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Grossman</surname> <given-names>R. L.</given-names></name> <name><surname>Heath</surname> <given-names>A. P.</given-names></name> <name><surname>Ferretti</surname> <given-names>V.</given-names></name> <name><surname>Varmus</surname> <given-names>H. E.</given-names></name> <name><surname>Lowy</surname> <given-names>D. R.</given-names></name> <name><surname>Kibbe</surname> <given-names>W. A.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Toward a shared vision for cancer genomic data</article-title>. <source>New Engl. J. Med</source>. <volume>375</volume>, <fpage>1109</fpage>&#x02013;<lpage>1112</lpage>. <pub-id pub-id-type="doi">10.1056/NEJMp1607591</pub-id><pub-id pub-id-type="pmid">27653561</pub-id></citation></ref>
<ref id="B51">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hadid</surname> <given-names>A.</given-names></name> <name><surname>Chakraborty</surname> <given-names>T.</given-names></name> <name><surname>Busby</surname> <given-names>D.</given-names></name></person-group> (<year>2024</year>). <article-title>When geoscience meets generative ai and large language models: foundations, trends, and future challenges</article-title>. <source>Expert Syst</source>. <volume>2024</volume>:<fpage>e13654</fpage>. <pub-id pub-id-type="doi">10.1111/exsy.13654</pub-id></citation>
</ref>
<ref id="B52">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Hamilton</surname> <given-names>W.</given-names></name> <name><surname>Ying</surname> <given-names>Z.</given-names></name> <name><surname>Leskovec</surname> <given-names>J.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;Inductive representation learning on large graphs,&#x0201D;</article-title> in <source>Adv. Neural Inf. Process. Syst</source> (<publisher-loc>NeurIPS</publisher-loc>), <fpage>30</fpage>.</citation>
</ref>
<ref id="B53">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hamilton</surname> <given-names>W. L.</given-names></name></person-group> (<year>2020</year>). <article-title>Graph representation learning</article-title>. <source>Synth. Lect. Artif. Intellig. Mach. Learn</source>. <volume>14</volume>:<fpage>5</fpage>. <pub-id pub-id-type="doi">10.1007/978-3-031-01588-5</pub-id></citation>
</ref>
<ref id="B54">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Han</surname> <given-names>K.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Chen</surname> <given-names>H.</given-names></name> <name><surname>Chen</surname> <given-names>X.</given-names></name> <name><surname>Guo</surname> <given-names>J.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>A survey on vision transformer</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell</source>. <volume>45</volume>, <fpage>87</fpage>&#x02013;<lpage>110</lpage>. <pub-id pub-id-type="doi">10.1109/TPAMI.2022.3152247</pub-id><pub-id pub-id-type="pmid">35180075</pub-id></citation></ref>
<ref id="B55">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hartsock</surname> <given-names>I.</given-names></name> <name><surname>Rasool</surname> <given-names>G.</given-names></name></person-group> (<year>2024</year>). <article-title>Vision-language models for medical report generation and visual question answering: a review</article-title>. <source>arXiv</source> [preprint] arXiv:2403.02469. <pub-id pub-id-type="doi">10.48550/arXiv.2403.02469</pub-id></citation>
</ref>
<ref id="B56">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hook</surname> <given-names>D. W.</given-names></name> <name><surname>Porter</surname> <given-names>S. J.</given-names></name> <name><surname>Herzog</surname> <given-names>C.</given-names></name></person-group> (<year>2018</year>). <article-title>Dimensions: building context for search and evaluation</article-title>. <source>Front. Res. Metrics Anal</source>. <volume>3</volume>:<fpage>23</fpage>. <pub-id pub-id-type="doi">10.3389/frma.2018.00023</pub-id></citation>
</ref>
<ref id="B57">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>R.</given-names></name> <name><surname>Singh</surname> <given-names>A.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Unit: Multimodal multitask learning with a unified transformer,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF International Conference on Computer Vision</source> (<publisher-loc>Montreal, QC</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1439</fpage>&#x02013;<lpage>1449</lpage>.</citation>
</ref>
<ref id="B58">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>Y.</given-names></name> <name><surname>Chung</surname> <given-names>A. C.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Edge-variational graph convolutional networks for uncertainty-aware disease prediction,&#x0201D;</article-title> in <source>Medical Image Computing and Computer Assisted Intervention-MICCAI 2020: 23rd International Conference, Lima, Peru, October 4-8, 2020, Proceedings, Part VII 23</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>562</fpage>&#x02013;<lpage>572</lpage>.</citation>
</ref>
<ref id="B59">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>Y.</given-names></name> <name><surname>Du</surname> <given-names>C.</given-names></name> <name><surname>Xue</surname> <given-names>Z.</given-names></name> <name><surname>Chen</surname> <given-names>X.</given-names></name> <name><surname>Zhao</surname> <given-names>H.</given-names></name> <name><surname>Huang</surname> <given-names>L.</given-names></name></person-group> (<year>2021</year>). <source>What Makes Multi-Modal Learning Better Than Single (Provably)</source>. <publisher-loc>New Orleans, LA</publisher-loc>: <publisher-name>Advances in Neural Information Processing Systems</publisher-name>.</citation>
</ref>
<ref id="B60">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>Y.</given-names></name> <name><surname>Lin</surname> <given-names>J.</given-names></name> <name><surname>Zhou</surname> <given-names>C.</given-names></name> <name><surname>Yang</surname> <given-names>H.</given-names></name> <name><surname>Huang</surname> <given-names>L.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;Modality competition: What makes joint training of multi-modal network fail in deep learning? (provably),&#x0201D;</article-title> in <source>International Conference on Machine Learning</source> (<publisher-loc>New York</publisher-loc>: <publisher-name>PMLR</publisher-name>), <fpage>9226</fpage>&#x02013;<lpage>9259</lpage>.</citation>
</ref>
<ref id="B61">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ibrahim</surname> <given-names>A.</given-names></name> <name><surname>Mohamed</surname> <given-names>H. K.</given-names></name> <name><surname>Maher</surname> <given-names>A.</given-names></name> <name><surname>Zhang</surname> <given-names>B.</given-names></name></person-group> (<year>2022</year>). <article-title>A survey on human cancer categorization based on deep learning</article-title>. <source>Front. Artif. Intellig</source>. <volume>5</volume>:<fpage>884749</fpage>. <pub-id pub-id-type="doi">10.3389/frai.2022.884749</pub-id><pub-id pub-id-type="pmid">35832207</pub-id></citation></ref>
<ref id="B62">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Iqbal</surname> <given-names>M. S.</given-names></name> <name><surname>Ahmad</surname> <given-names>W.</given-names></name> <name><surname>Alizadehsani</surname> <given-names>R.</given-names></name> <name><surname>Hussain</surname> <given-names>S.</given-names></name> <name><surname>Rehman</surname> <given-names>R.</given-names></name></person-group> (<year>2022</year>). <article-title>Breast cancer dataset, classification and detection using deep learning</article-title>. <source>Healthcare</source>. <volume>10</volume>:<fpage>2395</fpage>. <pub-id pub-id-type="doi">10.3390/healthcare10122395</pub-id><pub-id pub-id-type="pmid">36553919</pub-id></citation></ref>
<ref id="B63">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Iqbal</surname> <given-names>M. S.</given-names></name> <name><surname>Luo</surname> <given-names>B.</given-names></name> <name><surname>Mehmood</surname> <given-names>R.</given-names></name> <name><surname>Alrige</surname> <given-names>M. A.</given-names></name> <name><surname>Alharbey</surname> <given-names>R.</given-names></name></person-group> (<year>2019</year>). <article-title>Mitochondrial organelle movement classification (fission and fusion) via convolutional neural network approach</article-title>. <source>IEEE Access</source> <volume>7</volume>, <fpage>86570</fpage>&#x02013;<lpage>86577</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2019.2925041</pub-id></citation>
</ref>
<ref id="B64">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Islam</surname> <given-names>T. U.</given-names></name> <name><surname>Ghasemi</surname> <given-names>R.</given-names></name> <name><surname>Mohammed</surname> <given-names>N.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;Privacy-preserving federated learning model for healthcare data,&#x0201D;</article-title> in <source>2022 IEEE 12th Annual Computing and Communication Workshop and Conference (CCWC)</source> (<publisher-loc>Las Vegas, NV</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>0281</fpage>&#x02013;<lpage>0287</lpage>.</citation>
</ref>
<ref id="B65">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jaegle</surname> <given-names>A.</given-names></name> <name><surname>Borgeaud</surname> <given-names>S.</given-names></name> <name><surname>Alayrac</surname> <given-names>J.-B.</given-names></name> <name><surname>Doersch</surname> <given-names>C.</given-names></name> <name><surname>Ionescu</surname> <given-names>C.</given-names></name> <name><surname>Ding</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2021</year>). <source>Perceiver IO: A General Architecture for Structured Inputs</source> &#x00026; <italic>Outputs</italic> (ICLR).</citation>
</ref>
<ref id="B66">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jansen</surname> <given-names>C.</given-names></name> <name><surname>Ramirez</surname> <given-names>R. N.</given-names></name> <name><surname>El-Ali</surname> <given-names>N. C.</given-names></name> <name><surname>Gomez-Cabrero</surname> <given-names>D.</given-names></name> <name><surname>Tegner</surname> <given-names>J.</given-names></name> <name><surname>Merkenschlager</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Building gene regulatory networks from scATAC-seq and scRNA-seq using linked self organizing maps</article-title>. <source>PLoS Comput. Biol</source>. <volume>15</volume>:<fpage>e1006555</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1006555</pub-id><pub-id pub-id-type="pmid">31682608</pub-id></citation></ref>
<ref id="B67">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Javaloy</surname> <given-names>A.</given-names></name> <name><surname>Meghdadi</surname> <given-names>M.</given-names></name> <name><surname>Valera</surname> <given-names>I.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;Mitigating modality collapse in multimodal VAEs via impartial optimization,&#x0201D;</article-title> in <source>International Conference on Machine Learning</source> (<publisher-loc>New York</publisher-loc>: <publisher-name>PMLR</publisher-name>), <fpage>9938</fpage>&#x02013;<lpage>9964</lpage>.</citation>
</ref>
<ref id="B68">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ji</surname> <given-names>Y.</given-names></name> <name><surname>Zhou</surname> <given-names>Z.</given-names></name> <name><surname>Liu</surname> <given-names>H.</given-names></name> <name><surname>Davuluri</surname> <given-names>R. V.</given-names></name></person-group> (<year>2020</year>). <article-title>DNABERT: pre-trained Bidirectional Encoder Representations from Transformers model for DNA-language in genome</article-title>. <source>bioRxiv</source>. <pub-id pub-id-type="doi">10.1101/2020.09.17.301879</pub-id><pub-id pub-id-type="pmid">33538820</pub-id></citation></ref>
<ref id="B69">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Jia</surname> <given-names>C.</given-names></name> <name><surname>Yang</surname> <given-names>Y.</given-names></name> <name><surname>Xia</surname> <given-names>Y.</given-names></name> <name><surname>Chen</surname> <given-names>Y.-T.</given-names></name> <name><surname>Parekh</surname> <given-names>Z.</given-names></name> <name><surname>Pham</surname> <given-names>H.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>&#x0201C;Scaling up visual and vision-language representation learning with noisy text supervision,&#x0201D;</article-title> in <source>International Conference on Machine Learning</source> (<publisher-loc>New York</publisher-loc>: <publisher-name>PMLR</publisher-name>), <fpage>4904</fpage>&#x02013;<lpage>4916</lpage>.</citation>
</ref>
<ref id="B70">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jiang</surname> <given-names>J.</given-names></name> <name><surname>Dun</surname> <given-names>C.</given-names></name> <name><surname>Huang</surname> <given-names>T.</given-names></name> <name><surname>Lu</surname> <given-names>Z.</given-names></name></person-group> (<year>2018</year>). <article-title>Graph convolutional reinforcement learning</article-title>. <source>arXiv</source> [preprint] arXiv:1810.09202.</citation>
</ref>
<ref id="B71">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jiang</surname> <given-names>P.</given-names></name> <name><surname>Sinha</surname> <given-names>S.</given-names></name> <name><surname>Aldape</surname> <given-names>K.</given-names></name> <name><surname>Hannenhalli</surname> <given-names>S.</given-names></name> <name><surname>Sahinalp</surname> <given-names>C.</given-names></name> <name><surname>Ruppin</surname> <given-names>E.</given-names></name></person-group> (<year>2022</year>). <article-title>Big Data in basic and translational cancer research</article-title>. <source>Nat. Rev. Cancer</source> <volume>22</volume>, <fpage>625</fpage>&#x02013;<lpage>639</lpage>. <pub-id pub-id-type="doi">10.1038/s41568-022-00502-0</pub-id><pub-id pub-id-type="pmid">36064595</pub-id></citation></ref>
<ref id="B72">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jiao</surname> <given-names>L.</given-names></name> <name><surname>Chen</surname> <given-names>J.</given-names></name> <name><surname>Liu</surname> <given-names>F.</given-names></name> <name><surname>Yang</surname> <given-names>S.</given-names></name> <name><surname>You</surname> <given-names>C.</given-names></name> <name><surname>Liu</surname> <given-names>X.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Graph representation learning meets computer vision: a survey</article-title>. <source>IEEE Trans. Artif. Intellig</source>. <volume>4</volume>, <fpage>2</fpage>&#x02013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1109/TAI.2022.3194869</pub-id></citation>
</ref>
<ref id="B73">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jin</surname> <given-names>D.</given-names></name> <name><surname>Huo</surname> <given-names>C.</given-names></name> <name><surname>Dang</surname> <given-names>J.</given-names></name> <name><surname>Zhu</surname> <given-names>P.</given-names></name> <name><surname>Zhang</surname> <given-names>W.</given-names></name> <name><surname>Pedrycz</surname> <given-names>W.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Heterogeneous graph neural networks using self-supervised reciprocally contrastive learning</article-title>. <source>arXiv</source> [preprint] arXiv:2205.00256.</citation>
</ref>
<ref id="B74">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Joo</surname> <given-names>S.</given-names></name> <name><surname>Ko</surname> <given-names>E.</given-names></name> <name><surname>Kwon</surname> <given-names>S.</given-names></name> <name><surname>Jeon</surname> <given-names>E.</given-names></name> <name><surname>Jung</surname> <given-names>H.</given-names></name> <name><surname>Kim</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Multimodal deep learning models for the prediction of pathologic response to neoadjuvant chemotherapy in breast cancer</article-title>. <source>Sci. Rep</source>. <volume>11</volume>:<fpage>18800</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-021-98408-8</pub-id><pub-id pub-id-type="pmid">34552163</pub-id></citation></ref>
<ref id="B75">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Kaczmarek</surname> <given-names>E.</given-names></name> <name><surname>Jamzad</surname> <given-names>A.</given-names></name> <name><surname>Imtiaz</surname> <given-names>T.</given-names></name> <name><surname>Nanayakkara</surname> <given-names>J.</given-names></name> <name><surname>Renwick</surname> <given-names>N.</given-names></name> <name><surname>Mousavi</surname> <given-names>P.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Multi-omic graph transformers for cancer classification and interpretation,&#x0201D;</article-title> in <source>Pacific Symposium On Biocomputing 2022</source> (<publisher-loc>Singapore</publisher-loc>: <publisher-name>World Scientific</publisher-name>), <fpage>373</fpage>&#x02013;<lpage>384</lpage>.<pub-id pub-id-type="pmid">34890164</pub-id></citation></ref>
<ref id="B76">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Kalfaoglu</surname> <given-names>M. E.</given-names></name> <name><surname>Kalkan</surname> <given-names>S.</given-names></name> <name><surname>Alatan</surname> <given-names>A. A.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Late Temporal Modeling in 3D CNN Architectures with BERT for Action Recognition,&#x0201D;</article-title> in <source>Computer Vision-ECCV 2020 Workshops: Glasgow, UK, August 23-28, 2020, Proceedings, Part V 16</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>731</fpage>&#x02013;<lpage>747</lpage>.</citation>
</ref>
<ref id="B77">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Khan</surname> <given-names>M.</given-names></name> <name><surname>Ashraf</surname> <given-names>I.</given-names></name> <name><surname>Alhaisoni</surname> <given-names>M.</given-names></name> <name><surname>Dama&#x00161;evi&#x0010D;ius</surname> <given-names>R.</given-names></name> <name><surname>Scherer</surname> <given-names>R.</given-names></name> <name><surname>Rehman</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Multimodal brain tumor classification using deep learning and robust feature selection: a machine learning application for radiologists</article-title>. <source>Diagnostics</source> <volume>10</volume>:<fpage>565</fpage>. <pub-id pub-id-type="doi">10.3390/diagnostics10080565</pub-id><pub-id pub-id-type="pmid">32781795</pub-id></citation></ref>
<ref id="B78">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Khan</surname> <given-names>S.</given-names></name> <name><surname>Ali</surname> <given-names>H.</given-names></name> <name><surname>Shah</surname> <given-names>Z.</given-names></name></person-group> (<year>2023</year>). <article-title>Identifying the role of vision transformer for skin cancer&#x02013;a scoping review</article-title>. <source>Front. Artif. Intellig</source>. <volume>6</volume>:<fpage>1202990</fpage>. <pub-id pub-id-type="doi">10.3389/frai.2023.1202990</pub-id><pub-id pub-id-type="pmid">37529760</pub-id></citation></ref>
<ref id="B79">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kipf</surname> <given-names>T. N.</given-names></name> <name><surname>Welling</surname> <given-names>M.</given-names></name></person-group> (<year>2016</year>). <article-title>Semi-supervised classification with graph convolutional networks</article-title>. <source>arXiv</source> [preprint] arXiv:1609.02907.</citation>
</ref>
<ref id="B80">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>LeCun</surname> <given-names>Y.</given-names></name> <name><surname>Bengio</surname> <given-names>Y.</given-names></name> <name><surname>Hinton</surname> <given-names>G.</given-names></name></person-group> (<year>2015</year>). <article-title>Deep learning</article-title>. <source>Nature</source> <volume>521</volume>, <fpage>436</fpage>&#x02013;<lpage>444</lpage>. <pub-id pub-id-type="doi">10.1038/nature14539</pub-id><pub-id pub-id-type="pmid">26017442</pub-id></citation></ref>
<ref id="B81">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lee</surname> <given-names>J.</given-names></name> <name><surname>Yoon</surname> <given-names>W.</given-names></name> <name><surname>Kim</surname> <given-names>S.</given-names></name> <name><surname>Kim</surname> <given-names>D.</given-names></name> <name><surname>Kim</surname> <given-names>S.</given-names></name> <name><surname>So</surname> <given-names>C. H.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>1234</fpage>&#x02013;<lpage>1240</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id><pub-id pub-id-type="pmid">31501885</pub-id></citation></ref>
<ref id="B82">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Leng</surname> <given-names>D.</given-names></name> <name><surname>Zheng</surname> <given-names>L.</given-names></name> <name><surname>Wen</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Wu</surname> <given-names>L.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>A benchmark study of deep learning-based multi-omics data fusion methods for cancer</article-title>. <source>Genome Biol</source>. <volume>23</volume>, <fpage>1</fpage>&#x02013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1186/s13059-022-02739-2</pub-id><pub-id pub-id-type="pmid">35945544</pub-id></citation></ref>
<ref id="B83">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Lewis</surname> <given-names>M.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name> <name><surname>Goyal</surname> <given-names>N.</given-names></name> <name><surname>Ghazvininejad</surname> <given-names>M.</given-names></name> <name><surname>Mohamed</surname> <given-names>A.</given-names></name> <name><surname>Levy</surname> <given-names>O.</given-names></name> <etal/></person-group>. (<year>2019</year>). <source>BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension</source>. <publisher-loc>Dublin</publisher-loc>: <publisher-name>Annual Meeting of the Association for Computational Linguistics</publisher-name>.</citation>
</ref>
<ref id="B84">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>M. M.</given-names></name> <name><surname>Huang</surname> <given-names>K.</given-names></name> <name><surname>Zitnik</surname> <given-names>M.</given-names></name></person-group> (<year>2022</year>). <article-title>Graph representation learning in biomedicine and healthcare</article-title>. <source>Nat. Biomed. Eng</source>. <volume>6</volume>, <fpage>1353</fpage>&#x02013;<lpage>1369</lpage>. <pub-id pub-id-type="doi">10.1038/s41551-022-00942-x</pub-id><pub-id pub-id-type="pmid">36316368</pub-id></citation></ref>
<ref id="B85">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>P.</given-names></name> <name><surname>Gu</surname> <given-names>J.</given-names></name> <name><surname>Kuen</surname> <given-names>J.</given-names></name> <name><surname>Morariu</surname> <given-names>V. I.</given-names></name> <name><surname>Zhao</surname> <given-names>H.</given-names></name> <name><surname>Jain</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2021a</year>). <article-title>&#x0201C;Selfdoc: self-supervised document representation learning,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source> (<publisher-loc>Nashville, TN</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>5652</fpage>&#x02013;<lpage>5660</lpage>.</citation>
</ref>
<ref id="B86">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>P.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Qiao</surname> <given-names>Y.</given-names></name> <name><surname>Chen</surname> <given-names>H.</given-names></name> <name><surname>Yu</surname> <given-names>Y.</given-names></name> <name><surname>Yao</surname> <given-names>X.</given-names></name> <etal/></person-group>. (<year>2021b</year>). <article-title>An effective self-supervised framework for learning expressive molecular global representations to drug discovery</article-title>. <source>Brief. Bioinform</source>. <volume>22</volume>:<fpage>bbab109</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbab109</pub-id><pub-id pub-id-type="pmid">33940598</pub-id></citation></ref>
<ref id="B87">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>P.</given-names></name> <name><surname>Yang</surname> <given-names>Y.</given-names></name> <name><surname>Pagnucco</surname> <given-names>M.</given-names></name> <name><surname>Song</surname> <given-names>Y.</given-names></name></person-group> (<year>2022</year>). <article-title>Explainability in graph neural networks: An experimental survey</article-title>. <source>arXiv</source> [preprint] arXiv:2203.09258. <pub-id pub-id-type="doi">10.48550/arXiv.2203.09258</pub-id><pub-id pub-id-type="pmid">35216273</pub-id></citation></ref>
<ref id="B88">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Z.</given-names></name> <name><surname>Pardos</surname> <given-names>Z. A.</given-names></name> <name><surname>Ren</surname> <given-names>C.</given-names></name></person-group> (<year>2024</year>). <article-title>Aligning open educational resources to new taxonomies: How AI technologies can help and in which scenarios</article-title>. <source>Comp. Educ</source>. <volume>216</volume>:<fpage>105027</fpage>. <pub-id pub-id-type="doi">10.1016/j.compedu.2024.105027</pub-id></citation>
</ref>
<ref id="B89">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lian</surname> <given-names>J.</given-names></name> <name><surname>Deng</surname> <given-names>J.</given-names></name> <name><surname>Hui</surname> <given-names>E. S.</given-names></name> <name><surname>Koohi-Moghadam</surname> <given-names>M.</given-names></name> <name><surname>She</surname> <given-names>Y.</given-names></name> <name><surname>Chen</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Early stage NSCLS patients&#x00027; prognostic prediction with multi-information using transformer and graph neural network model</article-title>. <source>Elife</source> <volume>11</volume>:<fpage>e80547</fpage>. <pub-id pub-id-type="doi">10.7554/eLife.80547</pub-id><pub-id pub-id-type="pmid">36194194</pub-id></citation></ref>
<ref id="B90">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liang</surname> <given-names>J.</given-names></name> <name><surname>Yang</surname> <given-names>C.</given-names></name> <name><surname>Zeng</surname> <given-names>M.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name></person-group> (<year>2022</year>). <article-title>TransConver: transformer and convolution parallel network for developing automatic brain tumor segmentation in MRI images</article-title>. <source>Quant. Imaging Med. Surg</source>. <volume>12</volume>:<fpage>4</fpage>. <pub-id pub-id-type="doi">10.21037/qims-21-919</pub-id><pub-id pub-id-type="pmid">35371952</pub-id></citation></ref>
<ref id="B91">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liang</surname> <given-names>P. P.</given-names></name> <name><surname>Zadeh</surname> <given-names>A.</given-names></name> <name><surname>Morency</surname> <given-names>L.-P.</given-names></name></person-group> (<year>2022</year>). <article-title>Foundations and recent trends in multimodal machine learning: principles, challenges, and open questions</article-title>. <source>arXiv</source> [preprint] arXiv:2209.03430. <pub-id pub-id-type="doi">10.48550/arXiv.2209.03430</pub-id></citation>
</ref>
<ref id="B92">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lipkova</surname> <given-names>J.</given-names></name> <name><surname>Chen</surname> <given-names>R. J.</given-names></name> <name><surname>Chen</surname> <given-names>B.</given-names></name> <name><surname>Lu</surname> <given-names>M. Y.</given-names></name> <name><surname>Barbieri</surname> <given-names>M.</given-names></name> <name><surname>Shao</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Artificial intelligence for multimodal data integration in oncology</article-title>. <source>Cancer Cell</source> <volume>40</volume>, <fpage>1095</fpage>&#x02013;<lpage>1110</lpage>. <pub-id pub-id-type="doi">10.1016/j.ccell.2022.09.012</pub-id><pub-id pub-id-type="pmid">36220072</pub-id></citation></ref>
<ref id="B93">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Litjens</surname> <given-names>G.</given-names></name> <name><surname>Kooi</surname> <given-names>T.</given-names></name> <name><surname>Bejnordi</surname> <given-names>B. E.</given-names></name> <name><surname>Setio</surname> <given-names>A. A. A.</given-names></name> <name><surname>Ciompi</surname> <given-names>F.</given-names></name> <name><surname>Ghafoorian</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>A survey on deep learning in medical image analysis</article-title>. <source>Med. Image Anal</source>. <volume>42</volume>, <fpage>60</fpage>&#x02013;<lpage>88</lpage>. <pub-id pub-id-type="doi">10.1016/j.media.2017.07.005</pub-id><pub-id pub-id-type="pmid">28778026</pub-id></citation></ref>
<ref id="B94">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>Pandya</surname> <given-names>P.</given-names></name> <name><surname>Afshar</surname> <given-names>S.</given-names></name></person-group> (<year>2021</year>). <article-title>Therapeutic advances in oncology</article-title>. <source>Int. J. Mol. Sci</source>. <volume>22</volume>:<fpage>2008</fpage>. <pub-id pub-id-type="doi">10.3390/ijms22042008</pub-id><pub-id pub-id-type="pmid">33670524</pub-id></citation></ref>
<ref id="B95">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>T.</given-names></name> <name><surname>Huang</surname> <given-names>J.</given-names></name> <name><surname>Liao</surname> <given-names>T.</given-names></name> <name><surname>Pu</surname> <given-names>R.</given-names></name> <name><surname>Liu</surname> <given-names>S.</given-names></name> <name><surname>Peng</surname> <given-names>Y.</given-names></name></person-group> (<year>2022</year>). <article-title>A hybrid deep learning model for predicting molecular subtypes of human breast cancer using multimodal data</article-title>. <source>IRBM</source> <volume>43</volume>, <fpage>62</fpage>&#x02013;<lpage>74</lpage>. <pub-id pub-id-type="doi">10.1016/j.irbm.2020.12.002</pub-id></citation>
</ref>
<ref id="B96">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>J.</given-names></name> <name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>Lin</surname> <given-names>Q.</given-names></name> <name><surname>Wu</surname> <given-names>B.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>You</surname> <given-names>Y.</given-names></name></person-group> (<year>2021</year>). <article-title>Multitask learning for visual question answering</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst</source>. <volume>34</volume>, <fpage>1380</fpage>&#x02013;<lpage>1394</lpage>.</citation>
</ref>
<ref id="B97">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>L.</given-names></name> <name><surname>Yang</surname> <given-names>Z.</given-names></name> <name><surname>Miao</surname> <given-names>Y.</given-names></name> <name><surname>Xue</surname> <given-names>J.</given-names></name> <name><surname>Wu</surname> <given-names>M.</given-names></name> <name><surname>Zhou</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>&#x0201C;NeuGraph: parallel deep neural network computation on large graphs,&#x0201D;</article-title> in <source>USENIX Annual Technical Conference</source> (<publisher-loc>USENIX</publisher-loc>), <fpage>443</fpage>&#x02013;<lpage>458</lpage>.</citation>
</ref>
<ref id="B98">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>X.</given-names></name> <name><surname>Jia</surname> <given-names>F.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Brain tumor classification with multimodal MR and pathology images,&#x0201D;</article-title> in <source>Brainlesion: Glioma, Multiple Sclerosis, Stroke and Traumatic Brain Injuries: 5th International Workshop, BrainLes 2019, Held in Conjunction with MICCAI 2019, Shenzhen, China, October 17, 2019, Revised Selected Papers, Part II 5</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>343</fpage>&#x02013;<lpage>352</lpage>.</citation>
</ref>
<ref id="B99">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>X.</given-names></name> <name><surname>Zhao</surname> <given-names>T.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name> <name><surname>Tang</surname> <given-names>J.</given-names></name> <name><surname>Shah</surname> <given-names>N.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;A unified view on graph neural networks as graph signal denoising,&#x0201D;</article-title> in <source>Proceedings of the 30th ACM International Conference on Information &#x00026; Knowledge Management</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>1202</fpage>&#x02013;<lpage>1211</lpage>.</citation>
</ref>
<ref id="B100">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>Y.</given-names></name> <name><surname>Tang</surname> <given-names>J.</given-names></name></person-group> (<year>2021</year>). <source>Deep Learning on Graphs</source>. <publisher-loc>Cambridge</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name>.</citation>
</ref>
<ref id="B101">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Miotto</surname> <given-names>R.</given-names></name> <name><surname>Li</surname> <given-names>L.</given-names></name> <name><surname>Kidd</surname> <given-names>B. A.</given-names></name> <name><surname>Dudley</surname> <given-names>J. T.</given-names></name></person-group> (<year>2016</year>). <article-title>Deep patient: an unsupervised representation to predict the future of patients from the electronic health records</article-title>. <source>Sci. Rep</source>. <volume>6</volume>, <fpage>1</fpage>&#x02013;<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1038/srep26094</pub-id><pub-id pub-id-type="pmid">27185194</pub-id></citation></ref>
<ref id="B102">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mirza</surname> <given-names>B.</given-names></name> <name><surname>Wang</surname> <given-names>W.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Choi</surname> <given-names>H.</given-names></name> <name><surname>Chung</surname> <given-names>N. C.</given-names></name> <name><surname>Ping</surname> <given-names>P.</given-names></name></person-group> (<year>2019</year>). <article-title>Machine learning and integrative analysis of biomedical big data</article-title>. <source>Genes</source> <volume>10</volume>:<fpage>87</fpage>. <pub-id pub-id-type="doi">10.3390/genes10020087</pub-id><pub-id pub-id-type="pmid">30696086</pub-id></citation></ref>
<ref id="B103">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Mo</surname> <given-names>S.</given-names></name> <name><surname>Cai</surname> <given-names>M.</given-names></name> <name><surname>Lin</surname> <given-names>L.</given-names></name> <name><surname>Tong</surname> <given-names>R.</given-names></name> <name><surname>Chen</surname> <given-names>Q.</given-names></name> <name><surname>Wang</surname> <given-names>F.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>&#x0201C;Multimodal priors guided segmentation of liver lesions in MRI using mutual information based graph co-attention networks,&#x0201D;</article-title> in <source>Medical Image Computing and Computer Assisted Intervention-MICCAI 2020: 23rd International Conference, Lima, Peru, October 4-8, 2020, Proceedings, Part IV 23</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>429</fpage>&#x02013;<lpage>438</lpage>.</citation>
</ref>
<ref id="B104">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Muhammad</surname> <given-names>L. J.</given-names></name> <name><surname>Bria</surname> <given-names>A.</given-names></name></person-group> (<year>2023</year>). <article-title>Editorial: AI applications for diagnosis of breast cancer</article-title>. <source>Front. Artif. Intellig</source>. <volume>6</volume>:<fpage>1247261</fpage>. <pub-id pub-id-type="doi">10.3389/frai.2023.1247261</pub-id><pub-id pub-id-type="pmid">37915538</pub-id></citation></ref>
<ref id="B105">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Muhammad</surname> <given-names>W.</given-names></name> <name><surname>Ahmed</surname> <given-names>S.-b.-S.</given-names></name> <name><surname>Naeem</surname> <given-names>S.</given-names></name> <name><surname>Khan</surname> <given-names>A. A. M. H.</given-names></name> <name><surname>Qureshi</surname> <given-names>B. M.</given-names></name> <name><surname>Hussain</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Artificial neural network-assisted prediction of radiobiological indices in head and neck cancer</article-title>. <source>Front. Artif. Intellig</source>. <volume>7</volume>:<fpage>1329737</fpage>. <pub-id pub-id-type="doi">10.3389/frai.2024.1329737</pub-id><pub-id pub-id-type="pmid">38646416</pub-id></citation></ref>
<ref id="B106">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nampalle</surname> <given-names>K. B.</given-names></name> <name><surname>Singh</surname> <given-names>P.</given-names></name> <name><surname>Narayan</surname> <given-names>U. V.</given-names></name> <name><surname>Raman</surname> <given-names>B.</given-names></name></person-group> (<year>2023</year>). <article-title>Vision through the veil: Differential privacy in federated learning for medical image classification</article-title>. <source>arXiv</source> [preprint] arXiv:2306.17794. <pub-id pub-id-type="doi">10.48550/arXiv.2306.17794</pub-id></citation>
</ref>
<ref id="B107">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nguyen</surname> <given-names>T.</given-names></name> <name><surname>Le</surname> <given-names>H.</given-names></name> <name><surname>Quinn</surname> <given-names>T. P.</given-names></name> <name><surname>Nguyen</surname> <given-names>T.</given-names></name> <name><surname>Le</surname> <given-names>T. D.</given-names></name> <name><surname>Venkatesh</surname> <given-names>S.</given-names></name></person-group> (<year>2021</year>). <article-title>GraphDTA: predicting drug-target binding affinity with graph neural networks</article-title>. <source>Bioinformatics</source> <volume>37</volume>, <fpage>1140</fpage>&#x02013;<lpage>1147</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa921</pub-id><pub-id pub-id-type="pmid">33119053</pub-id></citation></ref>
<ref id="B108">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nie</surname> <given-names>M.</given-names></name> <name><surname>Chen</surname> <given-names>D.</given-names></name> <name><surname>Wang</surname> <given-names>D.</given-names></name></person-group> (<year>2023</year>). <article-title>Reinforcement learning on graphs: A survey</article-title>. <source>IEEE Trans. Emerg. Topics Comp. Intellig</source>. <volume>7</volume>, <fpage>1065</fpage>&#x02013;<lpage>1082</lpage>. <pub-id pub-id-type="doi">10.1109/TETCI.2022.3222545</pub-id></citation>
</ref>
<ref id="B109">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nielsen</surname> <given-names>I. E.</given-names></name> <name><surname>Dera</surname> <given-names>D.</given-names></name> <name><surname>Rasool</surname> <given-names>G.</given-names></name> <name><surname>Ramachandran</surname> <given-names>R. P.</given-names></name> <name><surname>Bouaynaya</surname> <given-names>N. C.</given-names></name></person-group> (<year>2022</year>). <article-title>Robust explainability: a tutorial on gradient-based attribution methods for deep neural networks</article-title>. <source>IEEE Signal Process. Mag</source>. <volume>39</volume>, <fpage>73</fpage>&#x02013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1109/MSP.2022.3142719</pub-id></citation>
</ref>
<ref id="B110">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Orii</surname> <given-names>L.</given-names></name> <name><surname>Feldacker</surname> <given-names>C.</given-names></name> <name><surname>Tweya</surname> <given-names>H.</given-names></name> <name><surname>Anderson</surname> <given-names>R.</given-names></name></person-group> (<year>2024</year>). <article-title>eHealth data security and privacy: Perspectives from diverse stakeholders in Malawi</article-title>. <source>Proc. ACM on Human-Comp. Interact</source>. <volume>8</volume>, <fpage>1</fpage>&#x02013;<lpage>26</lpage>. <pub-id pub-id-type="doi">10.1145/3637323</pub-id></citation>
</ref>
<ref id="B111">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Otter</surname> <given-names>D. W.</given-names></name> <name><surname>Medina</surname> <given-names>J. R.</given-names></name> <name><surname>Kalita</surname> <given-names>J. K.</given-names></name></person-group> (<year>2021</year>). <article-title>A survey of the usages of deep learning for natural language processing</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst</source>. <volume>32</volume>, <fpage>604</fpage>&#x02013;<lpage>624</lpage>. <pub-id pub-id-type="doi">10.1109/TNNLS.2020.2979670</pub-id><pub-id pub-id-type="pmid">32324570</pub-id></citation></ref>
<ref id="B112">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Park</surname> <given-names>J.</given-names></name> <name><surname>Cho</surname> <given-names>J.</given-names></name> <name><surname>Chang</surname> <given-names>H. J.</given-names></name> <name><surname>Choi</surname> <given-names>J. Y.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Unsupervised hyperbolic representation learning via message passing auto-encoders,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source> (<publisher-loc>Nashville, TN</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>5516</fpage>&#x02013;<lpage>5526</lpage>.</citation>
</ref>
<ref id="B113">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Park</surname> <given-names>M.-K.</given-names></name> <name><surname>Lim</surname> <given-names>J.-M.</given-names></name> <name><surname>Jeong</surname> <given-names>J.</given-names></name> <name><surname>Jang</surname> <given-names>Y.</given-names></name> <name><surname>Lee</surname> <given-names>J.-W.</given-names></name> <name><surname>Lee</surname> <given-names>J.-C.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Deep-learning algorithm and concomitant biomarker identification for NSCLC prediction using multi-omics data integration</article-title>. <source>Biomolecules</source> <volume>12</volume>:<fpage>1839</fpage>. <pub-id pub-id-type="doi">10.3390/biom12121839</pub-id><pub-id pub-id-type="pmid">36551266</pub-id></citation></ref>
<ref id="B114">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Pati</surname> <given-names>P.</given-names></name> <name><surname>Jaume</surname> <given-names>G.</given-names></name> <name><surname>Fernandes</surname> <given-names>L. A.</given-names></name> <name><surname>Foncubierta-Rodr&#x000ED;guez</surname> <given-names>A.</given-names></name> <name><surname>Feroce</surname> <given-names>F.</given-names></name> <name><surname>Anniciello</surname> <given-names>A. M.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>&#x0201C;HACT-net: a hierarchical cell-to-tissue graph neural network for histopathological image classification,&#x0201D;</article-title> in <source>Uncertainty for Safe Utilization of Machine Learning in Medical Imaging, and Graphs in Biomedical Image Analysis: Second International Workshop, UNSURE 2020, and Third International Workshop, GRAIL 2020, Held in Conjunction with MICCAI 2020, Lima, Peru, October 8, 2020, Proceedings 2</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>208</fpage>&#x02013;<lpage>219</lpage>.</citation>
</ref>
<ref id="B115">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pati</surname> <given-names>S.</given-names></name> <name><surname>Baid</surname> <given-names>U.</given-names></name> <name><surname>Edwards</surname> <given-names>B.</given-names></name> <name><surname>Sheller</surname> <given-names>M.</given-names></name> <name><surname>Wang</surname> <given-names>S.-H.</given-names></name> <name><surname>Reina</surname> <given-names>G. A.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Federated learning enables big data for rare cancer boundary detection</article-title>. <source>Nat. Commun</source>. <volume>13</volume>:<fpage>7346</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-022-33407-5</pub-id><pub-id pub-id-type="pmid">36470898</pub-id></citation></ref>
<ref id="B116">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Qi</surname> <given-names>G.</given-names></name> <name><surname>Sun</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>M.</given-names></name> <name><surname>Hou</surname> <given-names>X.</given-names></name></person-group> (<year>2020</year>). <article-title>Development and application of matrix variate restricted Boltzmann machine</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>137856</fpage>&#x02013;<lpage>137866</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2020.3012603</pub-id></citation>
</ref>
<ref id="B117">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Quinn</surname> <given-names>M.</given-names></name> <name><surname>Forman</surname> <given-names>J.</given-names></name> <name><surname>Harrod</surname> <given-names>M.</given-names></name> <name><surname>Winter</surname> <given-names>S.</given-names></name> <name><surname>Fowler</surname> <given-names>K. E.</given-names></name> <name><surname>Krein</surname> <given-names>S. L.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Electronic health records, communication, and data sharing: challenges and opportunities for improving the diagnostic process</article-title>. <source>Diagnosis</source> <volume>6</volume>, <fpage>241</fpage>&#x02013;<lpage>248</lpage>. <pub-id pub-id-type="doi">10.1515/dx-2018-0036</pub-id><pub-id pub-id-type="pmid">30485175</pub-id></citation></ref>
<ref id="B118">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Radford</surname> <given-names>A.</given-names></name> <name><surname>Kim</surname> <given-names>J. W.</given-names></name> <name><surname>Hallacy</surname> <given-names>C.</given-names></name> <name><surname>Ramesh</surname> <given-names>A.</given-names></name> <name><surname>Goh</surname> <given-names>G.</given-names></name> <name><surname>Agarwal</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>&#x0201C;Learning transferable visual models from natural language supervision,&#x0201D;</article-title> in <source>International Conference on Machine Learning</source> (<publisher-loc>New York</publisher-loc>: <publisher-name>PMLR</publisher-name>).</citation>
</ref>
<ref id="B119">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Radford</surname> <given-names>A.</given-names></name> <name><surname>Narasimhan</surname> <given-names>K.</given-names></name> <name><surname>Salimans</surname> <given-names>T.</given-names></name> <name><surname>Sutskever</surname> <given-names>I.</given-names></name></person-group> (<year>2018</year>). <source>Improving Language Understanding by Generative Pre-Training</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.mikecaptain.com/resources/pdf/GPT-1.pdf">https://www.mikecaptain.com/resources/pdf/GPT-1.pdf</ext-link></citation>
</ref>
<ref id="B120">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Raffel</surname> <given-names>C.</given-names></name> <name><surname>Shazeer</surname> <given-names>N.</given-names></name> <name><surname>Roberts</surname> <given-names>A.</given-names></name> <name><surname>Lee</surname> <given-names>K.</given-names></name> <name><surname>Narang</surname> <given-names>S.</given-names></name> <name><surname>Matena</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Exploring the limits of transfer learning with a unified text-to-text transformer</article-title>. <source>J. Mach. Learn. Res</source>. <volume>21</volume>:<fpage>1</fpage>.</citation>
</ref>
<ref id="B121">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rajadhyaksha</surname> <given-names>N.</given-names></name> <name><surname>Chitkara</surname> <given-names>A.</given-names></name></person-group> (<year>2023</year>). <article-title>Graph contrastive learning for multi-omics data</article-title>. <source>arXiv</source> [preprint] arXiv:2301.02242. <pub-id pub-id-type="doi">10.48550/arXiv.2301.02242</pub-id></citation>
</ref>
<ref id="B122">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rao</surname> <given-names>J.</given-names></name> <name><surname>Zhou</surname> <given-names>X.</given-names></name> <name><surname>Lu</surname> <given-names>Y.</given-names></name> <name><surname>Zhao</surname> <given-names>H.</given-names></name> <name><surname>Yang</surname> <given-names>Y.</given-names></name></person-group> (<year>2021</year>). <article-title>Imputing single-cell RNA-seq data by combining graph convolution and autoencoder neural networks</article-title>. <source>Iscience</source> <volume>24</volume>:<fpage>102393</fpage>. <pub-id pub-id-type="doi">10.1016/j.isci.2021.102393</pub-id><pub-id pub-id-type="pmid">33997678</pub-id></citation></ref>
<ref id="B123">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rasmy</surname> <given-names>L.</given-names></name> <name><surname>Xiang</surname> <given-names>Y.</given-names></name> <name><surname>Xie</surname> <given-names>Z.</given-names></name> <name><surname>Tao</surname> <given-names>C.</given-names></name> <name><surname>Zhi</surname> <given-names>D.</given-names></name></person-group> (<year>2021</year>). <article-title>Med-BERT: Pretrained contextualized embeddings on large-scale structured electronic health records for disease prediction</article-title>. <source>NPJ Digital Med</source>. <volume>4</volume>:<fpage>1</fpage>. <pub-id pub-id-type="doi">10.1038/s41746-021-00455-y</pub-id><pub-id pub-id-type="pmid">34017034</pub-id></citation></ref>
<ref id="B124">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Remmer</surname> <given-names>E.</given-names></name></person-group> (<year>2022</year>). <source>Explainability Methods for Transformer-based Artificial Neural Networks: a Comparative Analysis</source> (<publisher-loc>PhD thesis</publisher-loc>).</citation>
</ref>
<ref id="B125">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Rombach</surname> <given-names>R.</given-names></name> <name><surname>Blattmann</surname> <given-names>A.</given-names></name> <name><surname>Lorenz</surname> <given-names>D.</given-names></name> <name><surname>Esser</surname> <given-names>P.</given-names></name> <name><surname>Ommer</surname> <given-names>B.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;High-resolution image synthesis with latent diffusion models,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source> (<publisher-loc>IEEE</publisher-loc>), <fpage>10684</fpage>&#x02013;<lpage>10695</lpage>.<pub-id pub-id-type="pmid">38271725</pub-id></citation></ref>
<ref id="B126">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rowe</surname> <given-names>S. P.</given-names></name> <name><surname>Pomper</surname> <given-names>M. G.</given-names></name></person-group> (<year>2022</year>). <article-title>Molecular imaging in oncology: current impact and future directions</article-title>. <source>CA Cancer J. Clin</source>. <volume>72</volume>, <fpage>333</fpage>&#x02013;<lpage>352</lpage>. <pub-id pub-id-type="doi">10.3322/caac.21713</pub-id><pub-id pub-id-type="pmid">34902160</pub-id></citation></ref>
<ref id="B127">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Rozemberczki</surname> <given-names>B.</given-names></name> <name><surname>Gogleva</surname> <given-names>A.</given-names></name> <name><surname>Nilsson</surname> <given-names>S.</given-names></name> <name><surname>Edwards</surname> <given-names>G.</given-names></name> <name><surname>Nikolov</surname> <given-names>A.</given-names></name> <name><surname>Papa</surname> <given-names>E.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;MOOMIN: deep molecular omics network for anti-cancer drug combination therapy,&#x0201D;</article-title> in <source>Proceedings of the 31st ACM International Conference on Information &#x00026; Knowledge Management</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>3472</fpage>&#x02013;<lpage>3483</lpage>.</citation>
</ref>
<ref id="B128">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>&#x0015E;ahinba&#x0015F;</surname> <given-names>K.</given-names></name> <name><surname>Catak</surname> <given-names>F. O.</given-names></name></person-group> (<year>2021</year>). <article-title>Secure multi-party computation based privacy preserving data analysis in healthcare iot systems</article-title>. <source>arXiv</source> [preprint] arXiv:2109.14334. <pub-id pub-id-type="doi">10.1007/978-3-031-08637-3_3</pub-id></citation>
</ref>
<ref id="B129">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sanders</surname> <given-names>L. M.</given-names></name> <name><surname>Scott</surname> <given-names>R. T.</given-names></name> <name><surname>Yang</surname> <given-names>J. H.</given-names></name> <name><surname>Qutub</surname> <given-names>A. A.</given-names></name> <name><surname>Garcia Martin</surname> <given-names>H.</given-names></name> <name><surname>Berrios</surname> <given-names>D. C.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Biological research and self-driving labs in deep space supported by artificial intelligence</article-title>. <source>Nat. Mach. Intellig</source>. <volume>5</volume>, <fpage>208</fpage>&#x02013;<lpage>219</lpage>. <pub-id pub-id-type="doi">10.1038/s42256-023-00618-4</pub-id></citation>
</ref>
<ref id="B130">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sankar</surname> <given-names>A.</given-names></name> <name><surname>Wu</surname> <given-names>Y.</given-names></name> <name><surname>Gou</surname> <given-names>L.</given-names></name> <name><surname>Zhang</surname> <given-names>W.</given-names></name> <name><surname>Yang</surname> <given-names>H.</given-names></name></person-group> (<year>2018</year>). <article-title>Dynamic graph representation learning via self-attention networks</article-title>. <source>arXiv</source> [preprint] arXiv:1812.09430.<pub-id pub-id-type="pmid">27534393</pub-id></citation></ref>
<ref id="B131">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Saueressig</surname> <given-names>C.</given-names></name> <name><surname>Berkley</surname> <given-names>A.</given-names></name> <name><surname>Kang</surname> <given-names>E.</given-names></name> <name><surname>Munbodh</surname> <given-names>R.</given-names></name> <name><surname>Singh</surname> <given-names>R.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Exploring graph-based neural networks for automatic brain tumor segmentation,&#x0201D;</article-title> in <source>From Data to Models and Back: 9th International Symposium, DataMod 2020, Virtual Event, October 20, 2020, Revised Selected Papers 9</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>18</fpage>&#x02013;<lpage>37</lpage>.</citation>
</ref>
<ref id="B132">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Scao</surname> <given-names>T. L.</given-names></name> <name><surname>Fan</surname> <given-names>A.</given-names></name> <name><surname>Akiki</surname> <given-names>C.</given-names></name> <name><surname>Pavlick</surname> <given-names>E.</given-names></name> <name><surname>Ili&#x00107;</surname> <given-names>S.</given-names></name> <name><surname>Hesslow</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>BLOOM: A 176B-parameter open-access multilingual language model</article-title>. <source>arXiv</source> [preprint] arXiv:2211.05100.</citation>
</ref>
<ref id="B133">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schulz</surname> <given-names>S.</given-names></name> <name><surname>Woerl</surname> <given-names>A.-C.</given-names></name> <name><surname>Jungmann</surname> <given-names>F.</given-names></name> <name><surname>Glasner</surname> <given-names>C.</given-names></name> <name><surname>Stenzel</surname> <given-names>P.</given-names></name> <name><surname>Strobl</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Multimodal deep learning for prognosis prediction in renal cancer</article-title>. <source>Front. Oncol</source>. <volume>11</volume>:<fpage>788740</fpage>. <pub-id pub-id-type="doi">10.3389/fonc.2021.788740</pub-id><pub-id pub-id-type="pmid">34900744</pub-id></citation></ref>
<ref id="B134">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shang</surname> <given-names>J.</given-names></name> <name><surname>Ma</surname> <given-names>T.</given-names></name> <name><surname>Xiao</surname> <given-names>C.</given-names></name> <name><surname>Sun</surname> <given-names>J.</given-names></name></person-group> (<year>2019</year>). <article-title>Pre-training of graph augmented transformers for medication recommendation</article-title>. <source>arXiv</source> [preprint] arXiv:1906.00346. <pub-id pub-id-type="doi">10.24963/ijcai.2019/825</pub-id></citation>
</ref>
<ref id="B135">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shao</surname> <given-names>Z.</given-names></name> <name><surname>Bian</surname> <given-names>H.</given-names></name> <name><surname>Chen</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Ji</surname> <given-names>X.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>&#x0201C;TransMIL: transformer based correlated multiple instance learning for whole slide image classification,&#x0201D;</article-title> in <source>Adv. Neural Inf. Process. Syst</source>, <volume>34</volume>.</citation>
</ref>
<ref id="B136">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Shi</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>R.</given-names></name> <name><surname>Zheng</surname> <given-names>Y.</given-names></name> <name><surname>Jiang</surname> <given-names>Z.</given-names></name> <name><surname>Yu</surname> <given-names>L.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Graph convolutional networks for cervical cell classification,&#x0201D;</article-title> in <source>MICCAI 2019 Computational Pathology Workshop COMPAY</source> (<publisher-loc>Shenzhen</publisher-loc>: <publisher-name>Compay</publisher-name>).</citation>
</ref>
<ref id="B137">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Siam</surname> <given-names>A.</given-names></name> <name><surname>Alsaify</surname> <given-names>A. R.</given-names></name> <name><surname>Mohammad</surname> <given-names>B.</given-names></name> <name><surname>Biswas</surname> <given-names>M. R.</given-names></name> <name><surname>Ali</surname> <given-names>H.</given-names></name> <name><surname>Shah</surname> <given-names>Z.</given-names></name></person-group> (<year>2023</year>). <article-title>Multimodal deep learning for liver cancer applications: a scoping review</article-title>. <source>Front. Artif. Intellig</source>. <volume>6</volume>:<fpage>1247195</fpage>. <pub-id pub-id-type="doi">10.3389/frai.2023.1247195</pub-id><pub-id pub-id-type="pmid">37965284</pub-id></citation></ref>
<ref id="B138">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Siegel</surname> <given-names>R. L.</given-names></name> <name><surname>Miller</surname> <given-names>K. D.</given-names></name> <name><surname>Wagle</surname> <given-names>N. S.</given-names></name> <name><surname>Jemal</surname> <given-names>A.</given-names></name></person-group> (<year>2023</year>). <article-title>Cancer statistics, 2023</article-title>. <source>CA Cancer J. Clin</source>. <volume>73</volume>, <fpage>17</fpage>&#x02013;<lpage>48</lpage>. <pub-id pub-id-type="doi">10.3322/caac.21763</pub-id><pub-id pub-id-type="pmid">36633525</pub-id></citation></ref>
<ref id="B139">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Singh</surname> <given-names>A.</given-names></name> <name><surname>Hu</surname> <given-names>R.</given-names></name> <name><surname>Goswami</surname> <given-names>V.</given-names></name> <name><surname>Couairon</surname> <given-names>G.</given-names></name> <name><surname>Galuba</surname> <given-names>W.</given-names></name> <name><surname>Rohrbach</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2022</year>). <source>FLAVA: A Foundational Language and Vision Alignment Model</source>. <publisher-loc>Seattle, WA</publisher-loc>: <publisher-name>CVPR</publisher-name>. <pub-id pub-id-type="doi">10.1109/CVPR52688.2022.01519</pub-id></citation>
</ref>
<ref id="B140">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Singh</surname> <given-names>G.</given-names></name> <name><surname>Manjila</surname> <given-names>S.</given-names></name> <name><surname>Sakla</surname> <given-names>N.</given-names></name> <name><surname>True</surname> <given-names>A.</given-names></name> <name><surname>Wardeh</surname> <given-names>A. H.</given-names></name> <name><surname>Beig</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Radiomics and radiogenomics in gliomas: a contemporary update</article-title>. <source>Br. J. Cancer</source> <volume>125</volume>, <fpage>641</fpage>&#x02013;<lpage>657</lpage>. <pub-id pub-id-type="doi">10.1038/s41416-021-01387-w</pub-id><pub-id pub-id-type="pmid">33958734</pub-id></citation></ref>
<ref id="B141">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sleeman</surname> <given-names>W. C.</given-names> <suffix>IV</suffix></name> <name><surname>Kapoor</surname> <given-names>R.</given-names></name> <name><surname>Ghosh</surname> <given-names>P.</given-names></name></person-group> (<year>2022</year>). <article-title>Multimodal classification: current landscape, taxonomy and future directions</article-title>. <source>ACM Comp. Surv</source>. <volume>55</volume>, <fpage>1</fpage>&#x02013;<lpage>31</lpage>. <pub-id pub-id-type="doi">10.1145/3543848</pub-id></citation>
</ref>
<ref id="B142">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Song</surname> <given-names>J.</given-names></name> <name><surname>Zheng</surname> <given-names>Y.</given-names></name> <name><surname>Zakir Ullah</surname> <given-names>M.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Jiang</surname> <given-names>Y.</given-names></name> <name><surname>Xu</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Multiview multimodal network for breast cancer diagnosis in contrast-enhanced spectral mammography images</article-title>. <source>Int. J. Comput. Assist. Radiol. Surg</source>. <volume>16</volume>, <fpage>979</fpage>&#x02013;<lpage>988</lpage>. <pub-id pub-id-type="doi">10.1007/s11548-021-02391-4</pub-id><pub-id pub-id-type="pmid">33966155</pub-id></citation></ref>
<ref id="B143">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Song</surname> <given-names>Q.</given-names></name> <name><surname>Su</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>W.</given-names></name></person-group> (<year>2021</year>). <article-title>scGCN is a graph convolutional networks algorithm for knowledge transfer in single cell omics</article-title>. <source>Nat. Commun</source>. <volume>12</volume>:<fpage>3826</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-021-24172-y</pub-id><pub-id pub-id-type="pmid">34158507</pub-id></citation></ref>
<ref id="B144">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stark</surname> <given-names>S. G.</given-names></name> <name><surname>Ficek</surname> <given-names>J.</given-names></name> <name><surname>Locatello</surname> <given-names>F.</given-names></name> <name><surname>Bonilla</surname> <given-names>X.</given-names></name> <name><surname>Chevrier</surname> <given-names>S.</given-names></name> <name><surname>Singer</surname> <given-names>F.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>SCIM: universal single-cell matching with unpaired feature sets</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>i919</fpage>&#x02013;<lpage>927</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa843</pub-id><pub-id pub-id-type="pmid">33381818</pub-id></citation></ref>
<ref id="B145">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Sun</surname> <given-names>C.</given-names></name> <name><surname>Shrivastava</surname> <given-names>A.</given-names></name> <name><surname>Singh</surname> <given-names>S.</given-names></name> <name><surname>Gupta</surname> <given-names>A.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;Revisiting unreasonable effectiveness of data in deep learning era,&#x0201D;</article-title> in <source>Proceedings of the IEEE International Conference on Computer Vision</source> (<publisher-loc>Venice</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>843</fpage>&#x02013;<lpage>852</lpage>.<pub-id pub-id-type="pmid">30366739</pub-id></citation></ref>
<ref id="B146">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sun</surname> <given-names>H.</given-names></name> <name><surname>Liu</surname> <given-names>J.</given-names></name> <name><surname>Chai</surname> <given-names>S.</given-names></name> <name><surname>Qiu</surname> <given-names>Z.</given-names></name> <name><surname>Lin</surname> <given-names>L.</given-names></name> <name><surname>Huang</surname> <given-names>X.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Multi-Modal Adaptive Fusion Transformer Network for the estimation of depression level</article-title>. <source>Sensors</source> <volume>21</volume>:<fpage>4764</fpage>. <pub-id pub-id-type="doi">10.3390/s21144764</pub-id><pub-id pub-id-type="pmid">34300504</pub-id></citation></ref>
<ref id="B147">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Sun</surname> <given-names>X.</given-names></name> <name><surname>Bosch</surname> <given-names>J. A.</given-names></name> <name><surname>De Wit</surname> <given-names>J.</given-names></name> <name><surname>Krahmer</surname> <given-names>E.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Human-in-the-loop interaction for continuously improving generative model in conversational agent for behavioral intervention,&#x0201D;</article-title> in <source>Companion Proceedings of the 28th International Conference on Intelligent User Interfaces</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>99</fpage>&#x02013;<lpage>101</lpage>.</citation>
</ref>
<ref id="B148">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Syed</surname> <given-names>K.</given-names></name> <name><surname>Sleeman</surname> <given-names>W. C.</given-names> <suffix>IV</suffix></name> <name><surname>Hagan</surname> <given-names>M.</given-names></name> <name><surname>Palta</surname> <given-names>J.</given-names></name> <name><surname>Kapoor</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Multi-view data integration methods for radiotherapy structure name standardization</article-title>. <source>Cancers</source> <volume>13</volume>:<fpage>1796</fpage>. <pub-id pub-id-type="doi">10.3390/cancers13081796</pub-id><pub-id pub-id-type="pmid">33918716</pub-id></citation></ref>
<ref id="B149">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Talebi</surname> <given-names>R.</given-names></name> <name><surname>Celis-Morales</surname> <given-names>C. A.</given-names></name> <name><surname>Akbari</surname> <given-names>A.</given-names></name> <name><surname>Talebi</surname> <given-names>A.</given-names></name> <name><surname>Borumandnia</surname> <given-names>N.</given-names></name> <name><surname>Pourhoseingholi</surname> <given-names>M. A.</given-names></name></person-group> (<year>2024</year>). <article-title>Machine learning-based classifiers to predict metastasis in colorectal cancer patients</article-title>. <source>Front. Artif. Intellig</source>. <volume>7</volume>:<fpage>1285037</fpage>. <pub-id pub-id-type="doi">10.3389/frai.2024.1285037</pub-id><pub-id pub-id-type="pmid">38327669</pub-id></citation></ref>
<ref id="B150">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Tang</surname> <given-names>J.</given-names></name> <name><surname>Li</surname> <given-names>K.</given-names></name> <name><surname>Hou</surname> <given-names>M.</given-names></name> <name><surname>Jin</surname> <given-names>X.</given-names></name> <name><surname>Kong</surname> <given-names>W.</given-names></name> <name><surname>Ding</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>&#x0201C;MMT: multi-way multi-modal transformer for multimodal learning,&#x0201D;</article-title> in <source>Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence, IJCAI-22</source> (<publisher-loc>Vienna</publisher-loc>: <publisher-name>International Joint Conferences on Artificial Intelligence Organization</publisher-name>), <fpage>3458</fpage>&#x02013;<lpage>3465</lpage>.</citation>
</ref>
<ref id="B151">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Thangudu</surname> <given-names>R. R.</given-names></name> <name><surname>Rudnick</surname> <given-names>P. A.</given-names></name> <name><surname>Holck</surname> <given-names>M.</given-names></name> <name><surname>Singhal</surname> <given-names>D.</given-names></name> <name><surname>MacCoss</surname> <given-names>M. J.</given-names></name> <name><surname>Edwards</surname> <given-names>N. J.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Abstract lb-242: Proteomic data commons: a resource for proteogenomic analysis</article-title>. <source>Cancer Res</source>. <volume>80</volume>:<fpage>LB-242</fpage>. <pub-id pub-id-type="doi">10.1158/1538-7445.AM2020-LB-242</pub-id></citation>
</ref>
<ref id="B152">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Thomee</surname> <given-names>B.</given-names></name> <name><surname>Shamma</surname> <given-names>D. A.</given-names></name> <name><surname>Friedland</surname> <given-names>G.</given-names></name> <name><surname>Elizalde</surname> <given-names>B.</given-names></name> <name><surname>Ni</surname> <given-names>K.</given-names></name> <name><surname>Poland</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>YFCC100M: The new data in multimedia research</article-title>. <source>Commun. ACM</source> <volume>59</volume>:<fpage>64</fpage>&#x02013;<lpage>73</lpage>. <pub-id pub-id-type="doi">10.1145/2812802</pub-id></citation>
</ref>
<ref id="B153">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tian</surname> <given-names>Z.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Zheng</surname> <given-names>Y.</given-names></name> <name><surname>Chen</surname> <given-names>Z.</given-names></name> <name><surname>Shi</surname> <given-names>Z.</given-names></name> <name><surname>Liu</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Graph-convolutional-network-based interactive prostate segmentation in MR images</article-title>. <source>Med. Phys</source>. <volume>47</volume>, <fpage>4164</fpage>&#x02013;<lpage>4176</lpage>. <pub-id pub-id-type="doi">10.1002/mp.14327</pub-id><pub-id pub-id-type="pmid">32533855</pub-id></citation></ref>
<ref id="B154">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tortora</surname> <given-names>M.</given-names></name> <name><surname>Cordelli</surname> <given-names>E.</given-names></name> <name><surname>Sicilia</surname> <given-names>R.</given-names></name> <name><surname>Nibid</surname> <given-names>L.</given-names></name> <name><surname>Ippolito</surname> <given-names>E.</given-names></name> <name><surname>Perrone</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Radiopathomics: Multimodal learning in non-small cell lung cancer for adaptive radiotherapy</article-title>. <source>IEEE Access</source>. <volume>11</volume>, <fpage>47563</fpage>&#x02013;<lpage>47578</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2023.3275126</pub-id></citation>
</ref>
<ref id="B155">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tripathi</surname> <given-names>A.</given-names></name> <name><surname>Waqas</surname> <given-names>A.</given-names></name> <name><surname>Venkatesan</surname> <given-names>K.</given-names></name> <name><surname>Yilmaz</surname> <given-names>Y.</given-names></name> <name><surname>Rasool</surname> <given-names>G.</given-names></name></person-group> (<year>2024a</year>). <article-title>Building flexible, scalable, and machine learning-ready multimodal oncology datasets</article-title>. <source>Sensors</source> <volume>24</volume>:<fpage>1634</fpage>. <pub-id pub-id-type="doi">10.3390/s24051634</pub-id><pub-id pub-id-type="pmid">38475170</pub-id></citation></ref>
<ref id="B156">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tripathi</surname> <given-names>A.</given-names></name> <name><surname>Waqas</surname> <given-names>A.</given-names></name> <name><surname>Yilmaz</surname> <given-names>Y.</given-names></name> <name><surname>Rasool</surname> <given-names>G.</given-names></name></person-group> (<year>2024b</year>). <article-title>Honeybee: a scalable modular framework for creating multimodal oncology datasets with foundational embedding models</article-title>. <source>arXiv</source> [preprint] arXiv:2405.07460.</citation>
</ref>
<ref id="B157">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tripathi</surname> <given-names>A.</given-names></name> <name><surname>Waqas</surname> <given-names>A.</given-names></name> <name><surname>Yilmaz</surname> <given-names>Y.</given-names></name> <name><surname>Rasool</surname> <given-names>G.</given-names></name></person-group> (<year>2024c</year>). <article-title>Multimodal transformer model improves survival prediction in lung cancer compared to unimodal approaches</article-title>. <source>Cancer Res</source>. <volume>84</volume>:<fpage>4905</fpage>&#x02013;<lpage>4905</lpage>. <pub-id pub-id-type="doi">10.1158/1538-7445.AM2024-4905</pub-id></citation>
</ref>
<ref id="B158">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Tripathi</surname> <given-names>S.</given-names></name> <name><surname>Moyer</surname> <given-names>E. J.</given-names></name> <name><surname>Augustin</surname> <given-names>A. I.</given-names></name> <name><surname>Zavalny</surname> <given-names>A.</given-names></name> <name><surname>Dheer</surname> <given-names>S.</given-names></name> <name><surname>Sukumaran</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>RadGenNets: Deep learning-based radiogenomics model for gene mutation prediction in lung cancer</article-title>. <source>Inform. Med. Unlocked</source> <volume>33</volume>:<fpage>101062</fpage>. <pub-id pub-id-type="doi">10.1016/j.imu.2022.101062</pub-id></citation>
</ref>
<ref id="B159">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Valsesia</surname> <given-names>D.</given-names></name> <name><surname>Fracastoro</surname> <given-names>G.</given-names></name> <name><surname>Magli</surname> <given-names>E.</given-names></name></person-group> (<year>2021</year>). <article-title>RAN-GNNs: breaking the capacity limits of graph neural networks</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst</source>. <volume>34</volume>, <fpage>4610</fpage>&#x02013;<lpage>4619</lpage>. <pub-id pub-id-type="doi">10.1109/TNNLS.2021.3118450</pub-id><pub-id pub-id-type="pmid">34653010</pub-id></citation></ref>
<ref id="B160">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Varlamova</surname> <given-names>E. V.</given-names></name> <name><surname>Butakova</surname> <given-names>M. A.</given-names></name> <name><surname>Semyonova</surname> <given-names>V. V.</given-names></name> <name><surname>Soldatov</surname> <given-names>S. A.</given-names></name> <name><surname>Poltavskiy</surname> <given-names>A. V.</given-names></name> <name><surname>Kit</surname> <given-names>O. I.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Machine learning meets cancer</article-title>. <source>Cancers</source> <volume>16</volume>:<fpage>1100</fpage>. <pub-id pub-id-type="doi">10.3390/cancers16061100</pub-id><pub-id pub-id-type="pmid">38539435</pub-id></citation></ref>
<ref id="B161">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Vaswani</surname> <given-names>A.</given-names></name> <name><surname>Shazeer</surname> <given-names>N.</given-names></name> <name><surname>Parmar</surname> <given-names>N.</given-names></name> <name><surname>Uszkoreit</surname> <given-names>J.</given-names></name> <name><surname>Jones</surname> <given-names>L.</given-names></name> <name><surname>Gomez</surname> <given-names>A. N.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>&#x0201C;Attention is all you need,&#x0201D;</article-title> in <source>Adv. Neural Inf. Process. Syst</source> (<publisher-loc>NeurIPS</publisher-loc>), <fpage>30</fpage>.</citation>
</ref>
<ref id="B162">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Veli&#x0010D;kovi&#x00107;</surname> <given-names>P.</given-names></name> <name><surname>Cucurull</surname> <given-names>G.</given-names></name> <name><surname>Casanova</surname> <given-names>A.</given-names></name> <name><surname>Romero</surname> <given-names>A.</given-names></name> <name><surname>Lio</surname> <given-names>P.</given-names></name> <name><surname>Bengio</surname> <given-names>Y.</given-names></name></person-group> (<year>2017</year>). <article-title>Graph attention networks</article-title>. <source>arXiv</source> [preprint] arXiv:1710.10903.</citation>
</ref>
<ref id="B163">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vu</surname> <given-names>M.</given-names></name> <name><surname>Thai</surname> <given-names>M. T.</given-names></name></person-group> (<year>2020</year>). <article-title>PGM-explainer: probabilistic graphical model explanations for graph neural networks</article-title>. <source>Adv. Neural Inf. Process. Syst</source>. <volume>33</volume>, <fpage>12225</fpage>&#x02013;<lpage>12235</lpage>.</citation>
</ref>
<ref id="B164">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Waikhom</surname> <given-names>L.</given-names></name> <name><surname>Patgiri</surname> <given-names>R.</given-names></name></person-group> (<year>2022</year>). <article-title>A survey of graph neural networks in various learning paradigms: methods, applications, and challenges</article-title>. <source>Artif. Intellig. Rev</source>. <volume>56</volume>, <fpage>1</fpage>&#x02013;<lpage>70</lpage>. <pub-id pub-id-type="doi">10.1007/s10462-022-10321-2</pub-id><pub-id pub-id-type="pmid">38442651</pub-id></citation></ref>
<ref id="B165">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>D.</given-names></name> <name><surname>Su</surname> <given-names>J.</given-names></name> <name><surname>Yu</surname> <given-names>H.</given-names></name></person-group> (<year>2020a</year>). <article-title>Feature extraction and analysis of natural language processing for deep learning english language</article-title>. <source>IEEE Access</source> <volume>8</volume>:<fpage>46335</fpage>&#x02013;<lpage>46345</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2020.2974101</pub-id></citation>
</ref>
<ref id="B166">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Chen</surname> <given-names>R. J.</given-names></name> <name><surname>Lu</surname> <given-names>M. Y.</given-names></name> <name><surname>Baras</surname> <given-names>A.</given-names></name> <name><surname>Mahmood</surname> <given-names>F.</given-names></name></person-group> (<year>2020b</year>). <article-title>&#x0201C;Weakly supervised prostate TMA classification via graph convolutional networks,&#x0201D;</article-title> in <source>2020 IEEE 17th International Symposium on Biomedical Imaging (ISBI)</source> (<publisher-loc>Iowa City, IA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>239</fpage>&#x02013;<lpage>243</lpage>.</citation>
</ref>
<ref id="B167">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Ma</surname> <given-names>A.</given-names></name> <name><surname>Chang</surname> <given-names>Y.</given-names></name> <name><surname>Gong</surname> <given-names>J.</given-names></name> <name><surname>Jiang</surname> <given-names>Y.</given-names></name> <name><surname>Qi</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>scGNN is a novel graph neural network framework for single-cell RNA-Seq analyses</article-title>. <source>Nat. Commun</source>. <volume>12</volume>:<fpage>1882</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-021-22197-x</pub-id><pub-id pub-id-type="pmid">33767197</pub-id></citation></ref>
<ref id="B168">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>S.</given-names></name> <name><surname>Sun</surname> <given-names>S.</given-names></name> <name><surname>Li</surname> <given-names>Z.</given-names></name> <name><surname>Zhang</surname> <given-names>R.</given-names></name> <name><surname>Xu</surname> <given-names>J.</given-names></name></person-group> (<year>2017</year>). <article-title>Accurate de novo prediction of protein contact map by ultra-deep learning model</article-title>. <source>PLoS Comput. Biol</source>. <volume>13</volume>:<fpage>e1005324</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1005324</pub-id><pub-id pub-id-type="pmid">28056090</pub-id></citation></ref>
<ref id="B169">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>T.</given-names></name> <name><surname>Shao</surname> <given-names>W.</given-names></name> <name><surname>Huang</surname> <given-names>Z.</given-names></name> <name><surname>Tang</surname> <given-names>H.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Ding</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>MOGONET integrates multi-omics data using graph convolutional networks allowing patient classification and biomarker identification</article-title>. <source>Nat. Commun</source>. <volume>12</volume>:<fpage>3445</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-021-23774-w</pub-id><pub-id pub-id-type="pmid">34103512</pub-id></citation></ref>
<ref id="B170">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>Y. G.</given-names></name> <name><surname>Hu</surname> <given-names>C.</given-names></name> <name><surname>Li</surname> <given-names>M.</given-names></name> <name><surname>Fan</surname> <given-names>Y.</given-names></name> <name><surname>Otter</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Cell graph neural networks enable the precise prediction of patient survival in gastric cancer</article-title>. <source>NPJ Prec. Oncol</source>. <volume>6</volume>:<fpage>45</fpage>. <pub-id pub-id-type="doi">10.1038/s41698-022-00285-5</pub-id><pub-id pub-id-type="pmid">35739342</pub-id></citation></ref>
<ref id="B171">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Waqas</surname> <given-names>A.</given-names></name> <name><surname>Bui</surname> <given-names>M. M.</given-names></name> <name><surname>Glassy</surname> <given-names>E. F.</given-names></name> <name><surname>El Naqa</surname> <given-names>I.</given-names></name> <name><surname>Borkowski</surname> <given-names>P.</given-names></name> <name><surname>Borkowski</surname> <given-names>A. A.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Revolutionizing digital pathology with the power of generative artificial intelligence and foundation models</article-title>. <source>Lab. Investigat</source>. <volume>2023</volume>:<fpage>100255</fpage>. <pub-id pub-id-type="doi">10.1016/j.labinv.2023.100255</pub-id><pub-id pub-id-type="pmid">37757969</pub-id></citation></ref>
<ref id="B172">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Waqas</surname> <given-names>A.</given-names></name> <name><surname>Dera</surname> <given-names>D.</given-names></name> <name><surname>Rasool</surname> <given-names>G.</given-names></name> <name><surname>Bouaynaya</surname> <given-names>N. C.</given-names></name> <name><surname>Fathallah-Shaykh</surname> <given-names>H. M.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Brain tumor segmentation and surveillance with deep artificial neural networks,&#x0201D;</article-title> in <source>Deep Learning for Biomedical Data Analysis</source> (<publisher-loc>Springer Nature</publisher-loc>), <fpage>311</fpage>&#x02013;<lpage>350</lpage>.<pub-id pub-id-type="pmid">37919325</pub-id></citation></ref>
<ref id="B173">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Waqas</surname> <given-names>A.</given-names></name> <name><surname>Farooq</surname> <given-names>H.</given-names></name> <name><surname>Bouaynaya</surname> <given-names>N. C.</given-names></name> <name><surname>Rasool</surname> <given-names>G.</given-names></name></person-group> (<year>2022</year>). <article-title>Exploring robust architectures for deep artificial neural networks</article-title>. <source>Commun. Eng</source>. <volume>1</volume>:<fpage>46</fpage>. <pub-id pub-id-type="doi">10.1038/s44172-022-00043-2</pub-id></citation>
</ref>
<ref id="B174">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Waqas</surname> <given-names>A.</given-names></name> <name><surname>Tripathi</surname> <given-names>A.</given-names></name> <name><surname>Ahmed</surname> <given-names>S.</given-names></name> <name><surname>Mukund</surname> <given-names>A.</given-names></name> <name><surname>Farooq</surname> <given-names>H.</given-names></name> <name><surname>Schabath</surname> <given-names>M. B.</given-names></name> <etal/></person-group>. (<year>2024a</year>). <article-title>SeNMo: a self-normalizing deep learning model for enhanced multi-omics data analysis in oncology</article-title>. <source>arXiv</source> [preprint] arXiv:2405.08226. <pub-id pub-id-type="doi">10.48550/arXiv.2405.08226</pub-id></citation>
</ref>
<ref id="B175">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Waqas</surname> <given-names>A.</given-names></name> <name><surname>Tripathi</surname> <given-names>A.</given-names></name> <name><surname>Stewart</surname> <given-names>P.</given-names></name> <name><surname>Naeini</surname> <given-names>M.</given-names></name> <name><surname>Rasool</surname> <given-names>G.</given-names></name></person-group> (<year>2024b</year>). <article-title>Embedding-based multimodal learning on pan-squamous cell carcinomas for improved survival outcomes</article-title>. <source>arXiv</source> [preprint] arXiv:2406.08521. <pub-id pub-id-type="doi">10.48550/arXiv.2406.08521</pub-id></citation>
</ref>
<ref id="B176">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wei</surname> <given-names>H.</given-names></name> <name><surname>Feng</surname> <given-names>L.</given-names></name> <name><surname>Chen</surname> <given-names>X.</given-names></name> <name><surname>An</surname> <given-names>B.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Combating noisy labels by agreement: A joint training method with co-regularization,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source>, <fpage>13726</fpage>&#x02013;<lpage>13735</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR42600.2020.01374</pub-id></citation>
</ref>
<ref id="B177">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Wei</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Nie</surname> <given-names>L.</given-names></name> <name><surname>He</surname> <given-names>X.</given-names></name> <name><surname>Hong</surname> <given-names>R.</given-names></name> <name><surname>Chua</surname> <given-names>T.-S.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;MMGCN: Multi-modal graph convolution network for personalized recommendation of micro-video,&#x0201D;</article-title> in <source>Proceedings of the 27th ACM International Conference on Multimedia</source> (<publisher-loc>Nice</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>1437</fpage>&#x02013;<lpage>1445</lpage>.</citation>
</ref>
<ref id="B178">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Wen</surname> <given-names>H.</given-names></name> <name><surname>Ding</surname> <given-names>J.</given-names></name> <name><surname>Jin</surname> <given-names>W.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Xie</surname> <given-names>Y.</given-names></name> <name><surname>Tang</surname> <given-names>J.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;Graph neural networks for multimodal single-cell data integration,&#x0201D;</article-title> in <source>Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>4153</fpage>&#x02013;<lpage>4163</lpage>. <pub-id pub-id-type="doi">10.1145/3534678.3539213</pub-id></citation></ref>
<ref id="B179">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>Z.</given-names></name> <name><surname>Pan</surname> <given-names>S.</given-names></name> <name><surname>Chen</surname> <given-names>F.</given-names></name> <name><surname>Long</surname> <given-names>G.</given-names></name> <name><surname>Zhang</surname> <given-names>C.</given-names></name> <name><surname>Philip</surname> <given-names>S. Y.</given-names></name></person-group> (<year>2020</year>). <article-title>A comprehensive survey on graph neural networks</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst</source>. <volume>32</volume>:<fpage>4</fpage>&#x02013;<lpage>24</lpage>. <pub-id pub-id-type="doi">10.1109/TNNLS.2020.2978386</pub-id><pub-id pub-id-type="pmid">32217482</pub-id></citation></ref>
<ref id="B180">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xiao</surname> <given-names>Y.</given-names></name> <name><surname>Codevilla</surname> <given-names>F.</given-names></name> <name><surname>Gurram</surname> <given-names>A.</given-names></name> <name><surname>Urfalioglu</surname> <given-names>O.</given-names></name> <name><surname>L&#x000F3;pez</surname> <given-names>A. M.</given-names></name></person-group> (<year>2020</year>). <article-title>Multimodal end-to-end autonomous driving</article-title>. <source>IEEE Trans. Intellig. Transp. Syst</source>. <volume>23</volume>, <fpage>537</fpage>&#x02013;<lpage>547</lpage>. <pub-id pub-id-type="doi">10.1109/TITS.2020.3013234</pub-id></citation>
</ref>
<ref id="B181">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Xie</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Shen</surname> <given-names>C.</given-names></name> <name><surname>Xia</surname> <given-names>Y.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Cotr: Efficiently bridging cnn and transformer for 3d medical image segmentation,&#x0201D;</article-title> in <source>Medical Image Computing and Computer Assisted Intervention-MICCAI 2021: 24th International Conference, Strasbourg, France, September 27-October 1, 2021, Proceedings, Part III 24</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>).</citation>
</ref>
<ref id="B182">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>P.</given-names></name> <name><surname>Zhu</surname> <given-names>X.</given-names></name> <name><surname>Clifton</surname> <given-names>D. A.</given-names></name></person-group> (<year>2023</year>). <article-title>Multimodal learning with transformers: a survey</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intellig</source>. <volume>45</volume>, <fpage>12113</fpage>&#x02013;<lpage>12132</lpage>. <pub-id pub-id-type="doi">10.1109/TPAMI.2023.3275156</pub-id><pub-id pub-id-type="pmid">37167049</pub-id></citation></ref>
<ref id="B183">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>Y.</given-names></name> <name><surname>Das</surname> <given-names>P.</given-names></name> <name><surname>McCord</surname> <given-names>R. P.</given-names></name></person-group> (<year>2022</year>). <article-title>SMILE: mutual information learning for integration of single-cell omics data</article-title>. <source>Bioinformatics</source> <volume>38</volume>, <fpage>476</fpage>&#x02013;<lpage>486</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btab706</pub-id><pub-id pub-id-type="pmid">34623402</pub-id></citation></ref>
<ref id="B184">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>K. D.</given-names></name> <name><surname>Belyaeva</surname> <given-names>A.</given-names></name> <name><surname>Venkatachalapathy</surname> <given-names>S.</given-names></name> <name><surname>Damodaran</surname> <given-names>K.</given-names></name> <name><surname>Katcoff</surname> <given-names>A.</given-names></name> <name><surname>Radhakrishnan</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Multi-domain translation between single-cell imaging and sequencing data using autoencoders</article-title>. <source>Nat. Commun</source>. <volume>12</volume>:<fpage>31</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-020-20249-2</pub-id><pub-id pub-id-type="pmid">33397893</pub-id></citation></ref>
<ref id="B185">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>L.</given-names></name> <name><surname>Ng</surname> <given-names>T. L. J.</given-names></name> <name><surname>Smyth</surname> <given-names>B.</given-names></name> <name><surname>Dong</surname> <given-names>R.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;HTML: hierarchical transformer-based multi-task learning for volatility prediction,&#x0201D;</article-title> in <source>Proceedings of The Web Conference 2020, WWW &#x00027;20</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>), <fpage>441</fpage>&#x02013;<lpage>451</lpage>.</citation>
</ref>
<ref id="B186">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>T.</given-names></name> <name><surname>Hu</surname> <given-names>L.</given-names></name> <name><surname>Shi</surname> <given-names>C.</given-names></name> <name><surname>Ji</surname> <given-names>H.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Nie</surname> <given-names>L.</given-names></name></person-group> (<year>2021</year>). <article-title>HGAT: Heterogeneous graph attention networks for semi-supervised short text classification</article-title>. <source>ACM Trans. Inform. Syst</source>. (TOIS) <volume>39</volume>, <fpage>1</fpage>&#x02013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1145/3450352</pub-id></citation>
</ref>
<ref id="B187">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yap</surname> <given-names>J.</given-names></name> <name><surname>Yolland</surname> <given-names>W.</given-names></name> <name><surname>Tschandl</surname> <given-names>P.</given-names></name></person-group> (<year>2018</year>). <article-title>Multimodal skin lesion classification using deep learning</article-title>. <source>Exp. Dermatol</source>. <volume>27</volume>, <fpage>1261</fpage>&#x02013;<lpage>1267</lpage>. <pub-id pub-id-type="doi">10.1111/exd.13777</pub-id><pub-id pub-id-type="pmid">30187575</pub-id></citation></ref>
<ref id="B188">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yi</surname> <given-names>H.-C.</given-names></name> <name><surname>You</surname> <given-names>Z.-H.</given-names></name> <name><surname>Huang</surname> <given-names>D.-S.</given-names></name> <name><surname>Kwoh</surname> <given-names>C. K.</given-names></name></person-group> (<year>2022</year>). <article-title>Graph representation learning in bioinformatics: trends, methods and applications</article-title>. <source>Brief. Bioinform</source>. <volume>23</volume>:<fpage>bbab340</fpage>. <pub-id pub-id-type="doi">10.1093/bib/bbab340</pub-id><pub-id pub-id-type="pmid">34471921</pub-id></citation></ref>
<ref id="B189">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ying</surname> <given-names>Z.</given-names></name> <name><surname>Bourgeois</surname> <given-names>D.</given-names></name> <name><surname>You</surname> <given-names>J.</given-names></name> <name><surname>Zitnik</surname> <given-names>M.</given-names></name> <name><surname>Leskovec</surname> <given-names>J.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;GNNExplainer: generating explanations for graph neural networks,&#x0201D;</article-title> in <source>Adv. Neural Inf. Process. Syst</source>, 32.<pub-id pub-id-type="pmid">32265580</pub-id></citation></ref>
<ref id="B190">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yogi</surname> <given-names>M. K.</given-names></name> <name><surname>Mundru</surname> <given-names>Y.</given-names></name></person-group> (<year>2024</year>). <article-title>Genomic data analysis with variant of secure multi-party computation technique</article-title>. <source>J. Trends Comp. Sci. Smart Technol</source>. <volume>5</volume>, <fpage>450</fpage>&#x02013;<lpage>470</lpage>. <pub-id pub-id-type="doi">10.36548/jtcsst.2023.4.006</pub-id></citation></ref>
<ref id="B191">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yousefi</surname> <given-names>S.</given-names></name> <name><surname>Amrollahi</surname> <given-names>F.</given-names></name> <name><surname>Amgad</surname> <given-names>M.</given-names></name> <name><surname>Dong</surname> <given-names>C.</given-names></name> <name><surname>Lewis</surname> <given-names>J. E.</given-names></name> <name><surname>Song</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Predicting clinical outcomes from large scale cancer genomic profiles with deep survival models</article-title>. <source>Sci. Rep</source>. <volume>7</volume>:<fpage>1</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-017-11817-6</pub-id><pub-id pub-id-type="pmid">28916782</pub-id></citation></ref>
<ref id="B192">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Vasudevan</surname> <given-names>V.</given-names></name> <name><surname>Yeung</surname> <given-names>L.</given-names></name> <name><surname>Seyedhosseini</surname> <given-names>M.</given-names></name> <name><surname>Wu</surname> <given-names>Y.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;Coca: Contrastive captioners are image-text foundation models,&#x0201D;</article-title> in <source>Transactions on Machine Learning Research</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>JMLR</publisher-name>).</citation>
</ref>
<ref id="B193">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Yuan</surname> <given-names>H.</given-names></name> <name><surname>Yu</surname> <given-names>H.</given-names></name> <name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Li</surname> <given-names>K.</given-names></name> <name><surname>Ji</surname> <given-names>S.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;On explainability of graph neural networks via subgraph explorations,&#x0201D;</article-title> in <source>International Conference on Machine Learning</source> (<publisher-loc>New York</publisher-loc>: <publisher-name>PMLR</publisher-name>), <fpage>12241</fpage>&#x02013;<lpage>12252</lpage>.</citation>
</ref>
<ref id="B194">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Zeng</surname> <given-names>Y.</given-names></name> <name><surname>Zhou</surname> <given-names>X.</given-names></name> <name><surname>Rao</surname> <given-names>J.</given-names></name> <name><surname>Lu</surname> <given-names>Y.</given-names></name> <name><surname>Yang</surname> <given-names>Y.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Accurately clustering single-cell RNA-seq data by capturing structural relations between cells through graph convolutional network,&#x0201D;</article-title> in <source>2020 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)</source> (<publisher-loc>Seoul</publisher-loc>: <publisher-name>IEEE</publisher-name>).</citation>
</ref>
<ref id="B195">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>H.</given-names></name> <name><surname>Wu</surname> <given-names>B.</given-names></name> <name><surname>Yuan</surname> <given-names>X.</given-names></name> <name><surname>Pan</surname> <given-names>S.</given-names></name> <name><surname>Tong</surname> <given-names>H.</given-names></name> <name><surname>Pei</surname> <given-names>J.</given-names></name></person-group> (<year>2022</year>). <article-title>Trustworthy graph neural networks: Aspects, methods and trends</article-title>. <source>arXiv</source> [preprint] arXiv:2205.07424. <pub-id pub-id-type="doi">10.48550/arXiv.2205.07424</pub-id></citation>
</ref>
<ref id="B196">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>N.</given-names></name></person-group> (<year>2020</year>). <article-title>Learning adversarial transformer for symbolic music generation</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst</source>. <volume>34</volume>, <fpage>1754</fpage>&#x02013;<lpage>1763</lpage>. <pub-id pub-id-type="doi">10.1109/TNNLS.2020.2990746</pub-id><pub-id pub-id-type="pmid">32614773</pub-id></citation></ref>
<ref id="B197">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Y.-D.</given-names></name> <name><surname>Satapathy</surname> <given-names>S. C.</given-names></name> <name><surname>Guttery</surname> <given-names>D. S.</given-names></name> <name><surname>G&#x000F3;rriz</surname> <given-names>J. M.</given-names></name> <name><surname>Wang</surname> <given-names>S.-H.</given-names></name></person-group> (<year>2021</year>). <article-title>Improved breast cancer classification through combining graph convolutional network and convolutional neural network</article-title>. <source>Inform. Proc. Manage</source>. <volume>58</volume>(<issue>2</issue>):<fpage>102439</fpage>. <pub-id pub-id-type="doi">10.1016/j.ipm.2020.102439</pub-id></citation>
</ref>
<ref id="B198">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Yang</surname> <given-names>C.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name></person-group> (<year>2022</year>). <article-title>scDART: integrating unmatched scRNA-seq and scATAC-seq data and learning cross-modality relationship simultaneously</article-title>. <source>Genome Biol</source>. <volume>23</volume>:<fpage>139</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-022-02706-x</pub-id><pub-id pub-id-type="pmid">35761403</pub-id></citation></ref>
<ref id="B199">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>B.</given-names></name> <name><surname>Gong</surname> <given-names>M.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name></person-group> (<year>2022</year>). <article-title>Hierarchical multimodal transformer to summarize videos</article-title>. <source>Neurocomputing</source> <volume>468</volume>:<fpage>360</fpage>&#x02013;<lpage>369</lpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2021.10.039</pub-id></citation>
</ref>
<ref id="B200">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>F.</given-names></name> <name><surname>Zhang</surname> <given-names>C.</given-names></name> <name><surname>Geng</surname> <given-names>B.</given-names></name></person-group> (<year>2024</year>). <article-title>Deep multimodal data fusion</article-title>. <source>ACM Comput. Surv.</source> <pub-id pub-id-type="doi">10.1145/3649447</pub-id></citation>
</ref>
<ref id="B201">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>M.</given-names></name> <name><surname>Huang</surname> <given-names>X.</given-names></name> <name><surname>Jiang</surname> <given-names>J.</given-names></name> <name><surname>Mou</surname> <given-names>L.</given-names></name> <name><surname>Yan</surname> <given-names>D.-M.</given-names></name> <name><surname>Ma</surname> <given-names>L.</given-names></name></person-group> (<year>2023</year>). <article-title>Accurate registration of cross-modality geometry via consistent clustering</article-title>. <source>IEEE Trans. Visualizat. Comp. Graph</source>. <volume>30</volume>, <fpage>4055</fpage>&#x02013;<lpage>4067</lpage>. <pub-id pub-id-type="doi">10.1109/TVCG.2023.3247169</pub-id><pub-id pub-id-type="pmid">37027717</pub-id></citation></ref>
<ref id="B202">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Zhong</surname> <given-names>Z.</given-names></name> <name><surname>Schneider</surname> <given-names>D.</given-names></name> <name><surname>Voit</surname> <given-names>M.</given-names></name> <name><surname>Stiefelhagen</surname> <given-names>R.</given-names></name> <name><surname>Beyerer</surname> <given-names>J.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Anticipative feature fusion transformer for multi-modal action anticipation,&#x0201D;</article-title> in <source>2023 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)</source> (<publisher-loc>Vancouver, BC</publisher-loc>: <publisher-name>IEEE/CVF</publisher-name>), <fpage>6057</fpage>&#x02013;<lpage>6066</lpage>.</citation>
</ref>
<ref id="B203">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhu</surname> <given-names>H.</given-names></name> <name><surname>Sun</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Ma</surname> <given-names>K.</given-names></name> <name><surname>Zhou</surname> <given-names>S. K.</given-names></name> <name><surname>Zheng</surname> <given-names>Y.</given-names></name></person-group> (<year>2022</year>). <article-title>DFTR: Depth-supervised Fusion Transformer for Salient Object Detection</article-title>. <source>arXiv</source> [preprint].</citation>
</ref>
<ref id="B204">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Zhuang</surname> <given-names>L.</given-names></name> <name><surname>Wayne</surname> <given-names>L.</given-names></name> <name><surname>Ya</surname> <given-names>S.</given-names></name> <name><surname>Jun</surname> <given-names>Z.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;A robustly optimized BERT pre-training approach with post-training,&#x0201D;</article-title> in <source>Proceedings of the 20th Chinese National Conference on Computational Linguistics</source>, S. Li, M. Sun, Y. Liu, H. Wu, K. Liu, W. Che (<publisher-loc>Huhhot, China</publisher-loc>: <publisher-name>Chinese Information Processing Society of China</publisher-name>), <fpage>1218</fpage>&#x02013;<lpage>1227</lpage>.</citation>
</ref>
<ref id="B205">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zitnik</surname> <given-names>M.</given-names></name> <name><surname>Agrawal</surname> <given-names>M.</given-names></name> <name><surname>Leskovec</surname> <given-names>J.</given-names></name></person-group> (<year>2018</year>). <article-title>Modeling polypharmacy side effects with graph convolutional networks</article-title>. <source>Bioinformatics</source> <volume>34</volume>, <fpage>i457</fpage>&#x02013;<lpage>i466</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/bty294</pub-id><pub-id pub-id-type="pmid">29949996</pub-id></citation></ref>
</ref-list>
</back>
</article>