<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1407765</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2024.1407765</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Integrative analysis of cancer multimodality data identifying COPS5 as a novel biomarker of diffuse large B-cell lymphoma</article-title>
<alt-title alt-title-type="left-running-head">Dai et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2024.1407765">10.3389/fgene.2024.1407765</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Dai</surname>
<given-names>Yutong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2613755/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Jingmei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yamamoto</surname>
<given-names>Keita</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Goyama</surname>
<given-names>Susumu</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1065055/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Loza</surname>
<given-names>Martin</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2529550/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Park</surname>
<given-names>Sung-Joon</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1357254/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Nakai</surname>
<given-names>Kenta</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/991531/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Computational Biology and Medical Science</institution>, <institution>The University of Tokyo</institution>, <addr-line>Kashiwa</addr-line>, <country>Japan</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>The Institute of Medical Science</institution>, <institution>The University of Tokyo</institution>, <addr-line>Tokyo</addr-line>, <country>Japan</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/32091/overview">Shoba Ranganathan</ext-link>, Macquarie University, Australia</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1269169/overview">Wanwei Zhang</ext-link>, Columbia University, United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/55577/overview">Prashanth N. Suravajhala</ext-link>, Amrita Vishwa Vidyapeetham University, India</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Kenta Nakai, <email>knakai@ims.u-tokyo.ac.jp</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>21</day>
<month>06</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1407765</elocation-id>
<history>
<date date-type="received">
<day>27</day>
<month>03</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>03</day>
<month>06</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Dai, Li, Yamamoto, Goyama, Loza, Park and Nakai.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Dai, Li, Yamamoto, Goyama, Loza, Park and Nakai</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Preventing, diagnosing, and treating diseases requires accurate clinical biomarkers, which remains challenging. Recently, advanced computational approaches have accelerated the discovery of promising biomarkers from high-dimensional multimodal data. Although machine-learning methods have greatly contributed to the research fields, handling data sparseness, which is not unusual in research settings, is still an issue as it leads to limited interpretability and performance in the presence of missing information. Here, we propose a novel pipeline integrating joint non-negative matrix factorization (JNMF), identifying key features within sparse high-dimensional heterogeneous data, and a biological pathway analysis, interpreting the functionality of features by detecting activated signaling pathways. By applying our pipeline to large-scale public cancer datasets, we identified sets of genomic features relevant to specific cancer types as common pattern modules (CPMs) of JNMF. We further detected <italic>COPS5</italic> as a potential upstream regulator of pathways associated with diffuse large B-cell lymphoma (DLBCL). <italic>COPS5</italic> exhibited co-overexpression with <italic>MYC</italic>, <italic>TP53</italic>, and <italic>BCL2</italic>, known DLBCL marker genes, and its high expression was correlated with a lower survival probability of DLBCL patients. Using the CRISPR-Cas9 system, we confirmed the tumor growth effect of <italic>COPS5</italic>, which suggests it as a novel prognostic biomarker for DLBCL. Our results highlight that integrating multiple high-dimensional data and effectively decomposing them to interpretable dimensions unravels hidden biological importance, which enhances the discovery of clinical biomarkers.</p>
</abstract>
<kwd-group>
<kwd>biomarker discovery</kwd>
<kwd>diffuse large B-cell lymphoma</kwd>
<kwd>joint non-negative matrix factorization</kwd>
<kwd>multi-omics</kwd>
<kwd>pathway analysis</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Genomics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>The era of precision medicine has witnessed a prosperous shift from one-size-fits-all medicine to personalized medicine (<xref ref-type="bibr" rid="B8">Collins and Varmus, 2015</xref>). In general, precision medicine aiming for personalized prevention, diagnosis, and treatment requires high-quality biomarkers, which remains challenging (<xref ref-type="bibr" rid="B25">Tsimberidou et al., 2020</xref>). Recently, accessible large-scale multimodal data has accelerated the discovery of clinical biomarkers where diverse computational approaches, particularly those integrating multi-omics data, have gained widespread adoption (<xref ref-type="bibr" rid="B2">Bersanelli et al., 2016</xref>). On the other hand, the intrinsic nature of biomedical datasets that includes sparse and unlabeled information hinders the practical application of computational methods and limits interpretability and performance (<xref ref-type="bibr" rid="B7">Cho et al., 2023</xref>).</p>
<p>Several unsupervised clustering methods have been developed to address these issues and discover potential biological patterns (<xref ref-type="bibr" rid="B21">Reel et al., 2021</xref>). For example, the sparse multiple canonical correlation analysis successfully recognized relationships between copy number variations in genomic regions on different chromosomes. Yet, it cannot fully consider the correlation of information across different omics data (<xref ref-type="bibr" rid="B27">Witten and Tibshirani, 2010</xref>). Similarity Network Fusion (<xref ref-type="bibr" rid="B6">Chiu et al., 2018</xref>) identified novel subtypes of triple-negative breast cancer patients but has limited applicability to diverse multi-omics scenarios because it cannot accept multiple data types, such as continuous and binary types. In contrast, joint non-negative matrix factorization (JNMF), an unsupervised algorithm, overcomes the bottlenecks of those methods and can extract underlying features from sparse high-dimensional heterogeneous data (<xref ref-type="bibr" rid="B30">Zhang et al., 2012</xref>; <xref ref-type="bibr" rid="B29">Yang and Michailidis, 2016</xref>).</p>
<p>In this study, we aim to develop a method to discover interpretable biomarkers from intricate multimodal data. To this end, we designed a novel pipeline that integrates JNMF and a biological pathway analysis; the functionality of JNMF-detected genetic features is inferred by detecting signaling pathways specifically activated by the features. We demonstrate the ability to find reliable biomarkers from the large-scale cancer datasets of the Cancer Cell Line Encyclopedia (CCLE) and The Cancer Genome Atlas (TCGA). In particular, we identified <italic>COPS5</italic> as a novel biomarker for diffuse large B-cell lymphoma (DLBCL) and experimentally validated it by CRISPR-Cas9 knockout, which supports the feasibility of our approach.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Materials and methods</title>
<sec id="s2-1">
<title>2.1 Data preparation and processing</title>
<p>The raw dataset of the CCLE project (DepMap release 23Q4) was downloaded <italic>via</italic> the DepMap portal (<ext-link ext-link-type="uri" xlink:href="https://depmap.org/portal/">https://depmap.org/portal/</ext-link>). The dataset consisted of six matrices including gene expression, CNV (copy number variation) amplification, CNV loss, DNA mutation of somatic point mutations and indels, pharmacologic sensitivity, and metadata of cell lines. For correlation analysis and survival analysis, the expression profiles of DLBCL patients were downloaded from TCGA (<ext-link ext-link-type="uri" xlink:href="https://www.cancer.gov/tcga">https://www.cancer.gov/tcga</ext-link>) and NCBI Gene Expression Omnibus (GEO) GSE69049.</p>
<p>We prepared the CCLE datasets only for the 504 cell lines present in the pharmacologic sensitivity matrix and built the input matrices of JNMF as follows. The TPMs (transcripts per million) on a log-2 scale quantifying gene expression were rescaled to the range from 0 to 1 by min-max normalization. The DNA mutation profile was converted into a binary matrix in which 1 denotes mutated and 0 denotes normal. Two binary matrices for CNV gain and loss were constructed from the GISTIC scores in the CNV data: &#x2b;2 for amplification and &#x2212;2 for deletion. The pharmacologic sensitivity was also converted into values ranging from 0 (insensitive) to 1 (sensitive) as follows:<disp-formula id="equ1">
<mml:math id="m1">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>max</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="normal">X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>max</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="normal">X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>min</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="normal">X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>where <italic>x</italic> is an IC50 (half maximal inhibitory concentration) value and max(X) and min(X) are the maximum and minimum values in the pharmacologic sensitivity profile. A cancer-type matrix in binary format was prepared from the metadata of cell lines.</p>
</sec>
<sec id="s2-2">
<title>2.2 Plasmids and viral infection</title>
<p>To generate single-guide RNA (sgRNA) expression vectors targeting <italic>COPS5</italic> (<italic>COPS5</italic>-sgRNA-1 and <italic>COPS5</italic>-sgRNA-2) or a non-targeting (NT) control, annealed oligonucleotides were cloned into the pLKO5.sgRNA.EFS.tRFP657 vector (Addgene plasmid &#x23; 57824; <ext-link ext-link-type="uri" xlink:href="http://n2t.net/addgene:57824">http://n2t.net/addgene:57824</ext-link>), which was a gift from Benjamin Ebert. The Cas9 expression in the cell lines Raji (Burkitt lymphoma cell line) and SLVL (Splenic marginal zone lymphoma cell line) was induced by FUCas9Cherry plasmid, which was a gift from Marco Herold (Addgene plasmid &#x23; 70182, <ext-link ext-link-type="uri" xlink:href="http://n2t.net/addgene:70182">http://n2t.net/addgene:70182</ext-link>). Lentiviruses were produced by transient transfection of 293T cells with viral plasmids, along with gag-, pol-, and env-expressing plasmids (pMD2.G and psPAX2) using the calcium-phosphate method (<xref ref-type="bibr" rid="B11">Goyama et al., 2016</xref>). pMD2.G (Addgene plasmid &#x23;12259; <ext-link ext-link-type="uri" xlink:href="http://n2t.net/addgene:12259">http://n2t.net/addgene:12259</ext-link>) and psPAX2 (Addgene plasmid &#x23;12260; <ext-link ext-link-type="uri" xlink:href="http://n2t.net/addgene:12260">http://n2t.net/addgene:12260</ext-link>) were gifts from Didier Trono. The sequences for the sgRNAs are as follows: NT: 5&#x2032;-cgc&#x200b;ttc&#x200b;cgc&#x200b;ggc&#x200b;ccg&#x200b;ttc&#x200b;aa-3&#x2032;, <italic>COPS5</italic>-sgRNA-1: 5&#x2032;-gtg&#x200b;atg&#x200b;cat&#x200b;gcc&#x200b;aga&#x200b;tcg&#x200b;gg-3&#x2032;, <italic>COPS5</italic>-sgRNA-2: 5&#x2032;-caa&#x200b;caa&#x200b;gaa&#x200b;caa&#x200b;tat&#x200b;ccg&#x200b;ca-3&#x2032;.</p>
</sec>
<sec id="s2-3">
<title>2.3 Cell culture and CRISPR/Cas9-mediated gene knockout</title>
<p>The lymphoma cell lines Raji and SLVL were cultured in RPMI1640 medium supplemented with 10% fetal bovine serum (FBS) and 1% penicillin. 293T cells (CRL-11268, ATCC, Manassas, VA, United States) were cultured in Dulbecco&#x2019;s modified Eagle&#x2019;s medium supplemented with 10% FBS and 1% penicillin. These cells were first transduced with the FUCas9Cherry, followed by sorting of mCherry<sup>&#x2b;</sup> cells using BD FACSAriaIII (BD Biosciences, San Jose, CA, United States). The Cas9-expressing (mCherry<sup>&#x2b;</sup>) Raji and SLVL cells were then transduced with the sgRNAs co-expressing tRFP657. The frequency of tRFP657<sup>&#x2b;</sup> cells in the cultures was evaluated on Day3, Day6, Day10, Day17 and Day24.</p>
<p>Cells were sorted by FACS Aria (BD Biosciences, San Jose, CA, United States), and the expression of mCherry and tRFP657 was analyzed with FACS CytoFLEX (Beckman Coulter, Brea, California, United States). The cytometry data were analyzed by BD FlowJo software (TREESTAR, Inc., San Carlos, CA. ver.10.8.1).</p>
</sec>
<sec id="s2-4">
<title>2.4 Joint non-negative matrix factorization (JNMF)</title>
<p>JNMF, an extension of the traditional NMF algorithm, is designed to facilitate the simultaneous decomposition of <italic>N</italic> datasets (<xref ref-type="bibr" rid="B30">Zhang et al., 2012</xref>). The objective function of JNMF is given by:<disp-formula id="equ2">
<mml:math id="m2">
<mml:mrow>
<mml:mi mathvariant="italic">min</mml:mi>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>F</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>where <italic>X</italic>
<sub>
<italic>i</italic>
</sub> is an input matrix with size <italic>m</italic>&#xd7;<italic>n</italic>
<sub>
<italic>i</italic>
</sub>, and <italic>F</italic> represents the Frobenius norm. The <italic>W</italic> and <italic>H</italic>
<sub>
<italic>i</italic>
</sub> represent <italic>m</italic>&#xd7;<italic>k</italic> and <italic>k</italic>&#xd7;<italic>n</italic>
<sub>
<italic>i</italic>
</sub> factorized matrices, respectively. Here, <italic>k</italic> is the number of clusters to be extracted, namely, common pattern modules (CPMs). To find the optimal <italic>W</italic> and <italic>H</italic><sub><italic>i</italic></sub> minimizing the objective function, JNMF updates them based on the traditional multiplicative update rules (<xref ref-type="bibr" rid="B30">Zhang et al., 2012</xref>) as follows:<disp-formula id="equ3">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>J</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>J</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mi>J</mml:mi>
<mml:mi>T</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>K</mml:mi>
</mml:msub>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mi>K</mml:mi>
<mml:mi>T</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ4">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>I</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>I</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>I</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:msub>
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mi>H</mml:mi>
</mml:mrow>
<mml:mi>I</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>N</mml:mi>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>To handle missing values in the input matrices, we employed a weighted NMF approach (<xref ref-type="bibr" rid="B30">Zhang et al., 2012</xref>): a mask matrix <italic>M</italic>, in which 1 denotes a non-missing entry and 0 denotes a missing entry, is introduced into the JNMF framework. <italic>X</italic> is masked by taking its Hadamard product with <italic>M</italic>, which effectively filters out the influence of missing values (<xref ref-type="bibr" rid="B10">Fujita et al., 2018</xref>). The objective function and multiplicative update rules of the weighted JNMF are given by:<disp-formula id="equ5">
<mml:math id="m5">
<mml:mrow>
<mml:mi mathvariant="italic">min</mml:mi>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:msubsup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2218;</mml:mo>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>W</mml:mi>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>F</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ6">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>J</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mi>J</mml:mi>
</mml:msub>
<mml:mo>&#x2218;</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>J</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mi>J</mml:mi>
<mml:mi>T</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mi>K</mml:mi>
</mml:msub>
<mml:mo>&#x2218;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>K</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
<mml:msubsup>
<mml:mi>H</mml:mi>
<mml:mi>K</mml:mi>
<mml:mi>T</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ7">
<mml:math id="m7">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>I</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>I</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mi>I</mml:mi>
</mml:msub>
<mml:mo>&#x2218;</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>I</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mi>I</mml:mi>
</mml:msub>
<mml:mo>&#x2218;</mml:mo>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi>I</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>N</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf1">
<mml:math id="m8">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>&#x2218;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes the Hadamard (element-wise) product.</p>
</sec>
<sec id="s2-5">
<title>2.5 Hyperparameter optimization</title>
<p>Given a factorization rank <italic>k</italic>, JNMF starts with random <italic>W</italic> and <italic>H</italic>s and iteratively updates these matrices toward minimizing the objective function for <italic>n</italic> iterations. To tune the hyperparameters, we monitored whether the procedure with <italic>k</italic> and <italic>n</italic> converged stably across <italic>t</italic> repeated runs. A consensus matrix and its cophenetic correlation coefficient (CCC) (<xref ref-type="bibr" rid="B3">Brunet et al., 2004</xref>) were used to evaluate the performance of JNMF under the setting of <italic>k</italic> and <italic>n</italic>.</p>
</sec>
<sec id="s2-6">
<title>2.6 Selection of features for each CPM in each H matrix</title>
<p>In analyzing the six input matrices of CCLE datasets, our JNMF produces seven matrices: <italic>W</italic>, <italic>H</italic>
<sub>
<italic>1</italic>
</sub>, <italic>H</italic>
<sub>
<italic>2</italic>
</sub>, <italic>H</italic>
<sub>
<italic>3</italic>
</sub>, <italic>H</italic>
<sub>
<italic>4</italic>
</sub>, <italic>H</italic>
<sub>
<italic>5</italic>
</sub>, and <italic>H</italic>
<sub>
<italic>6</italic>
</sub>. For normalizing the matrices, rather than using the maximum values, z-score normalization was applied to each row and column of <italic>W</italic> and <italic>H</italic>s as follows:<disp-formula id="equ8">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf2">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3bc;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf3">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> stand for the average and the standard deviation for cell line/drug/mutation/CNV/genes/cancer type feature <italic>j</italic>, respectively. The feature <italic>j</italic> is assigned to CPMs if and only if <inline-formula id="inf4">
<mml:math id="m12">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is &#x3e; &#x2b;1.96.</p>
</sec>
<sec id="s2-7">
<title>2.7 Pathway analysis</title>
<p>To identify the activated pathways within each CPM obtained from JNMF, we employed IPA (Ingenuity Pathway Analysis) (<xref ref-type="bibr" rid="B13">Kr&#xe4;mer et al., 2014</xref>). IPA is a widely used tool for exploring signaling pathways and biological networks. Specifically, IPA&#x2019;s upstream analysis and protein-protein interaction analyses were performed to determine general regulators in CPM-activating pathways.</p>
</sec>
<sec id="s2-8">
<title>2.8 Correlation analysis</title>
<p>For examining the correlation between candidate biomarkers and known hub genes of DLBCL, we utilized GEPIA2 (Gene Expression Profiling Interactive Analysis 2) (<xref ref-type="bibr" rid="B24">Tang et al., 2019</xref>). GEPIA2 is a comprehensive web-based tool that integrates data from the TCGA and GTEx databases. Specifically, we used the &#x201c;correlation analysis&#x201d; module within GEPIA2, utilizing the TCGA-DLBCL project dataset, to assess the correlation between the candidate biomarkers and the known hub genes of DLBCL.</p>
</sec>
<sec id="s2-9">
<title>2.9 Survival analysis</title>
<p>The Kaplan-Meier method was used to investigate whether the candidate biomarkers in the GSE69049 dataset affected the overall survival (OS) of DLBCL patients treated with chemotherapy. The &#x201c;survminer&#x201d; R package was used to perform the survival analysis.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>3 Results</title>
<sec id="s3-1">
<title>3.1 Overall workflow of the proposed method</title>
<p>We designed a method to find significant sets of multimodal factors (i.e., modules) that have the potency to characterize disease phenotypes. Since multimodality comprises multiple high-dimensional heterogeneous data, we adopted JNMF, a well-proven algorithm for clustering the factors as modules, by modifying it to handle data sparseness efficiently, referred to as weighted JNMF. To interpret the functional importance of the JNMF-detected modules, namely, the common pattern module (CPM), we utilized a pathway analysis by detecting upstream regulators in signaling pathways activated by the modules (<xref ref-type="fig" rid="F1">Figure 1</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Overall workflow of our analysis. The figure outlines a systematic approach for identifying candidate biomarkers in cancer research, starting with the collection of multimodal data. These data were analyzed using joint non-negative matrix factorization to identify common pattern modules and combined with pathway analysis to highlight potential biomarkers at the intersection of different data types. Then, these biomarkers were clinically validated by correlation and survival analyses using TCGA and GEO data, and lastly, experimentally validated using CRISPR/Cas9 system to determine their promising applications in cancer cells.</p>
</caption>
<graphic xlink:href="fgene-15-1407765-g001.tif"/>
</fig>
<p>To perform the biomarker discovery using the proposed method, we compiled six feature matrices for 504 cancer cell lines by processing the large-scale CCLE datasets, each of which has binary or continuous values: gene expression, CNV amplification, CNV loss, DNA mutation, pharmacologic sensitivity, and cancer type (<xref ref-type="sec" rid="s10">Supplementary Table S1</xref>). Using these matrices as inputs, the weighted JNMF masks sparse elements and generates a factor matrix <italic>W</italic> given by latent coefficients for the cancer types in the reduced <italic>k</italic>-dimensional space (i.e., rank). Simultaneously, the six input matrices are reduced to the <italic>k</italic> dimension generating the matrices <italic>H</italic>
<sub>
<italic>i</italic>
</sub> (<italic>i</italic> &#x3d; 1, &#x2026; ,6), where each product with <italic>W</italic> approximates the original input matrix. Given optimal <italic>W</italic> and <italic>H</italic>s, significant feature sets (&#x3e;&#x2b;1.96 in z-score) are captured as CPMs by investigating z-score distributions in the factorized matrices.</p>
<p>Subsequently, the gene expression profiles in the JNMF-detected CPMs are analyzed by IPA to identify activated pathways. Meanwhile, the disease subtypes corresponding to the CPM are identified by combining characteristic drugs, DNA mutations, and structural variants in the CPM. Moreover, through IPA upstream analysis and IPA causal network analysis, the upstream regulators of CPMs which are considered candidate biomarkers are investigated. Next, the correlation and survival analyses of candidate biomarkers using TCGA and GEO data examine the clinical significance of the candidate biomarkers. Ultimately, the candidate biomarkers are experimentally validated by the CRISPR-Cas9 system.</p>
</sec>
<sec id="s3-2">
<title>3.2 Assessing the robustness of JNMF</title>
<p>To interrogate the ability of our JNMF, we prepared three artificial datasets as used in a previous study (<xref ref-type="bibr" rid="B10">Fujita et al., 2018</xref>). We first constructed three matrices with random values incorporating noise: a binary matrix for mutation and continuous matrices for pharmacologic sensitivity and gene expression. Then, we inserted missing values into a randomly selected 10% of the entries of each matrix: the missing rates in CCLE datasets were 2.6%&#x2013;10.5%. Next, we embedded three or four predefined CPMs into the matrices and randomly shuffled the entries in each matrix. Thereby, three artificial input datasets that are noisy and sparse but include modules were generated (<xref ref-type="sec" rid="s10">Supplementary Table S2</xref>, <xref ref-type="fig" rid="F2">Figure 2</xref>).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>JNMF clustering results of simulated data. JNMF was utilized to identify CPMs embedded in simulated pharmacologic sensitivity, mutation, and expression matrices. The continuous simulated pharmacologic sensitivity matrix X<sub>1</sub> comprises four modules alongside missing values. The continuous simulated expression matrix X<sub>2</sub> comprises three modules alongside missing rows. The binary simulated mutation matrix X<sub>3</sub> comprises three modules alongside missing rows. Gaussian noise is introduced into the X<sub>1</sub> and X<sub>2</sub> matrices. The values in X<sub>3</sub> are partially reversed as noise. The grey parts represent missing values.</p>
</caption>
<graphic xlink:href="fgene-15-1407765-g002.tif"/>
</fig>
<p>Since the embedded CPMs that our JNMF has to detect were three or four, we set the rank <italic>k</italic> &#x3d; 4. As shown in <xref ref-type="fig" rid="F2">Figure 2</xref>, our JNMF successfully identified the CPMs by decomposing the input matrices into <italic>W</italic>, <italic>H</italic>
<sub>
<italic>1</italic>
</sub>, <italic>H</italic>
<sub>
<italic>2</italic>
</sub>, and <italic>H</italic>
<sub>
<italic>3</italic>
</sub>. In addition, the products <italic>WH</italic>
<sub>
<italic>1</italic>
</sub>, <italic>WH</italic>
<sub>
<italic>2</italic>
</sub>, and <italic>WH</italic>
<sub>
<italic>3</italic>
</sub> accurately restored the input matrices. This result demonstrates that our JNMF can uncover hidden relationships within high-dimensional multimodal datasets by reducing the influence of noise and missing values.</p>
</sec>
<sec id="s3-3">
<title>3.3 Identifying CPMs corresponding to cancer types</title>
<p>To optimize the hyperparameters of JNMF for the six input matrices prepared from CCLE gene expression, CNV amplification, CNV loss, DNA mutation, pharmacologic sensitivity, and cancer cell line type, we investigated the convergence of JNMF during 2000 iterations updating W and Hs (<xref ref-type="fig" rid="F3">Figure 3A</xref>). We noticed that the outputs of JNMF are stable (CCC &#x3d; 0.72) when <italic>k</italic> &#x3d; 40 and manifest substantial consistency across 10 repeats (<xref ref-type="fig" rid="F3">Figure 3B</xref>). Using these hyperparameters, we finally retrieved 40 CPMs corresponding to specific cancer types, such as CPM &#x23;1 for hematopoietic and lymphoid malignancies, CPM &#x23;7 for breast cancer, CPM &#x23;10 for malignant melanoma, and CPM &#x23;28 for endometrial cancer (<xref ref-type="table" rid="T1">Table 1</xref>; <xref ref-type="fig" rid="F3">Figure 3C</xref>).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>JNMF identifies biological features in multimodal data. <bold>(A)</bold> Convergence curve showing the trajectory of JNMF objective function convergence under 2000 iterations. <bold>(B)</bold> Consensus matrix of W showing the reproducibility of JNMF result in 10 repeated trials. The blue rectangles represent clusters of the cell lines highly reproduced. <bold>(C)</bold> The distribution of cancer types in CPMs. The blue rectangles represent the specificity of each cancer in the CPMs. Darker blue corresponds to stronger specificity. CPM &#x23;1 shows high specificity for hematopoietic and lymphoid malignancies.</p>
</caption>
<graphic xlink:href="fgene-15-1407765-g003.tif"/>
</fig>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Summary of key features in CPMs.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">CPM</th>
<th align="left">Drug in CPM</th>
<th align="left">Genetic features in CPM</th>
<th align="left">Cell lines in CPM</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">CPM &#x23;1</td>
<td align="left">AEW541</td>
<td align="left">Mutation (BCL2, BCL6, IGF1R, MTOR, MYC, MYD88, PIK3R1, PTEN, SPEN, STAT3, TP53)</td>
<td align="left">Lymphoma</td>
</tr>
<tr>
<td align="left">CPM &#x23;7</td>
<td align="left">Lapatinib</td>
<td align="left">BRCA2 mutation, HER2 amplification, HER2 overexpression</td>
<td align="left">Breast Cancer</td>
</tr>
<tr>
<td align="left">CPM &#x23;10</td>
<td rowspan="2" align="left">PLX4720</td>
<td align="left">BRAF mutation, MITF amplification</td>
<td align="left">Skin Cancer</td>
</tr>
<tr>
<td align="left">CPM &#x23;28</td>
<td align="left">TP53 mutation, PTEN mutation, Overexpression (MLH1, MSH2, MSH6, and PMS2)</td>
<td align="left">Endometrium cancer</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The CPM &#x23;1 contained <italic>TP53</italic> mutation and the malignant translocations of <italic>MYC</italic>, <italic>BCL2</italic>, and <italic>BCL6</italic>, which are relevant to DLBCL (<xref ref-type="bibr" rid="B5">Chapuy et al., 2018</xref>). The CPM &#x23;7 included the pharmaceutical agent Lapatinib, <italic>HER2</italic> overexpression, and <italic>BRCA2</italic> amplification, which is supported by the clinical application of Lapatinib for treating <italic>HER2</italic>-positive metastatic breast cancer (<xref ref-type="bibr" rid="B28">Xu et al., 2021</xref>). The CPM &#x23;10, covering almost all of the skin cancer cell lines, included the pharmaceutical agent PLX4720 (vemurafenib) and <italic>BRAF</italic> mutation; the efficacy of vemurafenib has been tested in several clinical trials for treating unresectable or metastatic melanoma with <italic>BRAF</italic> V600E mutation (<xref ref-type="bibr" rid="B4">Chapman et al., 2011</xref>). The CPM &#x23;28 contained several known diagnostic and prognostic biomarkers of endometrial cancer, such as the mismatch repair mutation, and the overexpression of <italic>MLH1</italic>, <italic>MSH2</italic>, <italic>MSH6</italic>, and <italic>PMS2</italic>. In addition, this module included the overexpression and mutation of <italic>PTEN</italic> and <italic>TP53</italic>, which significantly contribute to the diagnosis of endometrial cancer (<xref ref-type="bibr" rid="B9">Crosbie et al., 2022</xref>).</p>
<p>Collectively, we confirmed that the CPMs are in high concordance with known relationships among variants, medications, and cancers, which suggests the potency of our approach to the discovery of novel biomarkers from multimodal data.</p>
</sec>
<sec id="s3-4">
<title>3.4 Analyzing biological pathways activated by DLBCL-related CPM</title>
<p>To interpret the functionality of JNMF-detected CPMs, we focused on the CPM &#x23;1 that includes DLBCL biomarkers (<xref ref-type="table" rid="T1">Table 1</xref>). Notably, this module also contained several gene mutations, each of which is known to be involved in pathogenic pathways: <italic>MYD88</italic> and <italic>CARD11</italic> functioning in the NF-&#x3ba;B pathway, <italic>SPEN</italic> involved in the NOTCH signaling, and <italic>STAT3</italic> which is a pivotal member of the JAK/STAT signaling pathway (<xref ref-type="bibr" rid="B4">Chapman et al., 2011</xref>).</p>
<p>Next, we performed a pathway analysis using IPA with the gene expression profile of CPM &#x23;1. Consistent with the pathways in which the mutated genes are involved, we found the activation of several signaling pathways of DLBCL (<italic>p</italic> &#x3c; 0.05). Examples include the activation of the NF-&#x3ba;B pathway causing DLBCL (<xref ref-type="bibr" rid="B18">Odqvist et al., 2014</xref>) and the deregulation of the JAK-STAT and PI3K-mediated signaling pathways, which are essential contributors to the pathogenesis and poor prognosis of DLBCL (<xref ref-type="bibr" rid="B5">Chapuy et al., 2018</xref>) (<xref ref-type="sec" rid="s10">Supplementary Figure S1</xref>). In addition, the sub-networks centered on <italic>MYC</italic>, <italic>TP53</italic>, and NF-&#x3ba;B indicated the activation of downstream pathways potentially relevant to DLBCL development (<xref ref-type="sec" rid="s10">Supplementary Figure S2</xref>). Lastly, by performing the IPA upstream analysis and IPA causal network analysis, we investigated upstream regulators (<italic>p</italic> &#x3c; 0.05) likely controlling the gene expression of CPM &#x23;1 (<xref ref-type="table" rid="T2">Table 2</xref>).</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Top five genes in IPA upstream regulators and causal network analyses.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th colspan="2" align="left">IPA upstream regulators analysis</th>
<th colspan="2" align="left">IPA causal network analysis</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Gene</td>
<td align="left">
<italic>p</italic>-value</td>
<td align="left">Gene</td>
<td align="left">
<italic>p</italic>-value</td>
</tr>
<tr>
<td align="left">COPS5</td>
<td align="left">2.58E-25</td>
<td align="left">COPS5</td>
<td align="left">1.06E-25</td>
</tr>
<tr>
<td align="left">E2F4</td>
<td align="left">1.38E-20</td>
<td align="left">TFEB</td>
<td align="left">4.23E-16</td>
</tr>
<tr>
<td align="left">UQCC3</td>
<td align="left">7.53E-18</td>
<td align="left">NUPR1</td>
<td align="left">3.80E-14</td>
</tr>
<tr>
<td align="left">TFEB</td>
<td align="left">4.23E-16</td>
<td align="left">BCR</td>
<td align="left">4.31E-11</td>
</tr>
<tr>
<td align="left">NUPR1</td>
<td align="left">2.57E-14</td>
<td align="left">RPL11</td>
<td align="left">3.09E-10</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Interestingly, <italic>COPS5</italic>, a subunit of the COP9 signalosome complex, was identified from both analyses and showed regulatory interactions with the overexpressed genes in the CPM &#x23;1 including known DLBCL markers (<xref ref-type="fig" rid="F4">Figure 4</xref>). This result supports that <italic>COPS5</italic> overexpression affects tumor-negative regulators in diverse cancers (<xref ref-type="bibr" rid="B26">Wang et al., 2016</xref>). For example, <italic>COPS5</italic> activates <italic>MYC</italic> by mediating the SKP1-CUL1-F-box protein complex (<xref ref-type="bibr" rid="B16">Lyapina et al., 2001</xref>). Also, <italic>COPS5</italic> alters cytoplasmic localizations of <italic>TP53</italic> and induces the degradation of <italic>TP53</italic> (<xref ref-type="bibr" rid="B14">Li et al., 2013</xref>). Moreover, <italic>COPS5</italic> is co-expressed with <italic>STAT3</italic> in cancers (<xref ref-type="bibr" rid="B17">Nishimoto et al., 2013</xref>), and <italic>MYC</italic> mediates this co-regulation associated with poor prognosis (<xref ref-type="bibr" rid="B19">Ok et al., 2014</xref>).</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>COPS5-targeted genes in the CPM &#x23;1. Arrows indicate that <italic>COPS5</italic> directly regulates the expression of connected genes.</p>
</caption>
<graphic xlink:href="fgene-15-1407765-g004.tif"/>
</fig>
<p>Taken together, despite the considerable heterogeneity among DLBCL subtypes (<xref ref-type="bibr" rid="B22">Schmitz et al., 2018</xref>), the CPM &#x23;1 collectively retained the distinctive characteristics of DLBCL, which emphasizes the importance of understanding the orchestration of multiple oncogenic factors. Moreover, we identified <italic>COPS5</italic> using the information of this module as a potential upstream regulator of DLBCL, requiring further validations.</p>
</sec>
<sec id="s3-5">
<title>3.5 Inferring the impact of COPS5 in DLBCL</title>
<p>Using the gene expression profiles of 47 DLBCL patients available at TCGA, we sought to confirm the importance of <italic>COPS5</italic> in DLBCL patients. Consistent with the results of CCLE data analysis, we observed the positive expression correlation between <italic>COPS5</italic> and the marker genes, as well as between <italic>TP53</italic> and <italic>MYC</italic> (<xref ref-type="fig" rid="F5">Figure 5A</xref>). It is noteworthy that all the positive correlations have been reported by previous <italic>in vitro</italic> studies (<xref ref-type="bibr" rid="B1">Adler et al., 2006</xref>; <xref ref-type="bibr" rid="B23">Sitte et al., 2012</xref>; <xref ref-type="bibr" rid="B14">Li et al., 2013</xref>; <xref ref-type="bibr" rid="B17">Nishimoto et al., 2013</xref>; <xref ref-type="bibr" rid="B15">Luo et al., 2022</xref>). Interestingly, even though <italic>BCL6</italic>, <italic>MYC</italic>, and <italic>TP53</italic> were grouped in the CPM &#x23;1, their expression correlations in the patients were relatively weak, as shown in <xref ref-type="fig" rid="F5">Figure 5A</xref>, e.g., R &#x3d; 0.22 between <italic>BCL6</italic> and <italic>TP53</italic>, and R &#x3d; 0.24 between <italic>BCL6</italic> and <italic>MYC</italic>. This result might reflect the high heterogeneity of DLBCL. Therefore, <italic>COPS5</italic> may be a key hub gene that efficiently characterizes heterogeneous DLBCL by co-expressing with the marker genes.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>
<bold>(A)</bold> The correlation scatter plots between CPM &#x23;1 overexpression genes in DLBCL patients. <bold>(B)</bold> Kaplan-Meier curve indicating the effect of high (red) and low (blue) expression of <italic>COPS5</italic> on the overall survival (OS) of DLBCL patients. <bold>(C)</bold> Changes in the frequency of Raji and SLVL cells cultured with <italic>COPS5</italic> depletions compared to controls (NT). The cells were transduced with Cas9 together with non-targeting (NT) or <italic>COPS5</italic>-targeting sgRNAs (<italic>COPS5</italic>-KO&#x23;1 and <italic>COPS5</italic>-KO &#x23;2) co-expressing tRFP657. The frequency of tRFP657&#x2b; cells was normalized to the frequency of tRFP657&#x2b; cells at day 3.</p>
</caption>
<graphic xlink:href="fgene-15-1407765-g005.tif"/>
</fig>
<p>Next, we performed the survival analysis to inspect the relationship of <italic>COPS5</italic> with the prognosis of DLBCL patients. As shown in <xref ref-type="fig" rid="F5">Figure 5B</xref>, the Kaplan-Meier curve showed that the high expression of <italic>COPS5</italic> is associated with poor prognosis in the overall survival of the patients treated with chemotherapy alone (<italic>p</italic> &#x3d; 0.0168). This result supports that the overexpression of <italic>COPS5</italic> promotes malignancy.</p>
<p>Finally, since <italic>COPS5</italic> has been shown to be associated with the proliferation of DLBCL in several cell lines derived from subtypes of DLBCL (<xref ref-type="bibr" rid="B20">Pulvino et al., 2015</xref>), to further determine the role of <italic>COPS5</italic> in B cell lymphoma, we depleted <italic>COPS5</italic> in the B cell lymphoma cell lines Raji and SLVL using the CRISPR/Cas9 system. Raji and SLVL cells were transduced with Cas9 together with tRFP657-coexpressing non-targeting (NT) or <italic>COPS5-</italic>targeting sgRNAs. As shown in <xref ref-type="fig" rid="F5">Figure 5C</xref>, <italic>COPS5</italic> depletion showed a strong growth-inhibitory effect in Raji and SLVL cells. These results confirm the key role of <italic>COPS5</italic> in the proliferation of malignant B cells (<xref ref-type="sec" rid="s10">Supplementary Figure S3</xref>).</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>4 Discussion</title>
<p>In this study, we proposed a JNMF-based method that integrates sparse multimodal data and reduces their higher dimensionality into interpretable lower dimensions. Our method captures CPMs grouping potentially relevant heterogeneous modalities and utilizes IPA for interpreting the biological importance of CPMs. Hence, contrary to traditional correlation-based predictions, our approach combines machine learning results with biological knowledge for rigorous inference, designed to uncover hidden relationships within intricate multimodality.</p>
<p>We applied the method to speculate key factors responsible for drug sensitivity in various cancers using CCLE datasets which provide plenty of genotype and phenotype annotations. Consequently, we successfully retrieved CPMs comprising CNVs, genomic mutations, and medications, characterizing the cancer types: for example, a metastatic breast cancer-related module showing the known relationship among the drug Lapatinib and gene mutations on <italic>HER2</italic>, and <italic>BRCA2</italic>, and a skin cancer-related module containing the BRAF-inhibitor PLX4720 and <italic>BRAF</italic> mutation. Unexpectedly, the CPM related to lymphoma contained <italic>COPS5</italic> co-overexpressing with DLBCL marker genes, e.g., <italic>MYC</italic>, <italic>TP53</italic>, and <italic>STAT3</italic>, and located upstream of the relevant pathways. The functional importance of <italic>COPS5</italic> was also confirmed in DLBCL patients and knockout experiments, revealing the significant contribution to poor prognosis and cancer cell proliferation. Constrained by the number of patients in the database, this result may need to be further validated in a larger scale of patient data.</p>
<p>It has been reported that the expression of <italic>COPS5</italic> is a prerequisite for the <italic>MYC</italic> activity in breast cancer (<xref ref-type="bibr" rid="B12">Hou et al., 2017</xref>). On the other hand, <italic>COPS5</italic> or <italic>MYC</italic> alone is insufficient to fully activate genes crucial for tumor growth and invasion (<xref ref-type="bibr" rid="B1">Adler et al., 2006</xref>), indicating their cooperativity is indispensable in cancer development. Regarding the tumor suppressor <italic>TP53</italic> co-overexpressing with <italic>COPS5</italic> in the DLBCL-related CPM, since the CPM also contained <italic>MYC</italic> and <italic>TP53</italic> mutations, our results suggest the importance of understanding the orchestration of multimodal features. Indeed, it has been reported that patients with overexpression of <italic>TP53</italic> in the presence of <italic>TP53</italic> mutations display chemotherapy resistance and poor prognosis (<xref ref-type="bibr" rid="B14">Li et al., 2013</xref>). We expect that our CPM, particularly <italic>COPS5</italic>, collectively explains this gain-of-function mutation, which needs further investigation.</p>
<p>We recognize that our current model input matrices do not include delicate genomic features, including various structural variants, mutation zygosity, and gene colocalization analysis. Enhancing our model by incorporating these additional patterns and refining preprocessing steps might improve the JNMF outcomes, allowing us to reveal more intricate biological relationships. Such improvements would expand the depth and breadth of our methodology. Despite these limitations, the features currently included have successfully identified biologically meaningful biomarkers, demonstrating our approach&#x2019;s robust scalability. This validation underscores the reliability of our model and underscores its potential for adaptation and growth with the integration of new data and advanced techniques.</p>
<p>In conclusion, our integrative analysis handled the sparsity of large-scale multimodal datasets by effectively decomposing them and offers the functional relationships among the high-dimensional features in disease phenotypes. Our findings highlight that integrating complementary data will facilitate clinical biomarker discovery, greatly advancing precision oncology.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s10">Supplementary Material</xref>; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>YD: Writing&#x2013;original draft, Writing&#x2013;review and editing. JL: Methodology, Validation, Writing&#x2013;review and editing. KY: Methodology, Validation, Writing&#x2013;review and editing. SG: Methodology, Validation, Writing&#x2013;review and editing. ML: Writing&#x2013;original draft, Writing&#x2013;review and editing. S-JP: Writing&#x2013;original draft, Writing&#x2013;review and editing. KN: Writing&#x2013;original draft, Writing&#x2013;review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s7">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. This work was supported by JST SPRING (JPMJSP2108 to YD) and AMED (23ck0106XXXh000X to SG).</p>
</sec>
<ack>
<p>Computational resources were provided by the supercomputer system SHIROKANE at the Human Genome Center, the Institute of Medical Science, the University of Tokyo.</p>
</ack>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The author(s) declared that they were an editorial board member of Frontiers, at the time of submission. This had no impact on the peer review process and the final decision.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2024.1407765/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2024.1407765/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet1.ZIP" id="SM1" mimetype="application/ZIP" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Adler</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Horlings</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Nuyten</surname>
<given-names>D. S. A.</given-names>
</name>
<name>
<surname>van de Vijver</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>H. Y.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Genetic regulators of large-scale transcriptional signatures in cancer</article-title>. <source>Nat. Genet.</source> <volume>38</volume> (<issue>4</issue>), <fpage>421</fpage>&#x2013;<lpage>430</lpage>. <pub-id pub-id-type="doi">10.1038/ng1752</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bersanelli</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mosca</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Remondini</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Giampieri</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Sala</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Castellani</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>Methods for the integration of multi-omics data: mathematical aspects</article-title>. <source>BMC Bioinforma.</source> <volume>17</volume> (<issue>2</issue>), <fpage>S15</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-015-0857-9</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brunet</surname>
<given-names>J.-P.</given-names>
</name>
<name>
<surname>Tamayo</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Golub</surname>
<given-names>T. R.</given-names>
</name>
<name>
<surname>Mesirov</surname>
<given-names>J. P.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Metagenes and molecular pattern discovery using matrix factorization</article-title>. <source>Proc. Natl. Acad. Sci. U. S. A.</source> <volume>101</volume> (<issue>12</issue>), <fpage>4164</fpage>&#x2013;<lpage>4169</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0308531101</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chapman</surname>
<given-names>P. B.</given-names>
</name>
<name>
<surname>Hauschild</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Robert</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Haanen</surname>
<given-names>J. B.</given-names>
</name>
<name>
<surname>Ascierto</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Larkin</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>Improved survival with vemurafenib in melanoma with BRAF V600E mutation</article-title>. <source>N. Engl. J. Med.</source> <volume>364</volume> (<issue>26</issue>), <fpage>2507</fpage>&#x2013;<lpage>2516</lpage>. <pub-id pub-id-type="doi">10.1056/NEJMoa1103782</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chapuy</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Stewart</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Dunford</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kamburov</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Redd</surname>
<given-names>R. A.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Molecular subtypes of diffuse large B cell lymphoma are associated with distinct pathogenic mechanisms and outcomes</article-title>. <source>Nat. Med.</source> <volume>24</volume> (<issue>5</issue>), <fpage>679</fpage>&#x2013;<lpage>690</lpage>. <pub-id pub-id-type="doi">10.1038/s41591-018-0016-8</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chiu</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Mitra</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Boymoushakian</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Coller</surname>
<given-names>H. A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Integrative analysis of the inter-tumoral heterogeneity of triple-negative breast cancer</article-title>. <source>Sci. Rep.</source> <volume>8</volume> (<issue>1</issue>), <fpage>11807</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-018-29992-5</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cho</surname>
<given-names>H. J.</given-names>
</name>
<name>
<surname>Shu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bekiranov</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Interpretable meta-learning of multi-omics data for survival analysis and pathway enrichment</article-title>. <source>Bioinformatics</source> <volume>39</volume> (<issue>4</issue>), <fpage>btad113</fpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btad113</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Collins</surname>
<given-names>F. S.</given-names>
</name>
<name>
<surname>Varmus</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>A new initiative on precision medicine</article-title>. <source>N. Engl. J. Med.</source> <volume>372</volume> (<issue>9</issue>), <fpage>793</fpage>&#x2013;<lpage>795</lpage>. <pub-id pub-id-type="doi">10.1056/NEJMp1500523</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Crosbie</surname>
<given-names>E. J.</given-names>
</name>
<name>
<surname>Kitson</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>McAlpine</surname>
<given-names>J. N.</given-names>
</name>
<name>
<surname>Mukhopadhyay</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Powell</surname>
<given-names>M. E.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Endometrial cancer</article-title>. <source>Lancet</source> <volume>399</volume> (<issue>10333</issue>), <fpage>1412</fpage>&#x2013;<lpage>1428</lpage>. <pub-id pub-id-type="doi">10.1016/S0140-6736(22)00323-3</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fujita</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Mizuarai</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Murakami</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Nakai</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Biomarker discovery by integrated joint non-negative matrix factorization and pathway signature analyses</article-title>. <source>Sci. Rep.</source> <volume>8</volume> (<issue>1</issue>), <fpage>9743</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-018-28066-w</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Goyama</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Schibler</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gasilina</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Shrestha</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Link</surname>
<given-names>K. A.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>UBASH3B/Sts-1-CBL axis regulates myeloid proliferation in human preleukemia induced by AML1-ETO</article-title>. <source>Leukemia</source> <volume>30</volume> (<issue>3</issue>), <fpage>728</fpage>&#x2013;<lpage>739</lpage>. <pub-id pub-id-type="doi">10.1038/leu.2015.275</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Jiao</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Xing</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>Increased Jab1/COPS5 is associated with therapeutic response and adverse outcome in lung cancer and breast cancer patients</article-title>. <source>Oncotarget</source> <volume>8</volume> (<issue>57</issue>), <fpage>97504</fpage>&#x2013;<lpage>97515</lpage>. <pub-id pub-id-type="doi">10.18632/oncotarget.22146</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kr&#xe4;mer</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Green</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Pollard</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Tugendreich</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Causal analysis approaches in Ingenuity Pathway Analysis</article-title>. <source>Bioinformatics</source> <volume>30</volume> (<issue>4</issue>), <fpage>523</fpage>&#x2013;<lpage>530</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btt703</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Gordon</surname>
<given-names>M. W.</given-names>
</name>
<name>
<surname>Xu-Monette</surname>
<given-names>Z. Y.</given-names>
</name>
<name>
<surname>Visco</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Tzankov</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Single nucleotide variation in the TP53 3&#x2032; untranslated region in diffuse large B-cell lymphoma treated with rituximab-CHOP: a report from the International DLBCL Rituximab-CHOP Consortium Program</article-title>. <source>Blood</source> <volume>121</volume> (<issue>22</issue>), <fpage>4529</fpage>&#x2013;<lpage>4540</lpage>. <pub-id pub-id-type="doi">10.1182/blood-2012-12-471722</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Young</surname>
<given-names>K. H.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>HDAC inhibitor chidamide synergizes with venetoclax to inhibit the growth of diffuse large B-cell lymphoma via down-regulation of MYC, BCL2, and TP53 expression</article-title>. <source>J. Zhejiang Univ. Sci. B</source> <volume>23</volume> (<issue>8</issue>), <fpage>666</fpage>&#x2013;<lpage>681</lpage>. <pub-id pub-id-type="doi">10.1631/jzus.B2200016</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lyapina</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cope</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Shevchenko</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Serino</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Tsuge</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2001</year>). <article-title>Promotion of NEDD-CUL1 conjugate cleavage by COP9 signalosome</article-title>. <source>Science</source> <volume>292</volume> (<issue>5520</issue>), <fpage>1382</fpage>&#x2013;<lpage>1385</lpage>. <pub-id pub-id-type="doi">10.1126/science.1059780</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nishimoto</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kugimiya</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Hosoyama</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Enoki</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>T. S.</given-names>
</name>
<name>
<surname>Hamano</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>JAB1 regulates unphosphorylated STAT3 DNA-binding activity through protein&#x2013;protein interaction in human colon cancer cells</article-title>. <source>Biochem. Biophys. Res. Commun.</source> <volume>438</volume> (<issue>3</issue>), <fpage>513</fpage>&#x2013;<lpage>518</lpage>. <pub-id pub-id-type="doi">10.1016/j.bbrc.2013.07.105</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Odqvist</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Montes-Moreno</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>S&#xe1;nchez-Pacheco</surname>
<given-names>R. E.</given-names>
</name>
<name>
<surname>Young</surname>
<given-names>K. H.</given-names>
</name>
<name>
<surname>Mart&#xed;n-S&#xe1;nchez</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Cereceda</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>NF&#x3ba;B expression is a feature of both activated B-cell-like and germinal center B-cell-like subtypes of diffuse large B-cell lymphoma</article-title>. <source>Mod. Pathol.</source> <volume>27</volume> (<issue>10</issue>), <fpage>1331</fpage>&#x2013;<lpage>1337</lpage>. <pub-id pub-id-type="doi">10.1038/modpathol.2014.34</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ok</surname>
<given-names>C. Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xu-Monette</surname>
<given-names>Z. Y.</given-names>
</name>
<name>
<surname>Tzankov</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Manyam</surname>
<given-names>G. C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Clinical implications of phosphorylated STAT3 expression in <italic>de novo</italic> diffuse large B-cell lymphoma</article-title>. <source>Clin. Cancer Res.</source> <volume>20</volume> (<issue>19</issue>), <fpage>5113</fpage>&#x2013;<lpage>5123</lpage>. <pub-id pub-id-type="doi">10.1158/1078-0432.CCR-14-0683</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pulvino</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Oleksyn</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Compitello</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Rossi</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Inhibition of COP9-signalosome (CSN) deneddylating activity and tumor growth of diffuse large B-cell lymphomas by doxycycline</article-title>. <source>Oncotarget</source> <volume>6</volume> (<issue>17</issue>), <fpage>14796</fpage>&#x2013;<lpage>14813</lpage>. <pub-id pub-id-type="doi">10.18632/oncotarget.4193</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reel</surname>
<given-names>P. S.</given-names>
</name>
<name>
<surname>Reel</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pearson</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Trucco</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Jefferson</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Using machine learning approaches for multi-omics data analysis: a review</article-title>. <source>Biotechnol. Adv.</source> <volume>49</volume>, <fpage>107739</fpage>. <pub-id pub-id-type="doi">10.1016/j.biotechadv.2021.107739</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schmitz</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wright</surname>
<given-names>G. W.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>D. W.</given-names>
</name>
<name>
<surname>Johnson</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>Phelan</surname>
<given-names>J. D.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J. Q.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Genetics and pathogenesis of diffuse large B-cell lymphoma</article-title>. <source>N. Engl. J. Med.</source> <volume>378</volume> (<issue>15</issue>), <fpage>1396</fpage>&#x2013;<lpage>1407</lpage>. <pub-id pub-id-type="doi">10.1056/NEJMoa1801445</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sitte</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Gl&#xe4;sner</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jellusova</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Weisel</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Panattoni</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pardi</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>JAB1 is essential for B cell development and germinal center formation and inversely regulates Fas ligand and Bcl6 expression</article-title>. <source>J. Immunol.</source> <volume>188</volume> (<issue>6</issue>), <fpage>2677</fpage>&#x2013;<lpage>2686</lpage>. <pub-id pub-id-type="doi">10.4049/jimmunol.1101455</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Kang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>GEPIA2: an enhanced web server for large-scale expression profiling and interactive analysis</article-title>. <source>Nucleic Acids Res.</source> <volume>47</volume> (<issue>W1</issue>), <fpage>W556</fpage>&#x2013;<lpage>W560</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkz430</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tsimberidou</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Fountzilas</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Nikanjam</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kurzrock</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Review of precision cancer medicine: evolution of the treatment paradigm</article-title>. <source>Cancer Treat. Rev.</source> <volume>86</volume>, <fpage>102019</fpage>. <pub-id pub-id-type="doi">10.1016/j.ctrv.2020.102019</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>J.-N.</given-names>
</name>
<name>
<surname>Pei</surname>
<given-names>D.-S.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>The emerging roles of Jab1/CSN5 in cancer</article-title>. <source>Med. Oncol.</source> <volume>33</volume> (<issue>8</issue>), <fpage>90</fpage>. <pub-id pub-id-type="doi">10.1007/s12032-016-0805-1</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Witten</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Tibshirani</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Survival analysis with high-dimensional covariates</article-title>. <source>Stat. Methods Med. Res.</source> <volume>19</volume> (<issue>1</issue>), <fpage>29</fpage>&#x2013;<lpage>51</lpage>. <pub-id pub-id-type="doi">10.1177/0962280209105024</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ouyang</surname>
<given-names>Q.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Pyrotinib plus capecitabine versus lapatinib plus capecitabine for the treatment of HER2-positive metastatic breast cancer (PHOEBE): a multicentre, open-label, randomised, controlled, phase 3 trial</article-title>. <source>Lancet Oncol.</source> <volume>22</volume> (<issue>3</issue>), <fpage>351</fpage>&#x2013;<lpage>360</lpage>. <pub-id pub-id-type="doi">10.1016/S1470-2045(20)30702-6</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Michailidis</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>A non-negative matrix factorization method for detecting modules in heterogeneous omics multi-modal data</article-title>. <source>Bioinformatics</source> <volume>32</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btv544</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C. C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Laird</surname>
<given-names>P. W.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>X. J.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Discovery of multi-dimensional modules by integrative analysis of cancer genomic data</article-title>. <source>Nucleic Acids Res.</source> <volume>40</volume> (<issue>19</issue>), <fpage>9379</fpage>&#x2013;<lpage>9391</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gks725</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>