<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1095330</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2023.1095330</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>3PNMF-MKL: A non-negative matrix factorization-based multiple kernel learning method for multi-modal data integration and its application to gene signature detection</article-title>
<alt-title alt-title-type="left-running-head">Mallik et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2023.1095330">10.3389/fgene.2023.1095330</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Mallik</surname>
<given-names>Saurav</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/635395/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sarkar</surname>
<given-names>Anasua</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2121076/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Nath</surname>
<given-names>Sagnik</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Maulik</surname>
<given-names>Ujjwal</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Das</surname>
<given-names>Supantha</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1771531/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Pati</surname>
<given-names>Soumen Kumar</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ghosh</surname>
<given-names>Soumadip</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Zhao</surname>
<given-names>Zhongming</given-names>
</name>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<xref ref-type="aff" rid="aff7">
<sup>7</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/34852/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Environmental Health</institution>, <institution>Harvard T.H. Chan School of Public Health</institution>, <addr-line>Boston</addr-line>, <addr-line>MA</addr-line>, <country>United States</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Computer Science &#x26; Engineering</institution>, <institution>Jadavpur University</institution>, <addr-line>Kolkata</addr-line>, <country>India</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Information Technology</institution>, <institution>Academy of Technology</institution>, <addr-line>Hooghly</addr-line>, <addr-line>West Bengal</addr-line>, <country>India</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Department of Bioinformatics</institution>, <institution>Maulana Abul Kalam Azad University</institution>, <addr-line>Kolkata</addr-line>, <addr-line>West Bengal</addr-line>, <country>India</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Department of Computer Science &#x26; Engineering</institution>, <institution>Sister Nivedita University</institution>, <addr-line>New Town</addr-line>, <addr-line>West Bengal</addr-line>, <country>India</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>Human Genetics Center</institution>, <institution>School of Public Health</institution>, <institution>The University of Texas Health Science Center at Houston</institution>, <addr-line>Houston</addr-line>, <addr-line>TX</addr-line>, <country>United States</country>
</aff>
<aff id="aff7">
<sup>7</sup>
<institution>Center for Precision Health</institution>, <institution>School of Biomedical Informatics</institution>, <institution>The University of Texas Health Science Center at Houston</institution>, <addr-line>Houston</addr-line>, <addr-line>TX</addr-line>, <country>United States</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/522202/overview">Andrei Rodin</ext-link>, City of Hope National Medical Center, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/838718/overview">Chunhou Zheng</ext-link>, Anhui University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1551848/overview">Loveleen Gaur</ext-link>, Amity University, India</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Saurav Mallik, <email>sauravmtech2@gmail.com</email>, <email>smallik@hsph.harvard.edu</email>; Zhongming Zhao, <email>zhongming.zhao@uth.tmc.edu</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Computational Genomics, a section of the journal Frontiers in Genetics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>14</day>
<month>02</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1095330</elocation-id>
<history>
<date date-type="received">
<day>11</day>
<month>11</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>30</day>
<month>01</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Mallik, Sarkar, Nath, Maulik, Das, Pati, Ghosh and Zhao.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Mallik, Sarkar, Nath, Maulik, Das, Pati, Ghosh and Zhao</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>In the current era, biomedical big data handling is a challenging task. Interestingly, the integration of multi-modal data, followed by significant feature mining (gene signature detection), becomes a daunting task. With this in mind, we propose a novel framework, namely, three-factor penalized non-negative matrix factorization-based multiple kernel learning with soft margin hinge loss (3PNMF-MKL) for multi-modal data integration, followed by gene signature detection. In brief, limma, employing the empirical Bayes statistics, was initially applied to each individual molecular profile, and the statistically significant features were extracted, which was followed by the three-factor penalized non-negative matrix factorization method used for data/matrix fusion using the reduced feature sets. Multiple kernel learning models with soft margin hinge loss were then deployed to estimate average accuracy scores and the area under the curve (AUC). Gene modules were identified by the consecutive analysis of average linkage clustering and dynamic tree cut. The best module containing the highest correlation was considered the potential gene signature. We utilized an acute myeloid leukemia cancer dataset from The Cancer Genome Atlas (TCGA) repository containing five molecular profiles. Our algorithm generated a 50-gene signature that achieved a high classification AUC score (viz., 0.827). We explored the functions of signature genes using pathway and Gene Ontology (GO) databases. Our method outperformed the state-of-the-art methods in terms of AUC. Furthermore, we included some comparative studies with other related methods to enhance the acceptability of our method. Finally, it should be noted that our algorithm can be applied to any multi-modal dataset for data integration, followed by gene module discovery.</p>
</abstract>
<kwd-group>
<kwd>multi-omics</kwd>
<kwd>gene signature detection</kwd>
<kwd>feature selection</kwd>
<kwd>DNA methylation</kwd>
<kwd>matrix factorization</kwd>
</kwd-group>
<contract-num rid="cn001">CPRIT RP170668</contract-num>
<contract-sponsor id="cn001">Cancer Prevention and Research Institute of Texas<named-content content-type="fundref-id">10.13039/100004917</named-content>
</contract-sponsor>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Rapid advances in biotechnology have enabled the generation of data on multiple platforms from the same or similar bio-samples. For example, The Cancer Genome Atlas (TCGA) comprehensively generated multi-omics profiles in 33 cancer types and subtypes. Therefore, it is now possible to conduct an in-depth investigation into various molecular events at different biological stages and for specific tumor categories. The challenging task here is to develop algorithms to properly integrate these multi-omics (i.e., multi-modal) data, which will deepen our understanding of human tumorigenesis.</p>
<p>The integration of multi-omics profiles is a fast-emerging area of biomedical research (<xref ref-type="bibr" rid="B9">Imielinski et al., 2012</xref>; <xref ref-type="bibr" rid="B16">Mo et al., 2013</xref>; <xref ref-type="bibr" rid="B13">Mallik et al., 2017</xref>; <xref ref-type="bibr" rid="B6">Gaur et al., 2022</xref>; <xref ref-type="bibr" rid="B7">Ghose et al., 2022</xref>; <xref ref-type="bibr" rid="B22">Saeed et al., 2022</xref>). From the perspective of biology, cellular processes are based on the communication among different biomolecules (viz., mutations, epigenetic regulators, proteins, and metabolites). Molecular regulations occur in multi-layers and multi-vantage points to orchestrate complex biological events. An integrated analysis of profiles on the common set of samples from multi-omics data shows great potential to yield more biologically meaningful outcomes over an individual analysis on a single data layer. Overall, such an integrated analysis provides a more comprehensive view and a global functional orientation of the biological system.</p>
<p>One of the major challenges for integration is to deal with the heterogeneity of these profiles. Profiles from various sources are often complicated to integrate or interpret together because of the inherent discrepancies. Various genomic variables can be measured and accumulated in different ways, which are also vulnerable to different kinds of noise and various confounding effects. Interestingly, these profiles show individual aspects of the biological system at different angles. The discrepancy among multi-omics data, therefore, provides an opportunity for detecting reliable and consistent signals for biological studies in a comprehensive manner. Multi-dimensional data integration and gene signature identification are among the most challenging tasks for bioinformaticians (<xref ref-type="bibr" rid="B12">Li et al., 2019</xref>; <xref ref-type="bibr" rid="B14">Mallik and Zhao, 2020</xref>; <xref ref-type="bibr" rid="B18">Qiu et al., 2020</xref>; <xref ref-type="bibr" rid="B17">Pellet et al., 2015</xref>; <xref ref-type="bibr" rid="B23">Serra et al., 2015</xref>). <xref ref-type="bibr" rid="B13">Mallik et al. (2017</xref>) proposed a scheme to recognize epigenetic biomarkers applying maximal relevance and minimal redundancy-based feature selection for multi-omics data. An approach of the integration of multi-omics data was proposed by <xref ref-type="bibr" rid="B12">Li et al. (2019</xref>) to identify biomarkers in the domain of cancer research. <xref ref-type="bibr" rid="B18">Qiu et al. (2020</xref>) suggested an approach regarding the revelation of 172 osteoporosis biomarkers by multi-omics data integration. A scheme of multi-omics data integration was presented by <xref ref-type="bibr" rid="B17">Pellet et al. (2015</xref>) to determine predictive molecular signatures regarding CLAD. 
Because specific profiles contain different characteristics/phenomena, integration of multi-view data with significant feature reduction and gene signature detection is fundamentally important. In recent years, the multi-platform integration approach has been applied to accomplish various important tasks, such as signature/bio-marker detection, disease classification, and gene clustering. Prior research works in bio-marker discovery (<xref ref-type="bibr" rid="B1">Bandyopadhyay and Mallik, 2016</xref>; <xref ref-type="bibr" rid="B10">Kandimalla et al., 2022</xref>), classification (<xref ref-type="bibr" rid="B8">Henry et al., 2014</xref>; <xref ref-type="bibr" rid="B15">Maulik et al., 2015</xref>; <xref ref-type="bibr" rid="B28">Zhang and Kuster, 2019</xref>), and clustering (<xref ref-type="bibr" rid="B25">Wang and Gu, 2016</xref>) have demonstrated the promising performance of multi-modal integration approaches. Nevertheless, the outcomes of such approaches are not always satisfactory. <xref ref-type="bibr" rid="B28">Zhang and Kuster (2019</xref>) presented an approach incorporating proteomics data to demonstrate the significance of omics data integration with higher accuracy. <xref ref-type="bibr" rid="B10">Kandimalla et al. (2022</xref>) showed mRNA&#x2013;miRNA regulatory network analyses to improve the approach of multi-omics data integration. In this work, we propose a novel framework, namely three-factor penalized non-negative matrix factorization-based multiple kernel learning with soft margin hinge loss (3PNMF-MKL), which consecutively applies several multi-dimensional strategies: i) statistical empirical Bayes-based feature selection, ii) three-factor penalized non-negative matrix factorization, iii) multiple kernel learning with soft margin hinge loss, iv) average linkage clustering, and v) the dynamic tree cut method for multi-platform data integration and gene signature detection. 
For evaluation of the performance of our proposed approach, a cancer dataset from TCGA acute myeloid leukemia (LAML) containing five different profiles [gene expression, DNA methylation, exon expression, pathway activity, and copy number variation (CNV)] was used. We demonstrated that our approach is capable of multi-modal data integration, and thus, it can be applied to any kind of multi-platform datasets.</p>
</sec>
<sec id="s2">
<title>2 Experimental procedures</title>
<p>In this section, we illustrate our proposed approach for identifying Pareto-optimal gene signatures by feature clustering on a cancer multi-omics dataset. The major steps are described as follows.</p>
<sec id="s2-1">
<title>2.1 Feature selection by the empirical Bayes test</title>
<p>Commonly shared features (genes/probes) and samples are chosen across all the profiles from the multi-omics cancer dataset. Specifically, probes (features) from DNA methylation arrays containing any missing values are discarded. Each individual profile is normalized using zero-mean normalization for each feature (<xref ref-type="bibr" rid="B2">Bandyopadhyay et al., 2013</xref>), as described in the following formula: <inline-formula id="inf1">
<mml:math id="m1">
<mml:msubsup>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
</inline-formula>. Here, <italic>&#x3bc;</italic> is the mean of the feature <italic>i</italic> across the data prior to normalization, and <italic>&#x3c3;</italic> denotes the corresponding standard deviation. <italic>x</italic>
<sub>
<italic>ik</italic>
</sub> and <inline-formula id="inf2">
<mml:math id="m2">
<mml:msubsup>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> signify the value of the <italic>i</italic>-th feature for the <italic>k</italic>-th patient (sample) before and after normalization, respectively. To determine statistically significant features, the empirical Bayes statistical test is applied using the package &#x201c;Linear Models for Microarray and RNA-Seq Data&#x201d; (limma) (<xref ref-type="bibr" rid="B24">Smyth, 2004</xref>; <xref ref-type="bibr" rid="B2">Bandyopadhyay et al., 2013</xref>), which is well suited to datasets with a small sample size. The moderated t-statistic (<xref ref-type="bibr" rid="B21">Ritchie et al., 2015</xref>) is defined as follows:<disp-formula id="e1">
<mml:math id="m3">
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:math>
<label>(1)</label>
</disp-formula>where <italic>m</italic>
<sub>1</sub> and <italic>m</italic>
<sub>2</sub> are the number of patients (cases) and that of the normal samples (controls), respectively. Here, <inline-formula id="inf3">
<mml:math id="m4">
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> signifies the contrast estimator for the feature <italic>pr</italic>, whereas <inline-formula id="inf4">
<mml:math id="m5">
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> refers to the posterior sample variance for <italic>pr</italic>. The statistic to compute the contrast estimator for the probe <italic>pr</italic> is formulated as follows: <inline-formula id="inf5">
<mml:math id="m6">
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. Here, <italic>N</italic> represents the normal distribution. The statistic to estimate the posterior sample variance for <italic>pr</italic> is formulated as follows:<disp-formula id="e2">
<mml:math id="m7">
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:math>
<label>(2)</label>
</disp-formula>where <italic>d</italic>
<sub>0</sub> <inline-formula id="inf6">
<mml:math id="m8">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mi>&#x221e;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> signifies the prior degrees of freedom, and <inline-formula id="inf7">
<mml:math id="m9">
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> denotes the prior variance. In addition, <italic>d</italic>
<sub>
<italic>pr</italic>
</sub> <inline-formula id="inf8">
<mml:math id="m10">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> symbolizes the experimental degrees of freedom of <italic>pr</italic>, and <inline-formula id="inf9">
<mml:math id="m11">
<mml:msubsup>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> denotes the sample variance of <italic>pr</italic>. The significance level (<italic>p</italic>-value) is then determined from <inline-formula id="inf10">
<mml:math id="m12">
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> with the help of the cumulative distribution function (cdf). If the <italic>p</italic>-value of the feature is less than the standard cutoff of 0.05, the feature is defined as statistically significant. The filtered differentially expressed features are then ordered according to the <italic>p</italic>-values. Notably, if any gene corresponds to more than one probe (feature), the probe with the lowest <italic>p</italic>-value will be selected to represent the gene, and the rest of the probes for the gene are simply ignored. We apply the same approach to each layer of the molecular profile, and then, we combine the significant non-redundant features (genes/probes/copy number variation, etc.) from all layers (denoted by <italic>UF</italic>).</p>
</sec>
<sec id="s2-2">
<title>2.2 Fusion by matrix factorization</title>
<p>Let <italic>o</italic>
<sub>
<italic>i</italic>
</sub> and <italic>o</italic>
<sub>
<italic>j</italic>
</sub> denote two object types, namely, gene expression and DNA methylation, among all the resulting features <italic>UF</italic>. The number of genes is N, and each gene is denoted by <italic>n</italic>
<sub>
<italic>i</italic>
</sub>, where i &#x3d; 1, 2,&#x2026;, N. There are M DNA methylation samples, and each sample is denoted by <italic>m</italic>
<sub>
<italic>j</italic>
</sub>, where j &#x3d; 1, 2, &#x2026;, M. In addition, there is a set <italic>P</italic> consisting of <italic>p</italic> types of profiles from the multi-omics datasets. The input to this implemented variant of the 3-<italic>FPNMF</italic> model is <inline-formula id="inf11">
<mml:math id="m13">
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:math>
</inline-formula>, which is a relational block matrix shown as follows:<disp-formula id="e3">
<mml:math id="m14">
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mo>&#x2217;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>12</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2026;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>21</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2217;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2026;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x22f1;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2026;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2217;</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(3)</label>
</disp-formula>Here, &#x2217; denotes that relationships between objects of the same type are not considered in this approach. <italic>R</italic>
<sub>
<italic>ij</italic>
</sub> denotes the relationship between <italic>o</italic>
<sub>
<italic>i</italic>
</sub>th and <italic>o</italic>
<sub>
<italic>j</italic>
</sub>th object types. The respective correlation of the <italic>x</italic>th object of type <italic>o</italic>
<sub>
<italic>i</italic>
</sub> (e.g., gene) and the <italic>y</italic>th object of type <italic>o</italic>
<sub>
<italic>j</italic>
</sub> (e.g., sample) is represented as <inline-formula id="inf12">
<mml:math id="m15">
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. In this implementation, we have experimented with six object types, as described later.</p>
<p>For each object type from each profile, there is a corresponding constraint block in the input block-diagonal constraint matrix, as shown in the following expression:<disp-formula id="e4">
<mml:math id="m16">
<mml:msup>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>Diag</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
<p>The relational block matrix <inline-formula id="inf13">
<mml:math id="m17">
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:math>
</inline-formula> is tri-factorized into matrix factors <italic>G</italic> and <italic>S</italic> (<xref ref-type="bibr" rid="B30">&#x17d;itnik and Zupan, 2014</xref>), which are shown as follows:<disp-formula id="e5">
<mml:math id="m18">
<mml:mi>G</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>Diag</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
<label>(5)</label>
</disp-formula>
<disp-formula id="e6">
<mml:math id="m19">
<mml:mi>S</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mo>&#x2a;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msubsup>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>12</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2026;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msubsup>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msubsup>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>21</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2a;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2026;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msubsup>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x22f1;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msubsup>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msubsup>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2026;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2a;</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(6)</label>
</disp-formula>Here, <italic>r</italic> denotes the factorization rank of the object type <italic>o</italic>
<sub>
<italic>p</italic>
</sub> inferred by the 3-<italic>FPNMF</italic> model. The factor <italic>S</italic> denotes the block relation between object types <italic>o</italic>
<sub>
<italic>i</italic>
</sub> and <italic>o</italic>
<sub>
<italic>j</italic>
</sub>. The factor <inline-formula id="inf14">
<mml:math id="m20">
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> reconstructs relations specifically to the object type <italic>o</italic>
<sub>
<italic>i</italic>
</sub>.</p>
<p>Thus, each relation matrix <inline-formula id="inf15">
<mml:math id="m21">
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> obtains matrix factorization as <inline-formula id="inf16">
<mml:math id="m22">
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>. In a simplified way, this relational block 3-<italic>FPNMF</italic> model is shown as follows:<disp-formula id="e7">
<mml:math id="m23">
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mtable class="matrix">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mo>&#x2a;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2026;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2a;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2026;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x22f1;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2026;</mml:mo>
</mml:mtd>
<mml:mtd columnalign="center">
<mml:mo>&#x2a;</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(7)</label>
</disp-formula>
</p>
<p>The objective function of this tri-factor penalized matrix decomposition (<italic>PMD</italic>) model is to minimize the distance between the input block relational matrix <inline-formula id="inf17">
<mml:math id="m24">
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:math>
</inline-formula> and its 3-<italic>FPNMF</italic> system adhering to the constraint matrix <italic>&#x3c4;</italic>
<sup>
<italic>P</italic>
</sup>, which is shown as follows:<disp-formula id="e8">
<mml:math id="m25">
<mml:mtable class="eqnarray">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:munder>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:munder>
<mml:mi>j</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mo>:</mml:mo>
<mml:mi>G</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mo stretchy="false">&#x2016;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">&#x2016;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mtd>
<mml:mtd columnalign="left"/>
<mml:mtd columnalign="left"/>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mo>&#x2b;</mml:mo>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mi>G</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
<mml:mtd columnalign="left"/>
<mml:mtd columnalign="left">
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(8)</label>
</disp-formula>
</p>
<p>Here, &#x2016;.&#x2016; denotes the Frobenius norm, and <italic>tr</italic>(.) denotes the trace. Our sparse implementation for this 3-<italic>FPNMF</italic> model reduces the missing relational matrix problem with zero values. Our model is more suitable for real-life heterogeneous datasets with missing values, and it differs from that of <xref ref-type="bibr" rid="B30">&#x17d;itnik and Zupan (2014)</xref> in its non-negative sparse implementation. Our proposed 3<italic>FPNMF</italic>-<italic>MKL</italic> model is shown briefly in <xref ref-type="fig" rid="F1">Figure 1</xref>, while a detailed flowchart is presented in <xref ref-type="sec" rid="s11">Supplementary Figure S1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Algorithm of the proposed 3PNMF-MKL model.</p>
</caption>
<graphic xlink:href="fgene-14-1095330-g001.tif"/>
</fig>
</sec>
<sec id="s2-3">
<title>2.3 Multiple kernel learning</title>
<p>Next, we introduce the multiple kernel learning (MKL) algorithm (<xref ref-type="bibr" rid="B26">Xu et al., 2013</xref>) with the hinge loss soft margin, in which the classifier and the kernel combination coefficients are optimized by solving the hinge loss soft margin MKL problem.</p>
<p>After using the 3-<italic>FPNMF</italic> model in the first phase, the approximate sparse relation matrix <inline-formula id="inf18">
<mml:math id="m26">
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> for target object type pairs <italic>o</italic>
<sub>
<italic>i</italic>
</sub> and <italic>o</italic>
<sub>
<italic>j</italic>
</sub> is reconstructed as<disp-formula id="e9">
<mml:math id="m27">
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>.</mml:mo>
</mml:math>
<label>(9)</label>
</disp-formula>Then, to develop kernel fusion, the resulting kernel matrices are generated using the &#x201c;Kernel Trick&#x201d;: <inline-formula id="inf19">
<mml:math id="m28">
<mml:mi>K</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>.</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>. The kernels are further normalized and smoothed using 2-dimensional linear filters.</p>
<p>Given <italic>p</italic> base-kernels <inline-formula id="inf20">
<mml:math id="m29">
<mml:mi mathvariant="double-struck">K</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> developed from the reconstructed relational block matrix <inline-formula id="inf21">
<mml:math id="m30">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>p</mml:mi>
<mml:mo>;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, kernel slack variables for the kernel <inline-formula id="inf22">
<mml:math id="m31">
<mml:msub>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="double-struck">K</mml:mi>
</mml:math>
</inline-formula> are defined as the difference between the target margin <italic>&#x3b8;</italic> and the SVM dual objective function<disp-formula id="equ1">
<mml:math id="m32">
<mml:mi>D</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>V</mml:mi>
<mml:mi>M</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>
<disp-formula id="equ2">
<mml:math id="m33">
<mml:mo>&#x3d;</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mi>max</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi>R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:munder>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>subject to <inline-formula id="inf23">
<mml:math id="m34">
<mml:msubsup>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2200;</mml:mo>
<mml:mi>n</mml:mi>
</mml:math>
</inline-formula>. Then, the slack variable is <italic>&#x3b6;</italic>
<sub>
<italic>p</italic>
</sub> &#x3d; <italic>&#x3b8;</italic> &#x2212; <italic>DSVM</italic>(<italic>K</italic>
<sub>
<italic>p</italic>
</sub>, <italic>&#x3b1;</italic>), and the hinge loss is shown as follows: <disp-formula id="e10">
<mml:math id="m35">
<mml:msub>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x2113;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>max</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:math>
<label>(10)</label>
</disp-formula>Therefore, the objective function for this hinge loss soft margin MKL algorithm becomes<disp-formula id="e11">
<mml:math id="m36">
<mml:munder>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>D</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>m</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munder>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3c0;</mml:mi>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b6;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>.</mml:mo>
</mml:math>
<label>(11)</label>
</disp-formula>subject to <italic>DSVM</italic>(<italic>K</italic>
<sub>
<italic>p</italic>
</sub>, <italic>&#x3b1;</italic>) &#x2265; <italic>&#x3b8;</italic> &#x2212; <italic>&#x3b6;</italic>
<sub>
<italic>p</italic>
</sub>, <italic>&#x3b6;</italic>
<sub>
<italic>p</italic>
</sub> &#x2265; 0, <italic>p</italic> &#x3d; 1, <italic>&#x2026;</italic>, <italic>P</italic>.</p>
<p>The objective of the aforementioned hinge loss soft margin MKL is to maximize the margin <italic>&#x3b8;</italic> while considering the &#x201c;errors&#x201d; from the given <italic>P</italic> base kernels. The parameter <italic>&#x3c0;</italic> balances the contribution of the loss term represented by slack variables <italic>&#x3b6;</italic>
<sub>
<italic>p</italic>
</sub> and the margin <italic>&#x3b8;</italic>. The parameter <italic>&#x3c0;</italic> must lie in the range {<italic>&#x3c0;</italic>&#x7c;<italic>&#x3c0;</italic> &#x2265; 1/<italic>P</italic>}; otherwise, there is no solution to the proposed problem. Our proposed framework for gene signature detection from heterogeneous data sources using the 3<italic>FPNMF</italic>-<italic>MKL</italic> model is depicted in <xref ref-type="fig" rid="F2">Figure 2</xref>.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Flowchart of the proposed 3PNMF-MKL framework.</p>
</caption>
<graphic xlink:href="fgene-14-1095330-g002.tif"/>
</fig>
</sec>
<sec id="s2-4">
<title>2.4 Determining the best combination of class labels using non-negative matrix factorization and AUC</title>
<p>In biological datasets such as TCGA, the clinical data are made available. These include patient sample groups, biological subtypes, drug treatment, and survival/prognosis information. In our current study, we obtain accuracies for different combinations of class labels using the non-negative matrix factorization technique for the case where there are more than two class labels or subtypes. Among them, the combination of class labels that produces the highest area under the curve (AUC) is chosen for the next step (i.e., module detection). Let <italic>q</italic> be the specific combination of class labels that produces the highest AUC. Find <italic>q</italic> &#x3d; {<italic>&#x2203;i</italic>, <italic>&#x2203;j</italic>}&#x7c;{<italic>&#x2203;a</italic>, <italic>&#x2203;b</italic>, <italic>&#x2203;k</italic>} such that<disp-formula id="e12">
<mml:math id="m37">
<mml:mi>A</mml:mi>
<mml:mi>U</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="italic">arg</mml:mi>
<mml:mi mathvariant="italic">max</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2200;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>U</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi>c</mml:mi>
<mml:msubsup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2200;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>U</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi>c</mml:mi>
<mml:msubsup>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
<label>(12)</label>
</disp-formula>where <italic>cl</italic> denotes the left part of the group combination, <italic>cl</italic>&#x2032; signifies the right part of any sample group combination, and <italic>i</italic> &#x2208; {1, 2, &#x2026;, (<italic>m</italic> &#x2212; 1)}, <italic>j</italic> &#x2208; {(<italic>i</italic> &#x2b; 1), (<italic>i</italic> &#x2b; 2), &#x2026;, <italic>m</italic>}, <italic>a</italic> &#x2208; {1, 2, &#x2026;, <italic>m</italic>}, <italic>b</italic> &#x2208; {1, 2, &#x2026;, <italic>m</italic>} with <italic>b</italic> &#x2260; <italic>a</italic>, and <italic>k</italic> &#x2208; {1, 2, &#x2026;, <italic>m</italic>} with <italic>k</italic> &#x2260; <italic>a</italic> and <italic>k</italic> &#x2260; <italic>b</italic>.</p>
</sec>
<sec id="s2-5">
<title>2.5 Feature clustering and module detection</title>
<p>After selecting the right class-label combination, we extracted the sub-gene expression data consisting of only the selected class labels and then used them for gene module detection and signature identification.</p>
<p>In our procedure, we first evaluated the power of the soft thresholding, which was then applied to evaluate the adjacency matrix using Pearson&#x2019;s correlation. The topological overlap matrix (TOM) similarity score (<xref ref-type="bibr" rid="B19">Ravasz et al., 2002)</xref> was computed from the employed adjacency matrix. The TOM score between two nodes (say, <italic>i</italic> and <italic>j</italic>) symbolized as <italic>TOM</italic>(<italic>i</italic>, <italic>j</italic>) is defined as follows:<disp-formula id="e13">
<mml:math id="m38">
<mml:mi>T</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>M</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="{" close="">
<mml:mrow>
<mml:mtable class="cases">
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mfrac>
<mml:mrow>
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mi>X</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mi>X</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>X</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">min</mml:mi>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mi>X</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mi>X</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>X</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mtext>if&#x2009;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mtext>if&#x2009;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(13)</label>
</disp-formula>where <italic>X</italic> denotes the corresponding adjacency matrix containing Boolean entries. The entry of 1 indicates that the corresponding two nodes share the same connection (i.e., direct connection), while the entry of 0 signifies that no direct connection exists between them.</p>
<p>After obtaining the TOM score, we computed the distance/dissimilarity value between genes (<italic>i</italic> and <italic>j</italic>) denoted by <italic>dissTOM</italic>(<italic>i</italic>, <italic>j</italic>), which is defined as follows: <italic>dissTOM</italic>(<italic>i</italic>, <italic>j</italic>) &#x3d; 1 &#x2212; <italic>TOM</italic>(<italic>i</italic>, <italic>j</italic>). We conducted average linkage clustering on the multi-omics dissimilarity matrix <italic>dissTOM</italic> by considering all potential pairs of genes/features. Finally, the dynamic tree cut technique (<xref ref-type="bibr" rid="B11">Langfelder et al., 2008)</xref> was applied to the clustering dendrogram to determine the gene modules. In order to evaluate the quality of the aforementioned clustering, we calculated different cluster validity index measures, <italic>viz.</italic>, cluster coefficient, heterogeneity, Dunn Index, maximum adjacency ratio, centralization, silhouette width, and scaled connectivity.</p>
</sec>
<sec id="s2-6">
<title>2.6 Expression signature detection and classifier models</title>
<p>After finding the gene modules, we estimated Pearson&#x2019;s correlation coefficient (PCC) between each gene pair within the resulting modules. For each module, the mean of the correlations over all gene pairs within that particular module was obtained. The module with the maximum mean correlation coefficient was selected as the gene signature. Notably, genes within the selected gene signature are differentially expressed between case and control samples. In order to validate the classification performance of the employed gene signature, we utilized the Prediction Analysis of Microarrays (PAM) classifier with 10-fold cross-validation (CV) on the expression sub-data to classify the underlying class labels. The entire procedure was then repeated 10 times. Moreover, we calculated the average scores of several classification performance metrics such as sensitivity, specificity, precision, accuracy, and AUC, individually.</p>
</sec>
<sec id="s2-7">
<title>2.7 Functional annotation analysis</title>
<p>We carried out KEGG pathway and Gene Ontology (GO) analyses using the Enrichr database (<xref ref-type="bibr" rid="B3">Chen et al., 2013)</xref>. Notably, GO terms can be categorized into three kinds, <italic>viz.</italic>, biological process (BP), cellular component (CC), and molecular function (MF). Those significant pathways/GO terms with an adjusted <italic>p</italic>-value less than 0.05 were identified. Meanwhile, literature research studies were also performed to identify disease-related pathways/GO terms.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>3 Results</title>
<sec id="s3-1">
<title>3.1 Data sources</title>
<p>For our experiment, TCGA acute myeloid leukemia (LAML) multi-omics dataset (<ext-link ext-link-type="uri" xlink:href="https://xenabrowser.net/datapages/?cohort=GDC%20TCGA%20Acute%20Myeloid%20Leukemia%20(LAML)&amp;removeHub=https%3A%2F%2Fxena.treehouse.gi.ucsc.edu%3A443">https://xenabrowser.net/datapages/?cohort&#x3d;GDC%20TCGA%20Acute%20Myeloid%20Leukemia%20(LAML)&#x26;removeHub&#x3d;https%3A%2F%2Fxena.treehouse.gi.ucsc.edu%3A443</ext-link>) contained six heterogeneous profiles such as the gene expression (IlluminaGA) profile, DNA methylation (Illumina Methylation 27k) profile, exon expression (IlluminaGA) profile, miRNA profile, pathway activity (Paradigm IPLs) profile, and copy number (GISTIC2) profile. Initially, the gene expression profile included 179 samples and 20,113 genes. For the methylation profile, there are 194 samples and 27,578 methylation probes. Particularly, for the methylation profile, many genes are profiled with more than one probe. In the exon expression profile, there are a total of 219,296 chromosome ids and 179 samples. Here, many genes are connected with more than one chromosome id. The miRNA profile contains 705 miRNAs and 188 samples. The pathway activity profile has 7,203 genes and 173 samples, while the copy number profile consists of 24,776 genes and 191 samples. There are three categories of samples (i.e., class labels) for the LAML multi-omics dataset: i) favorable, ii) intermediate (also called normal), and iii) poor. Specifically, every profile consists of 161 commonly shared LAML samples. Among them, 31 samples belong to the first category, 96 samples are in the second category, and the rest of the samples (&#x3d; 34) are in the third category. In addition, there are 1,501 uniquely matched genes among the five profiles [i.e., gene expression, DNA methylation, exon expression, pathway activity, and copy number variation (GISTIC2) profiles].</p>
</sec>
<sec id="s3-2">
<title>3.2 Statistical validation</title>
<p>First, we selected the sub-data, which contain commonly shared samples (i.e., 161) and genes (i.e., 1,501) for each of the five profiles (i.e., gene expression, DNA methylation, exon expression, pathway activity, and copy number variation profiles). Many matched genes are connected with more than one probe (or chromosome id) for each profile. In the case of the miRNA profile, we started to work with the matched samples (<italic>n</italic> &#x3d; 161) and all of its miRNAs (<italic>n</italic> &#x3d; 705). The empirical Bayes test is performed by limma software on each gene probe or chromosome id for each of the five profiles (i.e., gene expression, DNA methylation, exon expression, pathway activity, and copy number variation profiles) across all the three classes (viz., favorable, intermediate, and poor).</p>
<p>Notably, since there are three classes/groups of samples, here, limma is initially performed between each group pair (i.e., i) favorable vs. intermediate, ii) intermediate vs. poor, and finally iii) favorable vs. poor), then an F-statistic is computed, and finally, the respective <italic>p</italic>-value is generated from the F-statistic. After the test, for every gene, we only selected the probe or chromosome id with the lowest <italic>p</italic>-value achieved among all the probes or chromosome ids connected with that gene. As a result, we obtained 728, 272, 1,100, 265, and 904 significant genes for the gene expression, methylation, exon expression, pathway activity, and copy number profiles, respectively. Thereafter, we took the union of all the significant gene sets, which led to a molecular set of a total of 1,388 genes. Furthermore, the same significance test was applied on each miRNA of the miRNA profile across all the three classes (<italic>viz.</italic>, favorable, intermediate, and poor) as well. We obtained a total of 229 significant miRNAs.</p>
</sec>
<sec id="s3-3">
<title>3.3 Expression signature detection and classification</title>
<p>Using the non-negative matrix factorization technique, we obtained accuracies for different combinations of class labels such as i) Class 1 (favorable) vs. Class 2 (intermediate), ii) Class 1 vs. Class 3 (poor), iii) Class 1 vs. classes 2 and 3, iv) Class 2 vs. Class 3, v) Class 2 vs. classes 1 and 3, and vi) Class 3 vs. classes 1 and 2 (as depicted in <xref ref-type="table" rid="T1">Table 1</xref>). Among them, the second combination, i.e., Class 1 vs. Class 3, produced the highest area under the curve (AUC &#x3d; 0.7713). Hence, we selected this combination for gene signature discovery since other combinations did not produce better AUC scores. After obtaining the right combination of class labels, we first evaluated the power (&#x3d;1) for soft thresholding (illustrated in <xref ref-type="fig" rid="F3">Figure 3A</xref>), which was then applied to estimate the adjacency matrix through Pearson&#x2019;s correlation score. Then, the TOM score and distance matrix were computed. To determine gene modules, we applied average linkage clustering and dynamic tree cut methodologies. As a result, we generated a total of 10 gene modules. The numbers of participating differentially expressed genes (DEGs) for these 10 gene modules (represented by black, blue, brown, green, magenta, pink, purple, red, turquoise, and yellow colors) were 50, 99, 90, 74, 23, 25, 22, 51, 214, and 80, respectively. The dendrogram is represented in <xref ref-type="fig" rid="F3">Figure 3B</xref>. The corresponding cluster validity indices in that module detection are illustrated in <xref ref-type="table" rid="T2">Table 2</xref>. The average silhouette width plot generated during clustering is represented in <xref ref-type="sec" rid="s11">Supplementary Figure S2</xref>. PCC was calculated between each gene pair within each module. 
The mean correlation scores of the 10 modules (depicted by blue, green, turquoise, magenta, brown, red, yellow, black, purple, and pink colors) were 0.0268, 0.2562, 0.0321, 0.3914, 0.1143, 0.0215, 0.0570, 0.4029, 0.3455, and 0.1605, respectively. The black module had the highest mean correlation coefficient score (&#x3d; 0.4029 in <xref ref-type="table" rid="T3">Table 3</xref>). Thus, it was selected as the gene signature. The resultant gene signature contained 50 DEGs (see <xref ref-type="table" rid="T3">Table 3</xref>). To verify the classification performance of the resultant signature, we applied the PAM classifier through the 10-fold cross-validation (CV) on all the features and samples of signature data in order to classify the groups (favorable and poor). The entire procedure was then repeated 10 times. In the experiment, the mean sensitivity, mean specificity, mean precision, mean accuracy, and mean AUC were 69.12%, 84.19%, 82.79%, 76.31%, and 0.8273, respectively (see <xref ref-type="fig" rid="F4">Figure 4</xref>; <xref ref-type="table" rid="T4">Table 4</xref>). Based on the gene set enrichment analysis on the 50 genes of the signature using the Enrichr web database, we extracted significant KEGG pathway and Gene Ontology (GO) terms. Among the KEGG pathways, the Rap1 signaling pathway (hsa04015) is the most significant pathway (adjusted <italic>p</italic>-value &#x3d; 7.497 &#xd7; 10<sup>&#x2212;06</sup>) that contains eight genes (viz., <italic>EFNA1</italic>, <italic>GNAO1</italic>, <italic>TIAM1</italic>, <italic>CSF1</italic>, <italic>ITGB3</italic>, <italic>ITGA2B</italic>, <italic>THBS1</italic>, and <italic>MAPK13</italic>). The second most significant pathway is the PI3K-Akt signaling pathway (hsa04151) with an adjusted <italic>p</italic>-value of 1.128 &#xd7; 10<sup>&#x2212;05</sup>, which consists of nine genes (viz., <italic>EFNA1, CSF1, ITGB3, ITGA2B, IL2RB, FASLG, TP53, THBS1</italic>, and <italic>EPOR</italic>). 
The following eight pathways are the cytokine&#x2013;cytokine receptor interaction (hsa04060) (adj. <italic>p</italic>-value &#x3d; 1.437 &#xd7; 10<sup>&#x2212;05</sup>), inflammatory bowel disease (IBD) (hsa05321) (adj. <italic>p</italic>-value &#x3d; 2.1 &#xd7; 10<sup>&#x2212;05</sup>), proteoglycans in cancer (hsa05205) (adj. <italic>p</italic>-value &#x3d; 2.1 &#xd7; 10<sup>&#x2212;05</sup>), hematopoietic cell lineage (hsa04640) (adj. <italic>p</italic>-value &#x3d; 6.752 &#xd7; 10<sup>&#x2212;05</sup>), T-cell receptor signaling pathway (hsa04660) (adj. <italic>p</italic>-value &#x3d; 1 &#xd7; 10<sup>&#x2212;4</sup>), TNF signaling pathway (hsa04668) (adj. <italic>p</italic>-value &#x3d; 2 &#xd7; 10<sup>&#x2212;4</sup>), osteoclast differentiation (hsa04380) (adj. <italic>p</italic>-value &#x3d; 3 &#xd7; 10<sup>&#x2212;4</sup>), and Ras signaling pathway (hsa04014) (adj. <italic>p</italic>-value &#x3d; 3 &#xd7; 10<sup>&#x2212;4</sup>) (also see <xref ref-type="table" rid="T5">Table 5</xref>). Among the significant GO:BP terms, the positive regulation of cellular metabolic processes (GO:0031325) (adjusted <italic>p</italic>-value &#x3d; 8.02947 &#xd7; 10<sup>&#x2212;05</sup>) was ranked as the most significant, which contains six genes (<italic>EDN1</italic>, <italic>CSF1</italic>, <italic>CCL5</italic>, <italic>GATA3</italic>, <italic>THBS1</italic>, and <italic>TP53</italic>). The second most significant GO term is the regulation of inflammatory responses (GO:0050727) with an adjusted <italic>p</italic>-value of 8.029 &#xd7; 10<sup>&#x2212;05</sup>. This term consists of seven genes (<italic>CCL5, CCL4, RORA, GATA3, ETS1, BIRC3</italic>, and <italic>MAPK13</italic>) (<xref ref-type="table" rid="T5">Table 5</xref>). 
Among the significant GO:CC terms, the platelet alpha granule (GO:0031091) (adjusted <italic>p</italic>-value &#x3d; 4 &#xd7; 10<sup>&#x2212;3</sup>) contains four genes (viz., <italic>ITGB3, ITGA2B, A2M</italic>, and <italic>THBS1</italic>), while among the GO:MF terms, core promoter binding (GO:0001047) (adjusted <italic>p</italic>-value &#x3d; 8 &#xd7; 10<sup>&#x2212;4</sup>) contains five genes (<italic>viz.</italic>, <italic>RORA, GATA3, GATA1, TP53</italic>, and <italic>ARNTL</italic>). For details of the top significant pathways and GO terms, see <xref ref-type="table" rid="T5">Table 5</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Predictive performance of classification for each pairwise class using the proposed method in LAML multi-omics data, where classes 1, 2, and 3 denote &#x201c;favorable,&#x201d; &#x201c;intermediate/normal,&#x201d; and &#x201c;poor,&#x201d; respectively.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left"/>
<th align="center">Sensitivity</th>
<th align="center">Specificity</th>
<th align="center">Precision (PPV)</th>
<th align="center">Negative predictive value</th>
<th align="center">Accuracy</th>
<th align="center">AUC</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Class 1 vs. Class 2</td>
<td align="center">0.5161</td>
<td align="center">0.6907</td>
<td align="center">0.3478</td>
<td align="center">0.8171</td>
<td align="center">0.6484</td>
<td align="center">0.6202</td>
</tr>
<tr>
<td align="center">Class 1 vs. Class 3</td>
<td align="center">0.5484</td>
<td align="center">0.8235</td>
<td align="center">0.7391</td>
<td align="center">0.6667</td>
<td align="center">0.6923</td>
<td align="center">0.7713</td>
</tr>
<tr>
<td align="center">Class 1 vs. classes 2 and 3</td>
<td align="center">0.5385</td>
<td align="center">0.3871</td>
<td align="center">0.7865</td>
<td align="center">0.1667</td>
<td align="center">0.5093</td>
<td align="center">0.4608</td>
</tr>
<tr>
<td align="center">Class 2 vs. Class 3</td>
<td align="center">0.6289</td>
<td align="center">0.5</td>
<td align="center">0.7821</td>
<td align="center">0.3208</td>
<td align="center">0.5954</td>
<td align="center">0.5215</td>
</tr>
<tr>
<td align="center">Class 2 vs. classes 1 and 3</td>
<td align="center">0.5</td>
<td align="center">0.5052</td>
<td align="center">0.4</td>
<td align="center">0.6049</td>
<td align="center">0.5031</td>
<td align="center">0.4863</td>
</tr>
<tr>
<td align="center">Class 3 vs. classes 1 and 2</td>
<td align="center">0.5547</td>
<td align="center">0.4848</td>
<td align="center">0.8068</td>
<td align="center">0.2192</td>
<td align="center">0.5404</td>
<td align="center">0.5528</td>
</tr>
<tr>
<td align="center">Max</td>
<td align="center">0.6289</td>
<td align="center">0.8235</td>
<td align="center">0.8068</td>
<td align="center">0.8171</td>
<td align="center">0.6923</td>
<td align="center">0.7713</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Plots for soft thresholding and dendrogram for our proposed method. <bold>(A)</bold> Power computing for soft thresholding and <bold>(B)</bold> dendrogram plots with dynamic tree cut.</p>
</caption>
<graphic xlink:href="fgene-14-1095330-g003.tif"/>
</fig>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Cluster Validity Index measures of our experiment.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Cluster Validity Index</th>
<th align="center">Score</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Dunn Index</td>
<td align="char" char=".">0.6461</td>
</tr>
<tr>
<td align="left">Average scaled connectivity</td>
<td align="char" char=".">0.6834</td>
</tr>
<tr>
<td align="left">Silhouette width</td>
<td align="char" char=".">&#x2212;0.0012</td>
</tr>
<tr>
<td align="left">Average cluster coefficient</td>
<td align="char" char=".">0.2390</td>
</tr>
<tr>
<td align="left">Average maximum adjacency ratio</td>
<td align="char" char=".">0.2386</td>
</tr>
<tr>
<td align="left">Density</td>
<td align="char" char=".">0.2327</td>
</tr>
<tr>
<td align="left">Centralization</td>
<td align="char" char=".">0.1081</td>
</tr>
<tr>
<td align="left">Heterogeneity</td>
<td align="char" char=".">0.1143</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Feature (gene) names and average (avg.) Pearson&#x2019;s correlation coefficient (PCC) for the pairwise manner within the TCGA LAML signature.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Measure</th>
<th align="left">Value/description</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">&#x23; Features</td>
<td align="left">50</td>
</tr>
<tr>
<td align="left">Gene symbols</td>
<td align="left">
<italic>HK2</italic>, <italic>CHRDL1</italic>, <italic>EFNA1</italic>, <italic>ARNTL</italic>, <italic>EIF4A1</italic>, <italic>MS4A2</italic>, <italic>BMP2</italic>, <italic>FHL2</italic>, <italic>SH2D2A</italic>, <italic>CSF1</italic>, <italic>KLRG1</italic>, <italic>ITGB3</italic>, <italic>SH3BP5</italic>, <italic>CCL4</italic>, <italic>RORA</italic>, <italic>CAMK2D</italic>, <italic>BIRC3</italic>, <italic>TP53</italic>, <italic>S1PR5</italic>, <italic>GNAZ</italic>, <italic>EPOR</italic>, <italic>TBX21</italic>, <italic>GATA3</italic>, <italic>TIAM1</italic>, <italic>IL2RB</italic>, <italic>LRIG1</italic>, <italic>GRAP2</italic>, <italic>PLEKHA1</italic>, <italic>THBS1</italic>, <italic>MAF</italic>, <italic>IL18RAP</italic>, <italic>EDN1</italic>, <italic>ETS1</italic>, <italic>GATA1</italic>, <italic>ITGA2B</italic>, <italic>A2M</italic>, <italic>LCK</italic>, <italic>MAPK13</italic>, <italic>GZMB</italic>, <italic>PTGDR</italic>, <italic>MYBL1</italic>, <italic>RASGRP1</italic>, <italic>ARG1</italic>, <italic>PKLR</italic>, <italic>GNAO1</italic>, <italic>PRF1</italic>, <italic>CD8A</italic>, <italic>FASLG</italic>, <italic>ABCG2</italic>, and <italic>CCL5</italic>
</td>
</tr>
<tr>
<td align="left">Average PCC</td>
<td align="left">0.403</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Plots of the area under curve (AUC) for 10-fold cross-validation.</p>
</caption>
<graphic xlink:href="fgene-14-1095330-g004.tif"/>
</fig>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Classification metrics for our experiment.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Evaluation metric</th>
<th align="left">Average score (std)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Precision</td>
<td align="left">0.8279 (&#xb1;0.027)</td>
</tr>
<tr>
<td align="left">Sensitivity</td>
<td align="left">0.6912 (&#xb1;0.025)</td>
</tr>
<tr>
<td align="left">Specificity</td>
<td align="left">0.8419 (&#xb1;0.028)</td>
</tr>
<tr>
<td align="left">Accuracy</td>
<td align="left">0.7631 (&#xb1;0.0208)</td>
</tr>
<tr>
<td align="left">AUC</td>
<td align="left">0.8273</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Top five significant KEGG pathways and Gene Ontology (GO) terms&#x2a; for the gene set belonging to the LAML signature.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">KEGG pathway/GO term name</th>
<th align="left">Gene symbol</th>
<th align="left">Z-score</th>
<th align="left">Adjusted <italic>p</italic>-value</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Rap1 signaling pathway (hsa04015)</td>
<td align="left">
<italic>EFNA1</italic>, <italic>GNAO1</italic>, <italic>TIAM1</italic>, <italic>CSF1</italic>, <italic>ITGB3, ITGA2B</italic>, <italic>THBS1</italic>, and <italic>MAPK13</italic>
</td>
<td align="char" char=".">&#x2212;1.961</td>
<td align="left">7.497 &#xd7; 10<sup>&#x2212;06</sup></td>
</tr>
<tr>
<td align="left">PI3K-Akt signaling pathway (hsa04151)</td>
<td align="left">
<italic>EFNA1</italic>, <italic>CSF1</italic>, <italic>ITGB3</italic>, <italic>ITGA2B</italic>, <italic>IL2RB</italic>, <italic>FASLG</italic>, <italic>TP53</italic>, <italic>THBS1,</italic> and <italic>EPOR</italic>
</td>
<td align="char" char=".">&#x2212;2.041</td>
<td align="left">1.128 &#xd7; 10<sup>&#x2212;05</sup></td>
</tr>
<tr>
<td align="left">Cytokine&#x2013;cytokine receptor interaction (hsa04060)</td>
<td align="left">
<italic>BMP2</italic>, <italic>IL18RAP</italic>, <italic>CSF1</italic>, <italic>CCL5</italic>, <italic>IL2RB</italic>, <italic>CCL4</italic>, <italic>FASLG</italic>, and <italic>EPOR</italic>
</td>
<td align="char" char=".">&#x2212;1.829</td>
<td align="left">1.437 &#xd7; 10<sup>&#x2212;05</sup></td>
</tr>
<tr>
<td align="left">Inflammatory bowel disease (IBD) (hsa05321)</td>
<td align="left">
<italic>MAF</italic>, <italic>IL18RAP</italic>, <italic>TBX21</italic>, <italic>RORA</italic>, and <italic>GATA3</italic>
</td>
<td align="char" char=".">&#x2212;1.858</td>
<td align="left">2.1 &#xd7; 10<sup>&#x2212;05</sup></td>
</tr>
<tr>
<td align="left">Proteoglycans in cancer (hsa05205)</td>
<td align="left">
<italic>TIAM1</italic>, <italic>CAMK2D</italic>, <italic>ITGB3</italic>, <italic>FASLG</italic>, <italic>TP53</italic>, <italic>THBS1</italic>, and <italic>MAPK13</italic>
</td>
<td align="char" char=".">&#x2212;1.910</td>
<td align="left">2.1 &#xd7; 10<sup>&#x2212;05</sup></td>
</tr>
<tr>
<td align="left">Positive regulation of the cellular metabolic process (GO:BP: GO:0031325)</td>
<td align="left">
<italic>EDN1</italic>, <italic>CSF1</italic>, <italic>CCL5</italic>, <italic>GATA3</italic>, <italic>THBS1</italic>, and <italic>TP53</italic>
</td>
<td align="char" char=".">&#x2212;1.551</td>
<td align="left">8.029 &#xd7; 10<sup>&#x2212;05</sup></td>
</tr>
<tr>
<td align="left">Regulation of inflammatory response (GO:BP: GO:0050727)</td>
<td align="left">
<italic>CCL5</italic>, <italic>CCL4</italic>, <italic>RORA</italic>, <italic>GATA3</italic>, <italic>ETS1</italic>, <italic>BIRC3</italic>, and <italic>MAPK13</italic>
</td>
<td align="char" char=".">&#x2212;1.029</td>
<td align="left">8.029 &#xd7; 10<sup>&#x2212;05</sup></td>
</tr>
<tr>
<td align="left">Positive regulation of gene expression (GO:BP: GO:0010628)</td>
<td align="left">
<italic>BMP2</italic>, <italic>CSF1</italic>, <italic>TBX21</italic>, <italic>FHL2</italic>, <italic>RORA</italic>, <italic>GATA3</italic>, <italic>ETS1</italic>, <italic>GATA1</italic>, <italic>MYBL1</italic>, <italic>THBS1</italic>, <italic>TP53</italic>, and <italic>ARNTL</italic>
</td>
<td align="char" char=".">&#x2212;1.668</td>
<td align="left">8.029 &#xd7; 10<sup>&#x2212;05</sup></td>
</tr>
<tr>
<td align="left">Cytokine-mediated signaling pathway (GO:BP: GO:0019221)</td>
<td align="left">
<italic>CAMK2D</italic>, <italic>IL18RAP</italic>, <italic>CSF1</italic>, <italic>CCL5</italic>, <italic>CCL4</italic>, <italic>IL2RB</italic>, <italic>FASLG</italic>, <italic>RORA</italic>, <italic>GATA3</italic>, <italic>TP53</italic>, and <italic>BIRC3</italic>
</td>
<td align="char" char=".">&#x2212;1.343</td>
<td align="left">8.029 &#xd7; 10<sup>&#x2212;05</sup></td>
</tr>
<tr>
<td align="left">Positive regulation of nucleic acid-templated transcription (GO:BP: GO:1903508)</td>
<td align="left">
<italic>BMP2</italic>, <italic>TBX21</italic>, <italic>FHL2</italic>, <italic>RORA</italic>, <italic>GATA3</italic>, <italic>ETS1</italic>, <italic>GATA1</italic>, <italic>MYBL1</italic>, <italic>TP53</italic>, and <italic>ARNTL</italic>
</td>
<td align="char" char=".">&#x2212;2.001</td>
<td align="left">8.029 &#xd7; 10<sup>&#x2212;05</sup></td>
</tr>
<tr>
<td align="left">Platelet alpha-granule (GO-CC: GO:0031091)</td>
<td align="left">
<italic>ITGB3</italic>, <italic>ITGA2B</italic>, <italic>A2M</italic>, and <italic>THBS1</italic>
</td>
<td align="char" char=".">&#x2212;1.639</td>
<td align="left">4 &#xd7; 10<sup>&#x2212;3</sup></td>
</tr>
<tr>
<td align="left">Platelet alpha-granule membrane (GO-CC: GO:0031092)</td>
<td align="left">
<italic>ITGB3</italic> and <italic>ITGA2B</italic>
</td>
<td align="char" char=".">&#x2212;2.148</td>
<td align="left">0.023</td>
</tr>
<tr>
<td align="left">Core promoter binding (GO-MF: GO:0001047)</td>
<td align="left">
<italic>RORA</italic>, <italic>GATA3</italic>, <italic>GATA1</italic>, <italic>TP53</italic>, and <italic>ARNTL</italic>
</td>
<td align="char" char=".">&#x2212;1.279</td>
<td align="left">8 &#xd7; 10<sup>&#x2212;4</sup></td>
</tr>
<tr>
<td align="left">Core promoter sequence-specific DNA binding (GO-MF: GO:0001046)</td>
<td align="left">
<italic>RORA</italic>, <italic>GATA3</italic>, <italic>GATA1</italic>, and <italic>TP53</italic>
</td>
<td align="char" char=".">&#x2212;1.295</td>
<td align="left">1.9 &#xd7; 10<sup>&#x2212;3</sup></td>
</tr>
<tr>
<td align="left">Transcription regulatory region DNA binding (GO-MF: GO:0044212)</td>
<td align="left">
<italic>TBX21</italic>, <italic>RORA</italic>, <italic>GATA3</italic>, <italic>GATA1</italic>, <italic>MYBL1</italic>, <italic>TP53</italic>, and <italic>ARNTL</italic>
</td>
<td align="char" char=".">&#x2212;1.322</td>
<td align="left">1.9 &#xd7; 10<sup>&#x2212;3</sup></td>
</tr>
<tr>
<td align="left">Cytokine activity (GO-MF: GO:0005125)</td>
<td align="left">
<italic>BMP2</italic>, <italic>EDN1</italic>, <italic>CSF1</italic>, <italic>CCL5</italic>, and <italic>CCL4</italic>
</td>
<td align="char" char=".">&#x2212;1.224</td>
<td align="left">2 &#xd7; 10<sup>&#x2212;3</sup></td>
</tr>
<tr>
<td align="left">Transcription factor activity and RNA polymerase II core promoter proximal region sequence-specific binding (GO-MF: GO:0000982)</td>
<td align="left">
<italic>GATA3</italic>, <italic>ETS1</italic>, <italic>GATA1</italic>, <italic>MYBL1</italic>, <italic>TP53</italic>, and <italic>ARNTL</italic>
</td>
<td align="char" char=".">&#x2212;1.604</td>
<td align="left">2.2 &#xd7; 10<sup>&#x2212;3</sup></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>&#x2a;Gene Ontology (GO) has three domains: biological process (BP), cellular component (CC), and molecular function (MF).</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>4 Discussion</title>
<p>Multi-view data integration and gene signature detection are currently among the most challenging tasks for biomedical researchers. As different datasets contain different characteristics, integration of data from multiple platforms with significant feature reduction and gene module detection will give a more comprehensive view of how biology unravels at a granular level. Therefore, we introduced a novel technique, 3PNMF-MKL, for multi-platform data integration and gene signature detection. This approach integrates statistical methods, data fusion through three-factor penalized non-negative matrix factorization, and soft margin hinge loss-based multiple kernel learning. We then tested our approach using the TCGA LAML multi-omics dataset, which contains five different profiles (viz., gene expression, DNA methylation, exon expression, pathway activity, and copy number). Overall, our algorithm provides an excellent AUC (&#x3d; 0.827) for classifying the class labels for the underlying features (genes) within the chosen gene signature. Furthermore, we performed a functional analysis using the KEGG pathway and Gene Ontology databases to interpret those identified relevant feature genes. Collectively, our novel approach is applicable to any kind of multi-modal dataset.</p>
<p>Our proposed method, 3PNMF-MKL, combines data integration, performed by means of differential expression/methylation analysis using limma, non-negative matrix factorization, and soft margin hinge loss, with gene signature detection. 3PNMF-MKL discovers the best gene module (i.e., the gene signature) with the help of dynamic linkage clustering, dynamic tree cut, and correlation analysis. So far, many state-of-the-art methods are available for data integration (<xref ref-type="bibr" rid="B27">Yang and Michailidis, 2016</xref>; <xref ref-type="bibr" rid="B20">Ray et al., 2017</xref>) and gene signature discovery (<xref ref-type="bibr" rid="B4">Cun and Frohlich, 2012</xref>; <xref ref-type="bibr" rid="B29">Zhang and Xiao, 2020</xref>), but very few existing methods perform data integration and gene signature detection together in the same framework (<xref ref-type="bibr" rid="B5">Fujita et al., 2018</xref>). Here, we compared our proposed method 3PNMF-MKL with the existing method (<xref ref-type="bibr" rid="B29">Zhang and Xiao, 2020</xref>) applied to the TCGA acute myeloid leukemia dataset. Our proposed method obtained a 50-gene signature from multi-omics data integration, whereas the other method (<xref ref-type="bibr" rid="B29">Zhang and Xiao, 2020</xref>) produced an eight-gene signature from gene expression data alone, without multi-omics data integration. Also, we obtained 0.87 as the training set&#x2019;s 1-year AUC and 0.72 as the test set&#x2019;s 1-year AUC in the signature survival study (by Cox regression), while the other method obtained 0.86 as the training set&#x2019;s 1-year AUC and 0.69 as the test set&#x2019;s 1-year AUC for the gene expression data. Therefore, from all of these perspectives, our signature is stronger than the other.</p>
</sec>
<sec id="s5">
<title>5 Conclusion and future directions</title>
<p>No existing method in the field of bioinformatics deals with data integration, non-negative matrix factorization, soft margin hinge loss, and gene signature detection together, whereas our work integrates multi-omics data employing multi-dimensional schemes such as differential expression/methylation analysis using limma, non-negative matrix factorization, and soft margin hinge loss, and performs gene signature detection through best gene module discovery using dynamic linkage clustering, the dynamic tree cut method, and correlation analysis. The achievement of a high classification accuracy of 0.8273 also represents superior performance for our proposed algorithm. In addition, our method outperformed the state-of-the-art methods in terms of AUC. Extending our current approach with a deep learning strategy to tackle the integrative problem at the single-cell level is our future direction. In future work, we will collaborate with a wet laboratory to validate our experimental results in order to make them more promising.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s11">Supplementary Material</xref>; further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s7">
<title>Author contributions</title>
<p>SM and AS formulated the problem and conceived the design of the study. SM, AS, and SN performed the experimental analysis. SD, SG, SP, UM, and ZZ wrote the manuscript. All authors contributed to editing and revising the manuscript.</p>
</sec>
<sec id="s8">
<title>Funding</title>
<p>ZZ was partially supported by the Cancer Prevention and Research Institute of Texas (CPRIT RP170668 and RP180734). Publication costs were funded by ZZ&#x2019;s Professorship Fund. The funder had no role in the study design, data collection and analysis, decision to publish, or preparation of the manuscript.</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors, and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s11">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2023.1095330/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2023.1095330/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material>
<label>SUPPLEMENTARY FIGURE S1</label>
<caption>
<p>Detailed flowchart of the proposed 3PNMF-MKL framework.</p>
</caption>
</supplementary-material>
<supplementary-material>
<label>SUPPLEMENTARY FIGURE S2</label>
<caption>
<p>Average silhouette width during clustering.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Image1.TIFF" id="SM1" mimetype="application/TIFF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Image2.TIFF" id="SM2" mimetype="application/TIFF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bandyopadhyay</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mallik</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Integrating multiple data sources for combinatorial marker discovery: A study in tumorigenesis</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinform.</source> <volume>15</volume>, <fpage>673</fpage>&#x2013;<lpage>687</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2016.2636207</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bandyopadhyay</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mallik</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mukhopadhyay</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>A survey and comparative study of statistical tests for identifying differential expression from microarray data</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinform.</source> <volume>11</volume>, <fpage>95</fpage>&#x2013;<lpage>115</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2013.147</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>E. Y.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>Kou</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Duan</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Meirelles</surname>
<given-names>G. V.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Enrichr: Interactive and collaborative HTML5 gene list enrichment analysis tool</article-title>. <source>BMC Bioinforma.</source> <volume>14</volume>, <fpage>128</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-14-128</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Frohlich</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Biomarker gene signature discovery integrating network knowledge</article-title>. <source>Biol. (Basel)</source> <volume>1</volume>, <fpage>5</fpage>&#x2013;<lpage>17</lpage>. <pub-id pub-id-type="doi">10.3390/biology1010005</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fujita</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Mizuarai</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Murakami</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Nakai</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Biomarker discovery by integrated joint non-negative matrix factorization and pathway signature analyses</article-title>. <source>Sci. Rep.</source> <volume>8</volume>, <fpage>9743</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-018-28066-w</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gaur</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Bhandari</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Razdan</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Mallik</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Explanation-driven deep learning model for prediction of brain tumour status using MRI image data</article-title>. <source>Front. Genet.</source> <volume>13</volume>, <fpage>822666</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2022.822666</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghose</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Alavi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Tabassum</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Uddin</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Biswas</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mahbub</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Detecting COVID-19 infection status from chest X-ray and CT scan via single transfer learning-driven approach</article-title>. <source>Front. Genet.</source> <volume>13</volume>, <fpage>980338</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2022.980338</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Henry</surname>
<given-names>V. J.</given-names>
</name>
<name>
<surname>Bandrowski</surname>
<given-names>A. E.</given-names>
</name>
<name>
<surname>Pepin</surname>
<given-names>A.-S.</given-names>
</name>
<name>
<surname>Gonzalez</surname>
<given-names>B. J.</given-names>
</name>
<name>
<surname>Desfeux</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>OMICtools: An informative directory for multi-omic data analysis</article-title>. <source>Database</source> <volume>2014</volume>, <fpage>bau069</fpage>. <pub-id pub-id-type="doi">10.1093/database/bau069</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Imielinski</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Cha</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rejtar</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Richardson</surname>
<given-names>E. A.</given-names>
</name>
<name>
<surname>Karger</surname>
<given-names>B. L.</given-names>
</name>
<name>
<surname>Sgroi</surname>
<given-names>D. C.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Integrated proteomic, transcriptomic, and biological network analysis of breast carcinoma reveals molecular features of tumorigenesis and clinical relapse</article-title>. <source>Mol. Cell. Proteomics</source> <volume>11</volume>, <fpage>M111.014910</fpage>. <pub-id pub-id-type="doi">10.1074/mcp.M111.014910</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kandimalla</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Shimura</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Mallik</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sonohara</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Tsai</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Evans</surname>
<given-names>D. B.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Identification of serum miRNA signature and establishment of a nomogram for risk stratification in patients with pancreatic ductal adenocarcinoma</article-title>. <source>Ann. Surg.</source> <volume>275</volume>, <fpage>e229</fpage>&#x2013;<lpage>e237</lpage>. <pub-id pub-id-type="doi">10.1097/SLA.0000000000003945</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Langfelder</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Horvath</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Defining clusters from a hierarchical cluster tree: The dynamic tree cut package for R</article-title>. <source>Bioinformatics</source> <volume>24</volume>, <fpage>719</fpage>&#x2013;<lpage>720</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btm563</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Integration of multi-omics data to mine cancer-related gene modules</article-title>. <source>J. Bioinforma. Comput. Biol.</source> <volume>17</volume>, <fpage>1950038</fpage>. <pub-id pub-id-type="doi">10.1142/S0219720019500380</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mallik</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bhadra</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Maulik</surname>
<given-names>U.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Identifying epigenetic biomarkers using maximal relevance and minimal redundancy based feature selection for multi-omics data</article-title>. <source>IEEE Trans. Nanobioscience</source> <volume>16</volume>, <fpage>3</fpage>&#x2013;<lpage>10</lpage>. <pub-id pub-id-type="doi">10.1109/TNB.2017.2650217</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mallik</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Graph- and rule-based learning algorithms: A comprehensive review of their applications for cancer type classification and prognosis using genomic data</article-title>. <source>Briefings Bioinforma.</source> <volume>21</volume>, <fpage>368</fpage>&#x2013;<lpage>394</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bby120</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maulik</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Mallik</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mukhopadhyay</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bandyopadhyay</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Analyzing large gene expression and methylation data profiles using StatBicRM: Statistical biclustering-based rule mining</article-title>. <source>PLoS One</source> <volume>10</volume>, <fpage>e0119448</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0119448</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mo</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Seshan</surname>
<given-names>V. E.</given-names>
</name>
<name>
<surname>Olshen</surname>
<given-names>A. B.</given-names>
</name>
<name>
<surname>Schultz</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Sander</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>Pattern discovery and cancer gene identification in integrated cancer genomic data</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>110</volume>, <fpage>4245</fpage>&#x2013;<lpage>4250</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1208949110</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pellet</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lefaudeux</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Royer</surname>
<given-names>P.-J.</given-names>
</name>
<name>
<surname>Koutsokera</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bourgoin-Voillard</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Schmitt</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>A multi-omics data integration approach to identify a predictive molecular signature of CLAD</article-title>. <source>Eur. Respir. J.</source> <volume>46</volume>, <fpage>OA3271</fpage>. <pub-id pub-id-type="doi">10.1183/13993003.congress-2015.OA3271</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qiu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Multi-omics data integration for identifying osteoporosis biomarkers and their biological interaction and causal mechanisms</article-title>. <source>iScience</source> <volume>23</volume>, <fpage>100847</fpage>. <pub-id pub-id-type="doi">10.1016/j.isci.2020.100847</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ravasz</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Somera</surname>
<given-names>A. L.</given-names>
</name>
<name>
<surname>Mongru</surname>
<given-names>D. A.</given-names>
</name>
<name>
<surname>Oltvai</surname>
<given-names>Z. N.</given-names>
</name>
<name>
<surname>Barab&#xe1;si</surname>
<given-names>A.-L.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Hierarchical organization of modularity in metabolic networks</article-title>. <source>Science</source> <volume>297</volume>, <fpage>1551</fpage>&#x2013;<lpage>1555</lpage>. <pub-id pub-id-type="doi">10.1126/science.1073374</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ray</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Fenyo</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Adaptive multiview nonnegative matrix factorization algorithm for integration of multimodal biomedical data</article-title>. <source>Cancer Inf.</source> <volume>16</volume>, <fpage>1176935117725727</fpage>. <pub-id pub-id-type="doi">10.1177/1176935117725727</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ritchie</surname>
<given-names>M. E.</given-names>
</name>
<name>
<surname>Phipson</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Law</surname>
<given-names>C. W.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Limma powers differential expression analyses for RNA-sequencing and microarray studies</article-title>. <source>Nucleic Acids Res.</source> <volume>43</volume>, <fpage>e47</fpage>. <pub-id pub-id-type="doi">10.1093/nar/gkv007</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Saeed</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Haroon</surname>
<given-names>H. B.</given-names>
</name>
<name>
<surname>Naqvi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Jhanjhi</surname>
<given-names>N. Z.</given-names>
</name>
<name>
<surname>Ahmad</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Gaur</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>A systematic mapping study of low-grade tumor of brain cancer and CSF fluid detecting approaches and parameters</article-title>,&#x201d; in <source>Approaches and applications of deep learning in virtual medical care</source>, <fpage>236</fpage>&#x2013;<lpage>259</lpage>. <pub-id pub-id-type="doi">10.4018/978-1-7998-8929-8.ch010</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Serra</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fratello</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Fortino</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Raiconi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Tagliaferri</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Greco</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>MVDA: A multi-view genomic data integration methodology</article-title>. <source>BMC Bioinforma.</source> <volume>16</volume>, <fpage>261</fpage>. <pub-id pub-id-type="doi">10.1186/s12859-015-0680-3</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Smyth</surname>
<given-names>G. K.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Linear models and empirical Bayes methods for assessing differential expression in microarray experiments</article-title>. <source>Stat. Appl. Genet. Mol. Biol.</source> <volume>3</volume>, <fpage>3</fpage>. <pub-id pub-id-type="doi">10.2202/1544-6115.1027</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gu</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Integrative clustering methods of multi-omics data for molecule-based cancer classifications</article-title>. <source>Quant. Biol.</source> <volume>4</volume>, <fpage>58</fpage>&#x2013;<lpage>67</lpage>. <pub-id pub-id-type="doi">10.1007/s40484-016-0063-4</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Tsang</surname>
<given-names>I. W.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Soft margin multiple kernel learning</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst.</source> <volume>24</volume>, <fpage>749</fpage>&#x2013;<lpage>761</lpage>. <pub-id pub-id-type="doi">10.1109/TNNLS.2012.2237183</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Michailidis</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>A non-negative matrix factorization method for detecting modules in heterogeneous omics multi-modal data</article-title>. <source>Bioinformatics</source> <volume>32</volume>, <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btv544</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Kuster</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Proteomics is not an island: Multi-omics integration is the key to understanding biological systems</article-title>. <source>Mol. Cell. Proteomics</source> <volume>18</volume>, <fpage>S1</fpage>&#x2013;<lpage>S4</lpage>. <pub-id pub-id-type="doi">10.1074/mcp.E119.001693</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Identification and validation of a prognostic 8-gene signature for acute myeloid leukemia</article-title>. <source>Leukemia Lymphoma</source> <volume>61</volume>, <fpage>1981</fpage>&#x2013;<lpage>1988</lpage>. <pub-id pub-id-type="doi">10.1080/10428194.2020.1742898</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>&#x17d;itnik</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zupan</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Data fusion by matrix factorization</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell.</source> <volume>37</volume>, <fpage>41</fpage>&#x2013;<lpage>53</lpage>. <pub-id pub-id-type="doi">10.1109/TPAMI.2014.2343973</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>