<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article article-type="methods-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">783713</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2021.783713</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Methods</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>PathwayMultiomics: An R Package for Efficient Integrative Analysis of Multi-Omics Datasets With Matched or Un-matched Samples</article-title>
<alt-title alt-title-type="left-running-head">Odom et&#x20;al.</alt-title>
<alt-title alt-title-type="right-running-head">Integrative Analysis With PathwayMultiomics</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Odom</surname>
<given-names>Gabriel J.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1554426/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Colaprico</surname>
<given-names>Antonio</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1086099/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Silva</surname>
<given-names>Tiago C.</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1425562/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>X. Steven</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Lily</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1357200/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Biostatistics, Stempel College of Public Health, Florida International University</institution>, <addr-line>Miami</addr-line>, <addr-line>FL</addr-line>, <country>United&#x20;States</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Public Health Sciences, Miller School of Medicine, University of Miami</institution>, <addr-line>Miami</addr-line>, <addr-line>FL</addr-line>, <country>United&#x20;States</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Sylvester Comprehensive Cancer Center, Miller School of Medicine, University of Miami</institution>, <addr-line>Miami</addr-line>, <addr-line>FL</addr-line>, <country>United&#x20;States</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Dr. John T Macdonald Foundation Department of Human Genetics, Miller School of Medicine, University of Miami</institution>, <addr-line>Miami</addr-line>, <addr-line>FL</addr-line>, <country>United&#x20;States</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>John P. Hussman Institute for Human Genomics, Miller School of Medicine, University of Miami</institution>, <addr-line>Miami</addr-line>, <addr-line>FL</addr-line>, <country>United&#x20;States</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/900071/overview">Farhad Maleki</ext-link>, McGill University, Canada</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1236881/overview">Lingling Jin</ext-link>, University of Saskatchewan, Canada</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1273432/overview">Yan Yan</ext-link>, Thompson Rivers University, Canada</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/201561/overview">Paola Lecca</ext-link>, Free University of Bozen-Bolzano, Italy</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Lily Wang, <email>lily.wang@miami.edu</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Computational Genomics, a section of the journal Frontiers in Genetics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>22</day>
<month>12</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>12</volume>
<elocation-id>783713</elocation-id>
<history>
<date date-type="received">
<day>26</day>
<month>09</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>07</day>
<month>12</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2021 Odom, Colaprico, Silva, Chen and Wang.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Odom, Colaprico, Silva, Chen and Wang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these&#x20;terms.</p>
</license>
</permissions>
<abstract>
<p>Recent advances in technology have made multi-omics datasets increasingly available to researchers. To leverage the wealth of information in multi-omics data, a number of integrative analysis strategies have been proposed recently. However, effectively extracting biological insights from these large, complex datasets remains challenging. In particular, matched samples with multiple types of omics data measured on each sample are often required for multi-omics analysis tools, which can significantly reduce the sample size. Another challenge is that analysis techniques such as dimension reductions, which extract association signals in high dimensional datasets by estimating a few variables that explain most of the variations in the samples, are typically applied to whole-genome data, which can be computationally demanding. Here we present pathwayMultiomics, a pathway-based approach for integrative analysis of multi-omics data with categorical, continuous, or survival outcome variables. The input of pathwayMultiomics is pathway <italic>p-</italic>values for individual omics data types, which are then integrated using a novel statistic, the MiniMax statistic, to prioritize pathways dysregulated in multiple types of omics datasets. Importantly, pathwayMultiomics is computationally efficient and does not require matched samples in multi-omics data. We performed a comprehensive simulation study to show that pathwayMultiomics significantly outperformed currently available multi-omics tools with improved power and well-controlled false-positive rates. In addition, we also analyzed real multi-omics datasets to show that pathwayMultiomics was able to recover known biology by nominating biologically meaningful pathways in complex diseases such as Alzheimer&#x2019;s disease.</p>
</abstract>
<kwd-group>
<kwd>pathway analysis</kwd>
<kwd>gene set analysis</kwd>
<kwd>multi-omics</kwd>
<kwd>integrative analysis</kwd>
<kwd>R package</kwd>
<kwd>Alzheimer&#x2019;s disease</kwd>
</kwd-group>
<contract-num rid="cn001">R01CA158472</contract-num>
<contract-sponsor id="cn001">National Institutes of Health<named-content content-type="fundref-id">10.13039/100000002</named-content>
</contract-sponsor>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>Recent advances in technology have made multi-omics datasets increasingly available to researchers. For example, The Cancer Genome Atlas (TCGA) and the Clinical Proteomic Tumor Analysis Consortium (CPTAC) have generated comprehensive molecular profiles including genomic, epigenomic, and proteomic expressions on matched samples for many types of human tumors. The underlying hypothesis is that multiple types of molecular profiles (e.g., copy number, DNA methylation, protein) might provide a more coherent and complete signature of the disease process.</p>
<p>To leverage the wealth of information in multi-omics data, a number of integrative analysis strategies have been proposed (<xref ref-type="bibr" rid="B28">Meng et&#x20;al., 2016</xref>; <xref ref-type="bibr" rid="B12">Huang et&#x20;al., 2017</xref>) and compared (<xref ref-type="bibr" rid="B22">Le Cao et&#x20;al., 2009</xref>; <xref ref-type="bibr" rid="B33">Pucher et&#x20;al., 2019</xref>). These methods can be roughly classified into three different categories, characterized by the way they leverage information from the multi-omics datasets. The first group of methods (<xref ref-type="bibr" rid="B31">Parkhomenko et&#x20;al., 2009</xref>; <xref ref-type="bibr" rid="B41">Waaijenborg and Zwinderman, 2009</xref>; <xref ref-type="bibr" rid="B43">Witten and Tibshirani, 2009</xref>; <xref ref-type="bibr" rid="B24">Lin et&#x20;al., 2013</xref>) analyzes only intersecting (i.e.,&#x20;matched) samples from the multiple omics datasets and only shared genes measured by all types of omics platforms. The second group of methods (<xref ref-type="bibr" rid="B7">Dray and Dufour, 2007</xref>; <xref ref-type="bibr" rid="B16">Kaspi and Ziemann, 2020</xref>) analyzes only genes shared by multiple types of omics datasets, which may be measured on the same or distinct samples in different omics datasets. The third group of methods (<xref ref-type="bibr" rid="B8">Gao et&#x20;al., 2004</xref>; <xref ref-type="bibr" rid="B20">Kutalik et&#x20;al., 2008</xref>; <xref ref-type="bibr" rid="B46">Zhang et&#x20;al., 2012</xref>; <xref ref-type="bibr" rid="B27">Meng et&#x20;al., 2014</xref>) analyzes matched samples in multi-omics datasets, where each dataset may have the same or distinct&#x20;genes.</p>
<p>Because of the complexities in multi-omics datasets, effectively extracting biological insights from these datasets remains challenging. A major challenge for multi-omics data analysis is that the samples are often measured on one or a few, but not all, omics data types. Therefore, multi-omics analysis tools that require matched samples (with measurements for all omics data types) as input can significantly limit the sample size when several omics data types are considered. Another challenge is that analysis techniques such as dimension reduction techniques are typically applied to genome-wide data, which can be computationally demanding. Thus, to maximally leverage information from the multi-omics datasets, there is a critical need for developing additional integrative methods that are not restricted to only matched samples and/or shared genes in the input datasets.</p>
<p>Here we present pathwayMultiomics, a pathway-based approach for integrative analysis of multi-omics data. Instead of testing individual genes, pathway analysis tests joint effects of multiple genes belonging to the same biological pathway, such as those defined in the KEGG (<xref ref-type="bibr" rid="B15">Kanehisa et&#x20;al., 2012</xref>) database. Higher power in the pathway-based analysis is achieved by combining weak signals from a number of individual genes in the pathway (<xref ref-type="bibr" rid="B36">Subramanian et&#x20;al., 2005</xref>). The input of pathwayMultiomics is pathway <italic>p-</italic>values for individual omics data types, which are then integrated using a novel statistic, the MiniMax statistic, to prioritize pathways dysregulated in multiple types of omics datasets. Because pathwayMultiomics only requires summary statistics (i.e.,&#x20;pathway <italic>p-</italic>values) as input, it is computationally efficient. In addition, it is also flexible and can be used to analyze multi-omics datasets with categorical, continuous, or survival outcome variables. Importantly, using summary statistics as input allows pathwayMultiomics to maximally leverage information in multi-omics datasets by not restricting to only shared samples and/or genes. Using simulated datasets, we showed that pathwayMultiomics significantly outperforms currently available multi-omics methods with improved power and well-controlled false-positive rates. In addition, we also analyzed multi-omics datasets in Alzheimer&#x2019;s disease to show that pathwayMultiomics was able to recover known biology by nominating biologically meaningful pathways.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>Materials and Methods</title>
<sec id="s2-1">
<title>An Overview of pathwayMultiomics Algorithm</title>
<p>
<xref ref-type="fig" rid="F1">Figure&#x20;1</xref> illustrates the workflow of the pathwayMultiomics analysis pipeline. We next describe the input datasets, analytical algorithm, and output in detail. The pathwayMultiomics package for R can be accessed from <ext-link ext-link-type="uri" xlink:href="https://github.com/TransBioInfoLab/pathwayMultiomics">https://github.com/TransBioInfoLab/pathwayMultiomics</ext-link>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Workflow of pathwayMultiomics analysis.</p>
</caption>
<graphic xlink:href="fgene-12-783713-g001.tif"/>
</fig>
<sec id="s2-1-1">
<title>Input Datasets</title>
<p>The input dataset consists of omics datasets for several different molecular traits, such as SNPs, DNA methylation (DNAm), copy number alterations (CNAs), or gene expressions. Of particular interest are dysregulated pathways at multiple molecular levels, for example, those with changes in both DNA methylation and gene expressions. Importantly, pathwayMultiomics is flexible; the samples can be either matched (multiple types of molecular traits are measured on the same set of samples), or un-matched (distinct samples from the same disease are measured with different types of omics technology). Moreover, because the units of analyses for pathwayMultiomics are pathways (i.e.,&#x20;groups of genes participating in the same biological processes), different omics datasets can also include different genes, as long as pathway-level association statistics that relate each type of omics profiles to the phenotype (e.g., pathway <italic>p-</italic>values) can be computed. This flexibility enables pathwayMultiomics to take advantage of different pathway analysis software to model and account for special characteristics in different types of omics datasets. For example, for pathway analysis of DNAm data, the missMethyl method (<xref ref-type="bibr" rid="B32">Phipson et&#x20;al., 2016</xref>), which takes account of the varying number of probes mapped to each gene, could be used. For pathway analysis of gene expression data, pathwayPCA method (<xref ref-type="bibr" rid="B29">Odom et&#x20;al., 2020</xref>), which selects the coherent subset of genes before estimating and testing principal components with phenotypes, could be applied.</p>
</sec>
<sec id="s2-1-2">
<title>MiniMax Statistic</title>
<p>Given pathway <italic>p-</italic>values for each omics data type, pathwayMultiomics next computes the MiniMax statistic. To this end, we first consider all pairs of <italic>p-</italic>values from different omics types and take the maximum for each pair of <italic>p-</italic>values. Next, we take the <bold>mini</bold>mum of all <bold>max</bold>imums computed from the last step. For example, suppose we are interested in an apoptosis pathway for a cancer study, which has <italic>p-</italic>values of 0.01, 0.03, and 0.05 for copy number variations, gene expressions, and protein assays, respectively. We then have a total of three pairs of <italic>p-</italic>values (0.01, 0.03), (0.01, 0.05), (0.03, 0.05), with maximums 0.03, 0.05, and 0.05 respectively. The MiniMax statistic is the smallest value of these maximums, which is 0.03. Intuitively, the MiniMax statistic provides a way to identify pathways with differential changes (i.e.,&#x20;small <italic>p-</italic>values) in <italic>at least two</italic> types of omics data. Note that in this case, the MiniMax statistic is equivalent to taking the second smallest <italic>p-</italic>value among all <italic>p-</italic>values; that is, the second-order statistic, <italic>P</italic>
<sub>(2)</sub>, of the pathway <italic>p-</italic>values. Instead of considering pairs of <italic>p-</italic>values, the MiniMax statistic can also be computed for triplets or quadruplets of <italic>p-</italic>values from three, four, or more types of omics data similarly to identify pathways with differential changes (i.e.,&#x20;small <italic>p</italic>-values) in more than two types of omics&#x20;data.</p>
</sec>
<sec id="s2-1-3">
<title>Statistical Significance Assessment</title>
<p>To compute <italic>p-</italic>values for the MiniMax statistic, pathwayMultiomics has two modes: 1) by approximation or 2) by simulation. More specifically, the &#x201c;approximation&#x201d; approach is based on the theory that when different types of omics data are independent, the <italic>r</italic>th order statistic <italic>P</italic>
<sub>(r)</sub> of the <italic>p</italic>-values follows a Beta distribution, that is, <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msub>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x223c;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mi mathvariant="normal">&#x212c;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>G</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mi mathvariant="normal">&#x212c;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes the Beta distribution and <inline-formula id="inf3">
<mml:math id="m3">
<mml:mi>G</mml:mi>
</mml:math>
</inline-formula> is the number of different types of omics data (<xref ref-type="bibr" rid="B10">Gentle, 2009</xref>; <xref ref-type="bibr" rid="B13">Jones, 2009</xref>). Therefore, for integrative analysis that identifies pathways with differential changes in at least two of three types of omics datasets (<italic>G</italic>&#x20;&#x3d; 3, as in the example above), the MiniMax statistic is the second-order statistic and has the distribution <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msub>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x223c;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mi mathvariant="normal">&#x212c;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mn>3</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="normal">&#x212c;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mn>2,2</mml:mn>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> under the null hypotheses. The &#x201c;approximation&#x201d; approach is easy to compute and is useful when computational resources are limited or when raw data in different omics data types are not available.</p>
<p>On the other hand, in the &#x201c;simulation&#x201d; approach, we simulate the distribution of MiniMax statistics under the null hypothesis, that is, when there is no association between phenotype and the pathway in each type of omics data. More specifically, we generate random phenotype labels for each sample and then re-compute pathway <italic>p-</italic>values. These resulting <italic>p-</italic>values are our empirical null <italic>p-</italic>values. To account for non-independence in the different data types, instead of using the above formula, we estimate values for <inline-formula id="inf5">
<mml:math id="m5">
<mml:mi>&#x3b1;</mml:mi>
</mml:math>
</inline-formula> and <inline-formula id="inf6">
<mml:math id="m6">
<mml:mi>&#x3b2;</mml:mi>
</mml:math>
</inline-formula> from the empirical null <italic>p-</italic>values. In practice, we have found that the more correlated the <italic>p</italic>-values are across the multi-omics platforms, the smaller <inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>&#x3b2;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mi>G</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> are. The &#x201c;simulation&#x201d; approach provides more accurate statistical significance estimation and is recommended when both raw data for different omics and large computational resources are available.</p>
</sec>
<sec id="s2-1-4">
<title>Output</title>
<p>The output of pathwayMultiomics is prioritized pathways with small <italic>p-</italic>values in multiple omics data types, the MiniMax statistic and significance level for each pathway, and the omics data types that were contributing to the MiniMax statistic. For example, in the apoptosis pathway example we described above, the MiniMax statistic was 0.03, its <italic>p</italic>-value (using the approximate <inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:mi mathvariant="normal">&#x212c;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mn>2,2</mml:mn>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> distribution) would be 0.0026, and the omics data that contributed to MiniMax statistic were the copy number variations and gene expression&#x20;data.</p>
</sec>
</sec>
<sec id="s2-2">
<title>Design of Simulation Studies</title>
<p>We performed a comprehensive simulation study to evaluate and compare the performance of the proposed pathwayMultiomics approach with four alternative methods for prioritizing pathways enriched with concordant but often subtle association signals. To simulate multi-omics datasets with realistic correlation patterns, we used the TCGA COADREAD dataset (<xref ref-type="bibr" rid="B40">Vasaikar et&#x20;al., 2018</xref>) as our input dataset, which included 614, 222, and 90 samples of copy number alterations (CNAs), gene expression, and proteomics data, respectively. More specifically, the CNA data included gene-level GISTIC2 log<sub>2</sub> ratios for 24,776 genes; gene expression data included normalized counts (<inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>log</mml:mi>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> transformation) of 6,149 genes generated by the Illumina GenomeAnalyzer platform; and the protein data included log-ratio normalized protein expression levels of 5,538&#x20;genes.</p>
<p>To simulate multi-omics datasets for a collection of pathways, we first created synthetic pathways by performing hierarchical clustering on the 1,710 genes measured by all three types of assays for CNA, gene expression, and protein. More specifically, first, a data matrix with 1,710 genes and 928 samples (from the 623 subjects with at least one type of omics data) was created. Next, within each data type, data for each gene were centered and scaled. Finally, a modified Ward&#x2019;s method (method &#x3d; &#x201c;ward.D&#x201d; in hclust() function) was then used to partition the genes into 50 clusters or 50 synthetic pathways. The number of genes in the resulting pathways ranged from 9 to 74, with an average of 34&#x20;genes.</p>
<p>Next, we simulated treated (i.e.,&#x20;true positive) and un-treated (i.e.,&#x20;true negative) pathways. First, we randomly assigned each of the 623 subjects to one of two cancer subtypes: A or B. Next, among the 50 synthetic pathways, we selected five pathways to be our true positive pathways, and treatment effects at different levels (&#xb5; &#x3d; 0.1, 0.2, 0.3, 0.4, 0.5) were added to a subset of genes (<italic>p</italic>&#x20;&#x3d; 20, 40, 60, 80%) within each pathway in each of the multi-omics datasets for samples in subtype A group. This process was then repeated 100&#x20;times to create 100 simulated multi-omics datasets, each including 50 pathways, among which 5 pathways are true positive pathways. Overall, we generated datasets for a total of 20 simulation scenarios (5 values for &#xb5; &#xd7; 4 values for <italic>p</italic>). This benchmark dataset (available at <ext-link ext-link-type="uri" xlink:href="https://zenodo.org/record/5683002">https://zenodo.org/record/5683002&#x23;.YZF5SGDMKUk</ext-link>), which was systematically modified from real multi-omics data, can be used for reproducing analyses in this study as well as benchmarking future multi-omics data analysis methods.</p>
<p>To evaluate the false positive rate of each method, we also repeated the same procedures described above, except by setting &#xb5; &#x3d; 0 (i.e.,&#x20;not adding any treatment effect). Multi-omics data was created for a total of 5,000 pathways by generating random sample labels 100&#x20;times for the 50 synthetic pathways. The false-positive rate (i.e.,&#x20;test size) for each method was then estimated by the percentage of pathway <italic>p</italic>-values less than&#x20;0.05.</p>
<p>Given the known status of the pathways, we next computed the area under the ROC curve (AUC) for each method. The receiver operating characteristic (ROC) curve is a plot of sensitivity versus 1-specificity as the cutoff for declaring significant pathways is varied. AUC assesses the overall discriminative ability of the methods to determine whether a given pathway is significantly associated with the phenotype (i.e.,&#x20;subtype group of the samples) over all possible significance cutoffs. More specifically, for each of the simulation scenarios, we recorded the rankings of the 50 pathways from most to least extreme (by either a <italic>p</italic>-value, test statistic, or score returned by a method), constructed ROC curves, and estimated AUC for each method.</p>
</sec>
<sec id="s2-3">
<title>Methods Compared in the Simulation Study</title>
<p>We compared pathwayMultiomics with four alternative multi-omics analysis methods: Sparse Multiple Canonical Correlation Analysis (sparse mCCA) (<xref ref-type="bibr" rid="B43">Witten and Tibshirani, 2009</xref>), MFA (<xref ref-type="bibr" rid="B7">Dray and Dufour, 2007</xref>), iProFun (<xref ref-type="bibr" rid="B35">Song et&#x20;al., 2019</xref>), and mitch (<xref ref-type="bibr" rid="B16">Kaspi and Ziemann, 2020</xref>). We chose mCCA to represent multi-omics matrix factorization techniques because it performed best in a recent comparative study of multi-omics analysis methods (<xref ref-type="bibr" rid="B33">Pucher et&#x20;al., 2019</xref>). The last three methods, mitch, iProFun, and MFA, were chosen because they were proposed in recent years and can also be applied to un-matched or partially matched datasets (<xref ref-type="table" rid="T1">Table&#x20;1</xref>). Note that each of these tools was designed specifically for the analysis of multi-omics data, either matching by samples, genomic features (e.g., gene or probe), or both. In the following, we briefly describe each of the methods compared in our simulation&#x20;study.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Methods compared by simulation study. Methods that analyze only matched samples would require multiple types of molecular data (e.g., gene expression and protein) to be generated for the same subject; methods that analyze only matched genes would require multiple types of molecular data to be generated for the same gene. Summary data refers to resulting statistics such as <italic>p</italic>-values or <italic>t</italic>-statistics from differential expression analysis for genes or pathways. All function calls used default function arguments unless otherwise specified.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Method</th>
<th align="center">Matches on</th>
<th align="center">Analyzes only matched samples</th>
<th align="center">Analyzes only matched genes</th>
<th align="center">Can analyze summary data</th>
<th align="center">Implementation R package::function</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">sCCA</td>
<td align="left">Samples measured by all omics data types</td>
<td align="center">Yes</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="left">PMA::MultiCCA.permute() with nperms &#x3d; 100; and PMA::MultiCCA()</td>
</tr>
<tr>
<td align="left">MFA</td>
<td align="left">Features (e.g., genes)</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="left">ade4::ktab.list.df() and ade4::mfa() with option &#x3d; &#x201c;lambda1&#x201d;</td>
</tr>
<tr>
<td align="left">mitch</td>
<td align="left">Features (e.g., genes)</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">Yes</td>
<td align="left">mitch::mitch_calc() with minsetsize &#x3d; 5 and priority &#x3d; &#x201c;effect&#x201d;</td>
</tr>
<tr>
<td align="left">iProFun</td>
<td align="left">Samples measured on at least two omics data types</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="center">No</td>
<td align="left">iProFun::iProFun_permutate() with parameters in package example (pi &#x3d; rep (0.05, 2); grids &#x3d; c (seq (0.75, 0.99, 0.01), seq (0.991, 0.999, 0.001), seq (0.9991, 0.9999, 0.0001)); filter &#x3d; 1; seed &#x3d; 123).</td>
</tr>
<tr>
<td align="left">pathwayMultiomics</td>
<td align="left">Pathways</td>
<td align="center">No</td>
<td align="center">No</td>
<td align="center">Yes</td>
<td align="left">pathwayMultiomics::MiniMax() with parameters orderStat &#x3d; 2 and method &#x3d; &#x201c;parametric&#x201d;</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Abbreviations: sCCA, Sparse Canonical Correlates Analysis; MFA, Multi-Factor Analysis; mitch, multivariate gene set enrichment analysis; iProFun, Integrative Proteogenomic Functional Traits Analysis.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<sec id="s2-3-1">
<title>pathwayMultiomics</title>
<p>To compute pathway <italic>p-</italic>values for single omics data, we used the pathwayPCA R package (<xref ref-type="bibr" rid="B29">Odom et&#x20;al., 2020</xref>). PathwayPCA integrates prior biological knowledge to extract Adaptive Elastic-net Sparse PCs (AES-PCs) within each pathway for each omics dataset separately; the first AES-PC with the largest variance was then tested against the binary outcome &#x201c;cancer subtype&#x201d; using a logistic regression model. The pathway <italic>p-</italic>values for each type of omics data were then used as input for pathwayMultiomics, to identify pathways dysregulated in more than one omics data type. Because the pathway <italic>p-</italic>values are calculated for each omics dataset separately, the statistical accuracy and power in pathwayMultiomics analysis will not change as the number of matched samples or shared features decreases.</p>
</sec>
<sec id="s2-3-2">
<title>Sparse Multiple Canonical Correlates Analysis (sCCA)</title>
<p>Sparse Canonical Correlation Analysis (sCCA) is a matrix factorization method that uses penalized multivariate analysis for identifying linear combinations of two groups of variables that are highly correlated. <xref ref-type="bibr" rid="B43">Witten and Tibshirani (2009)</xref> extended sCCA to sparse multiple CCA (mCCA), which can perform integrative analysis of more than two sets of variables measured on the same subjects. In the first step, sparse mCCA finds the set of intersecting (i.e.,&#x20;shared) samples and genes across all multi-omics datasets, i.e.,&#x20;the same set of genes is measured on the same subjects in each of the omics datasets. Therefore, the statistical accuracy and power of sparse mCCA to detect multi-omics changes will decrease as the number of shared samples or features decreases because samples or features not shared across all data sets will be discarded. In particular, in the TCGA COADREAD multi-omics datasets, only 71 samples and 1,710 genes were measured on all three omics data types (CNA, gene expression, protein). Next, sparse mCCA uses a permutation procedure to determine the thresholds and to extract a single vector of selected genes for each omics data type. The union of these selected genes from each omics data type is then taken as the genes selected by sparse multiple CCA. Finally, a Fisher&#x2019;s Exact Test is used to determine if a pathway is enriched with selected genes. We used mCCA implemented via the MultiCCA() function in the PMA R package (<ext-link ext-link-type="uri" xlink:href="https://cran.r-project.org/web/packages/PMA/index.html">https://cran.r-project.org/web/packages/PMA/index.html</ext-link>); optimal weights and penalties were identified by the MultiCCA.permute() function.</p>
</sec>
<sec id="s2-3-3">
<title>Multi-Factor Analysis (MFA)</title>
<p>The MFA method is also a matrix factorization technique, but it differs from sparse mCCA in that it only requires data to be matched on features rather than samples. For MFA analysis of multi-omics data, the main requirement is that the same set of <italic>p</italic> genes is measured on all omics data types on potentially different subjects. Therefore, the statistical accuracy and power of MFA to detect multi-omics changes will not be affected by the number of matched samples, but will decrease as the number of shared features decreases, because features not shared across all data sets will be discarded. In the first step, MFA reshapes data by stacking the multi-omics datasets, each with samples as rows and the same <italic>p</italic> genes as columns. Next, MFA performs a weighted principal components analysis, where the weights from each data set are inversely related to the principal eigenvalue of the data set (a measurement of the overall variability in the dataset). Then, each gene is given a score measuring its concordance across the datasets for different omics types, where the distribution of these scores follows <italic>N</italic> (<italic>0, p</italic>
<sup>&#x2212;1/2</sup>) where <italic>p</italic> is the number of genes measured on all omics data types. Finally, genes with upper-sided <italic>p</italic>-values <inline-formula id="inf10">
<mml:math id="m10">
<mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> are selected, and Fisher&#x2019;s Exact Test is used to identify pathways significantly enriched with selected genes. We implemented the MFA method using the mfa() function in the ade4&#x20;R package under default settings.</p>
</sec>
<sec id="s2-3-4">
<title>Multi-Contrast Pathway Enrichment Analysis (mitch)</title>
<p>The mitch method is very similar to the proposed MiniMax statistic because it also computes pathway-level enrichment scores from summary statistics rather than using the data itself. There are several steps in the mitch algorithm: first, users identify the set of <inline-formula id="inf11">
<mml:math id="m11">
<mml:mi>p</mml:mi>
</mml:math>
</inline-formula> genes measured by all <italic>G</italic> omics data types, and subset the multi-omics datasets to include only these <inline-formula id="inf12">
<mml:math id="m12">
<mml:mi>p</mml:mi>
</mml:math>
</inline-formula> genes. Next, for each omics dataset, methods appropriate for each platform (e.g., DESeq2 for RNASeq data) are used to compute gene-wise summary statistics or gene scores (e.g., <italic>p-</italic>values or <italic>t</italic>-statistics) that associate each gene with the phenotype. This step produces a <italic>p</italic>&#x20;&#xd7; <italic>G</italic> data matrix (i.e.,&#x20;<italic>p</italic> genes &#xd7; <italic>G</italic> omics data types). Therefore, the statistical accuracy and power of mitch to detect multi-omics changes will not be affected by the number of matched samples, but will decrease as the number of shared features decreases, because features not shared across all data sets will be discarded. Finally, for each pathway, mitch performs a one-way MANOVA to test if gene scores across the <italic>G</italic> omics data types are different for genes within the pathways compared to background genes. We compared the mitch algorithm, computed using the mitch_calc() routine from the mitch R package with priority &#x3d; &#x201c;effect&#x201d;, with two alternative gene-wise summary statistics: the gene-specific <italic>t</italic>-statistic obtained after fitting a linear model that associated each gene with subtype group effect (labeled as &#x201c;mitch_tStat&#x201d; in <xref ref-type="fig" rid="F2">Figure&#x20;2</xref>), and the gene-specific <italic>p-</italic>values from the same linear models (labeled as &#x201c;mitch_pValue&#x201d;). Note that using the <italic>t</italic>-statistic accounted for different directions of associations among genes while using the <italic>p</italic>-value did&#x20;not.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Performance of different multi-omics analysis methods in the simulation study. To simulate multi-omics datasets, we used the TCGA COADREAD datasets (in copy number alterations, gene expressions and proteomics data) as an input, created 50 synthetic pathways by clustering genes measured by all three types of omics data, and then added treatment with different effect sizes (mu) to a proportion (p &#x3d; 0.2, 0.4, 0.6, 0.8) of the genes. This process was repeated 100&#x20;times to create 100 simulated multi-omics datasets for each simulation scenario (i.e.,&#x20;different combinations of mu and p). Shown are area under ROC curves (AUCs) for each method averaged over 100 simulation datasets at each simulation scenario.</p>
</caption>
<graphic xlink:href="fgene-12-783713-g002.tif"/>
</fig>
</sec>
<sec id="s2-3-5">
<title>Integrative Screening for Proteogenomic Functional Traits (iProFun)</title>
<p>The iProFun method (<xref ref-type="bibr" rid="B35">Song et&#x20;al., 2019</xref>) aims to detect DNA copy numbers (CNA) and methylation alterations (DNAm) with downstream functional consequences in mRNA expression levels, global protein abundances, or phosphoprotein abundances. In the first step, iProFun fits three linear models, each with a molecular trait (mRNA, global protein, or phosphoprotein) as the outcome, and CNA or DNAm as the predictor, along with additional covariate variables (e.g., age, sex). Next, multiple comparison correction is applied to <italic>p-</italic>values of the predictor (CNA or DNAm) in each of the three linear models, and genes with at least one significant predictor are selected. Finally, Fisher&#x2019;s Exact Test is used to identify pathways enriched with selected genes. Notably, iProFun allows more flexibility in the input dataset and can take advantage of samples not completely measured on all omics types. Specifically, iProFun requires samples to be measured by at least one genomic (e.g., copy number, DNA methylation) trait and at least one transcriptomic (i.e.,&#x20;mRNA) or proteomic (e.g., global, phosphoprotein) trait, but it does not require samples to be measured by more than one genomic trait or more than one transcriptomic/proteomic traits. In the simulation study, the numbers of shared samples analyzed by iProFun were 216 (copy number and RNAseq) and 88 (copy number and proteomics). The statistical accuracy and power of iProFun to detect multi-omics changes will decrease as the number of these shared samples (between copy number and RNAseq, or between copy number and proteomics) decreases, because samples not shared by at least two data sets will be discarded. In our simulation study, we used the iProFun_permutate() function in the iProFun package to independently predict synthetic gene expressions and proteomics data from simulated copy number aberrations. 
Default parameter values, as shown in package examples, were used for all functions.</p>
</sec>
</sec>
<sec id="s2-4">
<title>Analysis of Multi-Omics Datasets in Alzheimer&#x2019;s Disease</title>
<sec id="s2-4-1">
<title>pathwayMultiomics Analysis</title>
<p>We next applied pathwayMultiomics to analyze a set of multi-omics datasets in Alzheimer&#x2019;s disease. The input of pathwayMultiomics analysis is pathway <italic>p-</italic>values for single omics data. Therefore, we first performed pathway analysis for genetic variants, DNAm, and gene expressions using the mixed model approach (<xref ref-type="bibr" rid="B42">Wang et&#x20;al., 2011</xref>), MissMethyl (<xref ref-type="bibr" rid="B32">Phipson et&#x20;al., 2016</xref>), and fgsea (<xref ref-type="bibr" rid="B18">Korotkevich et&#x20;al., 2021</xref>) methods, which were specifically designed for pathway analyses of these different omics data&#x20;types.</p>
<p>More specifically, for the analysis of genetic variants, <xref ref-type="bibr" rid="B19">Kunkle et&#x20;al. (2019)</xref> described a recent large meta-analysis of more than 90,000 individuals to identify genetic variants associated with AD. We downloaded summary statistics for individual variants obtained in this study from <ext-link ext-link-type="uri" xlink:href="https://www.niagads.org/igap-rv-summary-stats-kunkle-p-value-data">https://www.niagads.org/igap-rv-summary-stats-kunkle-p-value-data</ext-link> (&#x201c;Kunkle_et&#x20;al._Stage1_results.txt&#x201d;). Next, we performed GWAS pathway analysis using the mixed model approach (<xref ref-type="bibr" rid="B42">Wang et&#x20;al., 2011</xref>), which tested the combined association signals from a group of variants in the same pathway against the null hypothesis that there is no overall association between SNPs in a pathway and the outcome (i.e.,&#x20;AD status). An empirical null distribution, estimated using the bacon R package (<xref ref-type="bibr" rid="B39">van Iterson et&#x20;al., 2017</xref>), was used to estimate the statistical significance of the pathways.</p>
<p>For the analysis of DNA methylation data, we recently performed a meta-analysis of more than 1,000 prefrontal cortex brain samples (<xref ref-type="bibr" rid="B44">Zhang et&#x20;al., 2020</xref>) to identify epigenetic changes associated with AD Braak stage, a standardized measure of neurofibrillary tangle burden determined at autopsy. Braak scores range from 0 to 6, corresponding to increased severity of the disease (<xref ref-type="bibr" rid="B2">Braak and Braak, 1995</xref>). <xref ref-type="sec" rid="s11">Supplementary Tables 1, 2</xref> in <xref ref-type="bibr" rid="B44">Zhang et&#x20;al. (2020)</xref> included summary statistics for 3,751 differentially methylated individual CpGs and 119 differentially methylated regions (DMRs) that reached a 5% FDR significance threshold in our meta-analysis. The combined collections of the significant individual CpGs and CpGs located in the DMRs were then used as input for pathway analysis via the MissMethyl R package (<xref ref-type="bibr" rid="B32">Phipson et&#x20;al., 2016</xref>), which performs over-representation analysis by determining if AD Braak-associated CpGs are significantly enriched in a pathway. In particular, MissMethyl models the multiple probes mapped to each gene on the methylation arrays using the Wallenius&#x2019; noncentral hypergeometric&#x20;test.</p>
<p>For the analysis of RNASeq data, we analyzed 640 samples of RNAseq data measured on postmortem prefrontal cortex brain samples in the ROSMAP AD study. Normalized FPKM (Fragments Per Kilobase of transcript per Million mapped reads) gene expression values generated by the ROSMAP AD study were downloaded from the AMP-AD Knowledge Portal (Synapse ID: syn3388564). For each gene, we assessed the association between gene expression and Braak stage. More specifically, for each gene, we fitted the linear model log2 (normalized FPKM values &#x2b;1) &#x223c; Braak stage &#x2b; ageAtDeath &#x2b; sex &#x2b; markers for cell types. The last term, &#x201c;markers for cell types,&#x201d; included multiple covariate variables to adjust for the multiple types of cells in the brain samples. Specifically, we estimated expression levels of genes that are specific for the five main cell types present in the CNS: ENO2 for neurons, GFAP for astrocytes, CD68 for microglia, OLIG2 for oligodendrocytes, and CD34 for endothelial cells, and included these as variables in the above linear regression model, as was done in a previous large study of AD samples (<xref ref-type="bibr" rid="B6">De Jager et&#x20;al., 2014</xref>). This linear model identifies genes for which gene expressions are associated with AD Braak stage linearly (<xref ref-type="bibr" rid="B44">Zhang et&#x20;al., 2020</xref>). For pathway analysis, we ranked each gene by <italic>p-</italic>values for the Braak stage in the above linear model, which was then used as input for the Fast Gene Set Enrichment Analysis (fgsea) (<xref ref-type="bibr" rid="B18">Korotkevich et&#x20;al., 2021</xref>) software. The fgsea software performs pathway analysis of genome-wide gene expression data by determining if genes within a pathway are enriched on top of the gene list (ranked by gene-wise differential gene expression <italic>p-</italic>values) compared to the rest of the&#x20;genes.</p>
<p>The pairwise correlations of <italic>p-</italic>values in individual omics data types are very small, at <italic>&#x3c1;</italic> &#x3d; 0.0045 (SNP pathway <italic>p</italic>-values vs. DNAm pathway <italic>p</italic>-values), &#x2212;0.0263 (SNP pathway <italic>p</italic>-values vs. RNAseq pathway <italic>p</italic>-values), and 0.0432 (DNAm pathway <italic>p</italic>-values vs. RNAseq pathway <italic>p-</italic>values). In pathwayMultiomics, we used the approximation approach, supported by the relatively low pairwise correlations in pathway <italic>p-</italic>values of individual omics data&#x20;types.</p>
</sec>
<sec id="s2-4-2">
<title>mitch Analysis</title>
<p>The input of mitch R package is summary statistics for genes such as <italic>p-</italic>values for different types of omics data. For the GWAS meta-analysis results described in (<xref ref-type="bibr" rid="B19">Kunkle et&#x20;al., 2019</xref>), we assigned SNPs to a gene if they were located within 5&#xa0;kb upstream of the first exon or downstream of the last exon (<xref ref-type="bibr" rid="B42">Wang et&#x20;al., 2011</xref>). Next, we represented each gene by the smallest <italic>p-</italic>value if there are multiple SNPs associated with it. To remove selection bias due to different numbers of SNPs associated with each gene (i.e.,&#x20;the smallest <italic>p-</italic>value for a gene with many SNPs is likely to be smaller than the smallest <italic>p-</italic>value for a gene with only a few SNPs), we next fit a generalized additive model using the R package gam: <inline-formula id="inf13">
<mml:math id="m13">
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>k</mml:mi>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> where <italic>Y</italic>
<sub>
<italic>i</italic>
</sub> is the -log<sub>10</sub> transformation of the smallest <italic>p-</italic>value for gene <italic>i</italic>, <italic>n.links</italic>
<sub>
<italic>i</italic>
</sub> is the number of SNPs associated with gene <italic>i,</italic> and <italic>f</italic> is a spline function. We assumed gamma distribution for <italic>Y</italic>
<sub>
<italic>i</italic>
</sub>, as under the null hypothesis of no association, <italic>Y</italic>
<sub>
<italic>i</italic>
</sub> follows the chi-square distribution (a special case of gamma distribution). The spline model allows us to model linear and nonlinear associations between the number of SNPs mapped to a gene and the strength of significance for the gene as previously described (<xref ref-type="bibr" rid="B45">Zhang et&#x20;al., 2021</xref>). The residuals from this model, which represented -log<sub>10</sub> transformation of the <italic>p-</italic>values with gene size effects removed, were then estimated, and used as input for genetic data in&#x20;mitch.</p>
<p>Similarly, for the analysis of DNA methylation data, we assigned CpGs to genes based on Illumina annotation, represented each gene by the CpG with the smallest <italic>p-</italic>value, and removed the bias due to gene size using the same spline model described above, except that <italic>n.links</italic>
<sub>
<italic>i</italic>
</sub> is the number of CpGs associated with gene <italic>i.</italic> The residuals from the spline model were then used as input for DNAm data in&#x20;mitch.</p>
<p>For the analysis of RNAseq data, we used the R package fgsea&#x20;(<xref ref-type="bibr" rid="B18">Korotkevich et&#x20;al., 2021</xref>). For each gene, we fit a linear model log2 (normalized FPKM values &#x2b;1) &#x223c; Braak stage &#x2b; ageAtDeath &#x2b; sex &#x2b; markers for cell types. As described above, the last term, &#x201c;markers for cell types&#x201d; included covariate variables (marker gene expressions of ENO2, <italic>GFAP, CD68, OLIG2, CD34</italic>) to adjust for the multiple types of cells in the brain samples. The -log10 transformation of the <italic>p-</italic>values for the Braak stage in the above model was then used as input for RNASeq data in&#x20;mitch.</p>
<p>All analyses were performed using the R software (version 4.0) and SAS software (version 9.4). We used the venny tool (<xref ref-type="bibr" rid="B30">Oliveros, 2007</xref>-2015) to draw the Venn diagram. To account for multiple comparisons, we computed the false discovery rate using the method of Benjamini and Hochberg (<xref ref-type="bibr" rid="B1">Benjamini and Hochberg, 1995</xref>). The scripts for the analysis performed in this study can be accessed at <ext-link ext-link-type="uri" xlink:href="https://github.com/TransBioInfoLab/pathwayMultiomics_manuscript_supplement">https://github.com/TransBioInfoLab/pathwayMultiomics_manuscript_supplement</ext-link>.</p>
</sec>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<sec id="s3-1">
<title>Results of the Simulation Study</title>
<p>As discussed in Methods, pathwayMultiomics has two approaches for computing <italic>p-</italic>values, either by approximation using formula or by simulation. Our results showed the estimated parameters <italic>&#x3b1;</italic> and <italic>&#x3b2;</italic> for Beta distribution based on simulation are <italic>&#x3b1;</italic> &#x3d; 1.85 and <italic>&#x3b2;</italic> &#x3d; 1.9, which are very similar to the theoretical values of <italic>&#x3b1;</italic> &#x3d; 2 and <italic>&#x3b2;</italic> &#x3d; 2 used in the approximation approach. The results in <xref ref-type="sec" rid="s11">Supplementary Table&#x20;1</xref> showed that both the simulation and approximation approaches had Type-I error rates close to 5%. Therefore, we next compared AUCs for the pathwayMultiomics method in the approximation approach with the other four methods.</p>
<p>Among all methods, the pathwayMultiomics method performed best with the highest AUCs across all 20 simulation scenarios (<xref ref-type="fig" rid="F2">Figure&#x20;2</xref>, <xref ref-type="sec" rid="s11">Supplementary Table&#x20;2</xref>). The second-best performing method is mitch, for which ranking genes by <italic>p-</italic>values performed better than ranking genes by <italic>t</italic>-statistic in most simulation scenarios, except the ones with weak association signals (i.e.,&#x20;effect size &#x3d; 0.1). The iProFun method also performed well in the simulated pathways that included a high proportion (e.g., 80%) of genes with large association signals (e.g., effect size &#x3d; 0.5). On the other hand, the sparse mCCA and MFA methods lacked power, probably because these matrix factorization techniques lost information by requiring matched samples or genes across all platforms, and their unsupervised framework also ignored phenotype information. Because sparse mCCA lacked power even in the last simulation scenario with the strongest signal (80% genes in a true positive pathway are treated with an effect size of 0.5), we only included AUC for sparse mCCA in the last simulation scenario.</p>
</sec>
<sec id="s3-2">
<title>Case Study: Analysis of Multi-Omics Datasets in Alzheimer&#x2019;s Disease</title>
<p>We next applied the two methods that performed best in our simulation study, pathwayMultiomics and mitch, to analyze a collection of real multi-omics datasets in Alzheimer&#x2019;s disease, which included summary statistics for genetic variants and DNA methylation from two recent large-scale meta-analysis studies (<xref ref-type="bibr" rid="B19">Kunkle et&#x20;al., 2019</xref>; <xref ref-type="bibr" rid="B44">Zhang et&#x20;al., 2020</xref>), as well as a gene expression dataset measured on the prefrontal cortex of brain samples generated by the ROSMAP study (<xref ref-type="bibr" rid="B6">De Jager et&#x20;al., 2014</xref>; <xref ref-type="bibr" rid="B5">De Jager et&#x20;al., 2018</xref>). Note that because we did not have access to raw genotype data included in the meta-analysis, many of the tools that require raw omics data would not be applicable here. In contrast, pathwayMultiomics and mitch can be applied to analyze summary statistics obtained in meta-analyses. For comparison, we also included a third method, the commonly used Venn diagram method, which identifies pathways that are significant in multiple omics data&#x20;types.</p>
<p>We analyzed 2,833 canonical pathways (C2:CP collection) in MSigDB (<xref ref-type="bibr" rid="B36">Subramanian et&#x20;al., 2005</xref>) that included between 3 and 200 genes. Analyzing each omics data type individually, at a 5% false discovery rate (FDR), we identified 66, 2, and 666 pathways associated with AD in SNP, DNAm, and gene expression data, respectively (<xref ref-type="sec" rid="s11">Supplementary Table&#x20;3&#x2013;5</xref>). There was little agreement between the FDR-significant pathways identified in different omics datasets (<xref ref-type="fig" rid="F3">Figure&#x20;3</xref>). A possible reason could be the lack of power in single omics studies for Alzheimer&#x2019;s disease, which has relatively weaker association signals than other complex diseases such as cancers. Among the top pathways, only seven pathways reached 5% FDR in more than one omics data type. These seven pathways, which reached 5% FDR in both GWAS and RNASeq analysis, are MHC Class II antigen presentation, TCR signaling, factors involved in megakaryocyte development and production, Rig I like receptor signaling pathway, DDX58 IFIH1 mediated induction of interferon alpha-beta, and regulation of toll-like receptor signaling pathway, all of which are involved in inflammatory responses, highlighting the importance of immune processes in AD (<xref ref-type="bibr" rid="B4">Cunningham, 2013</xref>; <xref ref-type="bibr" rid="B11">Heneka et&#x20;al., 2015</xref>).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Venn diagram of pathway analyses results for individual omics data types. A total of 66, 2 and 666 significant pathways reached 5% false discovery rate in the pathway analyses of GWAS, DNA methylation (DNAm) and RNASeq data, respectively. Very few pathways (n &#x3d; 7) were significantly associated with AD in more than one omics data type. The mixed model approach, MissMethyl, and fgsea, which were specifically designed for pathway analyses of genetic variants, DNAm, and gene expression data, respectively, were used to analyze a total of 2,833 canonical pathways in the MSigDB database.</p>
</caption>
<graphic xlink:href="fgene-12-783713-g003.tif"/>
</fig>
<p>At 5% FDR, pathwayMultiomics identified 74 significant pathways (<xref ref-type="sec" rid="s11">Supplementary Table&#x20;6</xref>). Note that for this analysis example, the MiniMax statistic in pathwayMultiomics is the minimum of all maximums in pairs of <italic>p-</italic>values from individual omics, that is min{ max (SNP pathway <italic>p-</italic>value, DNAm pathway <italic>p-</italic>value), max (SNP pathway <italic>p-</italic>value, RNAseq pathway <italic>p-</italic>value), max (DNAm pathway <italic>p-</italic>value, RNAseq pathway <italic>p-</italic>value) }. For these significant pathways, we next examined which two omics data types contributed to the MiniMax statistic. Among the 74 pathways, the significance of the pathwayMultiomics <italic>p-</italic>value (for MiniMax statistic) was driven by pathway <italic>p-</italic>values for DNAm and RNA in the majority of pathways (n &#x3d; 40, 54%), followed by pathway <italic>p-</italic>values for SNP and RNA (n &#x3d; 25, 34%), recapitulating the prominent gene regulatory role of DNAm in AD (<xref ref-type="bibr" rid="B17">Klein et&#x20;al., 2016</xref>). In contrast, pathwayMultiomics <italic>p-</italic>values were driven by <italic>p-</italic>values for SNP and DNAm in only 9 (12%) out of the 74 significant pathways, consistent with the relatively independent contributions of genetic variants and DNA methylations in influencing AD susceptibility (<xref ref-type="bibr" rid="B3">Chibnik et&#x20;al., 2015</xref>; <xref ref-type="bibr" rid="B17">Klein et&#x20;al., 2016</xref>). 
The majority of the top 10 most significant pathways identified by pathwayMultiomics (<xref ref-type="table" rid="T2">Table&#x20;2</xref>) involved signaling pathways activated by the immune system in responses to amyloid-&#x3b2; induced neurotoxicity in AD brains, such as the activation of chemokines (<xref ref-type="bibr" rid="B14">Jorda et&#x20;al., 2020</xref>), toll-like receptors (<xref ref-type="bibr" rid="B21">Landreth and Reed-Geaghan, 2009</xref>), T&#x20;cell receptors (<xref ref-type="bibr" rid="B9">Gate et&#x20;al., 2020</xref>), PDGFR-beta receptors (<xref ref-type="bibr" rid="B25">Liu H. et&#x20;al., 2018</xref>), and CXCR4 receptors (<xref ref-type="bibr" rid="B23">Li and Wang, 2017</xref>). Notably, seven out of these top 10 pathways did not reach 5% FDR in more than one type of omics in the analysis of individual omics data types (<xref ref-type="fig" rid="F3">Figure&#x20;3</xref>), so these pathways would have been missed by the conventional Venn diagram method.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Top 10 most significant pathways identified by pathwayMultiomics in the analysis of multiomics Alzheimer&#x2019;s datasets.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th colspan="2" align="left">&#xa0;</th>
<th colspan="3" align="center">Single omics <italic>p</italic>-values</th>
<th colspan="3" align="center">Single omics FDRs</th>
<th colspan="4" align="center">pathwayMultiomics</th>
</tr>
<tr>
<th align="left">Pathway</th>
<th align="left">Size</th>
<th align="center">SNP</th>
<th align="center">DNAm</th>
<th align="center">RNASeq</th>
<th align="center">SNP</th>
<th align="center">DNAm</th>
<th align="center">RNASeq</th>
<th align="center">MiniMax</th>
<th align="center">
<italic>p</italic>-value</th>
<th align="center">FDR</th>
<th align="center">Contributing Omics</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">PID_PDGFRB_PATHWAY</td>
<td align="left">126</td>
<td align="center">6.99E-01</td>
<td align="center">1.45E-04</td>
<td align="center">1.67E-04</td>
<td align="center">9.99E-01</td>
<td align="center">1.37E-01</td>
<td align="center">3.30E-03</td>
<td align="center">1.67E-04</td>
<td align="center">8.33E-08</td>
<td align="center">2.36E-04</td>
<td align="left">DNAm, RNA</td>
</tr>
<tr>
<td align="left">WP_CHEMOKINE_SIGNALING_PATHWAY</td>
<td align="left">155</td>
<td align="center">8.19E-01</td>
<td align="center">3.17E-04</td>
<td align="center">1.94E-05</td>
<td align="center">9.99E-01</td>
<td align="center">1.39E-01</td>
<td align="center">9.00E-04</td>
<td align="center">3.17E-04</td>
<td align="center">3.02E-07</td>
<td align="center">3.28E-04</td>
<td align="left">DNAm, RNA</td>
</tr>
<tr>
<td align="left">KEGG_HEMATOPOIETIC_CELL_LINEAGE</td>
<td align="left">80</td>
<td align="center">3.67E-36</td>
<td align="center">3.40E-04</td>
<td align="center">7.61E-01</td>
<td align="center">3.24E-34</td>
<td align="center">1.39E-01</td>
<td align="center">8.11E-01</td>
<td align="center">3.40E-04</td>
<td align="center">3.48E-07</td>
<td align="center">3.28E-04</td>
<td align="left">SNP, DNAm</td>
</tr>
<tr>
<td align="left">PID_TCR_PATHWAY</td>
<td align="left">58</td>
<td align="center">4.48E-04</td>
<td align="center">2.75E-02</td>
<td align="center">4.90E-04</td>
<td align="center">2.04E-02</td>
<td align="center">6.43E-01</td>
<td align="center">6.55E-03</td>
<td align="center">4.90E-04</td>
<td align="center">7.20E-07</td>
<td align="center">5.10E-04</td>
<td align="left">SNP, RNA</td>
</tr>
<tr>
<td align="left">WP_REGULATION_OF_TOLLLIKE_RECEPTOR_SIGNALING_PATHWAY</td>
<td align="left">128</td>
<td align="center">3.76E-05</td>
<td align="center">3.32E-02</td>
<td align="center">6.55E-04</td>
<td align="center">2.08E-03</td>
<td align="center">6.68E-01</td>
<td align="center">7.70E-03</td>
<td align="center">6.55E-04</td>
<td align="center">1.29E-06</td>
<td align="center">5.69E-04</td>
<td align="left">SNP, RNA</td>
</tr>
<tr>
<td align="left">KEGG_CHEMOKINE_SIGNALING_PATHWAY</td>
<td align="left">172</td>
<td align="center">7.90E-01</td>
<td align="center">6.72E-04</td>
<td align="center">2.98E-04</td>
<td align="center">9.99E-01</td>
<td align="center">1.47E-01</td>
<td align="center">4.70E-03</td>
<td align="center">6.72E-04</td>
<td align="center">1.35E-06</td>
<td align="center">5.69E-04</td>
<td align="left">DNAm, RNA</td>
</tr>
<tr>
<td align="left">PID_KIT_PATHWAY</td>
<td align="left">52</td>
<td align="center">2.55E-01</td>
<td align="center">6.84E-04</td>
<td align="center">1.10E-04</td>
<td align="center">9.99E-01</td>
<td align="center">1.47E-01</td>
<td align="center">2.69E-03</td>
<td align="center">6.84E-04</td>
<td align="center">1.40E-06</td>
<td align="center">5.69E-04</td>
<td align="left">DNAm, RNA</td>
</tr>
<tr>
<td align="left">WP_KIT_RECEPTOR_SIGNALING_PATHWAY</td>
<td align="left">57</td>
<td align="center">3.05E-02</td>
<td align="center">3.68E-04</td>
<td align="center">1.41E-03</td>
<td align="center">5.95E-01</td>
<td align="center">1.39E-01</td>
<td align="center">1.26E-02</td>
<td align="center">1.41E-03</td>
<td align="center">5.94E-06</td>
<td align="center">2.10E-03</td>
<td align="left">DNAm, RNA</td>
</tr>
<tr>
<td align="left">PID_CXCR4_PATHWAY</td>
<td align="left">98</td>
<td align="center">4.87E-04</td>
<td align="center">1.50E-03</td>
<td align="center">7.29E-02</td>
<td align="center">2.19E-02</td>
<td align="center">2.36E-01</td>
<td align="center">1.56E-01</td>
<td align="center">1.50E-03</td>
<td align="center">6.76E-06</td>
<td align="center">2.13E-03</td>
<td align="left">SNP, DNAm</td>
</tr>
<tr>
<td align="left">REACTOME_TCR_SIGNALING</td>
<td align="left">112</td>
<td align="center">6.06E-52</td>
<td align="center">2.07E-01</td>
<td align="center">2.16E-03</td>
<td align="center">6.11E-50</td>
<td align="center">9.46E-01</td>
<td align="center">1.68E-02</td>
<td align="center">2.16E-03</td>
<td align="center">1.40E-05</td>
<td align="center">3.98E-03</td>
<td align="left">SNP, RNA</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>At 5% FDR, mitch identified 237 pathways (<xref ref-type="sec" rid="s11">Supplementary Table&#x20;7</xref>). The most significant pathway pointed to systemic lupus erythematosus (SLE), an autoimmune disease in which the immune system attacks the body&#x2019;s own tissues. A recent meta-analysis found that patients with SLE have a significantly higher risk for cognitive impairment (<xref ref-type="bibr" rid="B47">Zhao et&#x20;al., 2018</xref>). Other top pathways (<xref ref-type="table" rid="T3">Table&#x20;3</xref>) highlighted key biological processes regulated by proteins previously shown to be important in AD, such as PRC2 (<xref ref-type="bibr" rid="B44">Zhang et&#x20;al., 2020</xref>), which regulates neuronal lineage specification, proliferation, and differentiation (<xref ref-type="bibr" rid="B26">Liu P.-P. et&#x20;al., 2018</xref>); PKN1, which was shown to have a neuroprotective role (<xref ref-type="bibr" rid="B37">Thauerer et&#x20;al., 2014</xref>); and histone deacetylases (HDACs), which maintain histone acetylation homeostasis and play important roles in the process of neuronal differentiation, neurite outgrowth, and neuroprotection (<xref ref-type="bibr" rid="B34">Shukla and Tekwani, 2020</xref>).</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Top 10 most significant pathways identified by the mitch method in the analysis of Alzheimer&#x2019;s disease multi-omics datasets.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Pathway</th>
<th align="center">Size</th>
<th align="center">
<italic>p-</italic>value</th>
<th align="center">FDR</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">KEGG_SYSTEMIC_LUPUS_ERYTHEMATOSUS</td>
<td align="char" char=".">128</td>
<td align="center">7.49E-19</td>
<td align="center">2.11E-15</td>
</tr>
<tr>
<td align="left">REACTOME_SIRT1_NEGATIVELY_REGULATES_RRNA_EXPRESSION</td>
<td align="char" char=".">65</td>
<td align="center">3.16E-15</td>
<td align="center">4.46E-12</td>
</tr>
<tr>
<td align="left">REACTOME_DNA_METHYLATION</td>
<td align="char" char=".">62</td>
<td align="center">1.21E-13</td>
<td align="center">1.14E-10</td>
</tr>
<tr>
<td align="left">REACTOME_ACTIVATED_PKN1_STIMULATES_TRANSCRIPTION_OF_AR_ANDROGEN_RECEPTOR_REGULATED_GENES_KLK2_AND_KLK3</td>
<td align="char" char=".">64</td>
<td align="center">2.40E-13</td>
<td align="center">1.69E-10</td>
</tr>
<tr>
<td align="left">REACTOME_HDACS_DEACETYLATE_HISTONES</td>
<td align="char" char=".">91</td>
<td align="center">6.03E-13</td>
<td align="center">3.40E-10</td>
</tr>
<tr>
<td align="left">REACTOME_CONDENSATION_OF_PROPHASE_CHROMOSOMES</td>
<td align="char" char=".">71</td>
<td align="center">5.28E-12</td>
<td align="center">2.48E-09</td>
</tr>
<tr>
<td align="left">REACTOME_HDMS_DEMETHYLATE_HISTONES</td>
<td align="char" char=".">45</td>
<td align="center">5.17E-11</td>
<td align="center">2.08E-08</td>
</tr>
<tr>
<td align="left">REACTOME_FORMATION_OF_THE_CORNIFIED_ENVELOPE</td>
<td align="char" char=".">129</td>
<td align="center">4.21E-10</td>
<td align="center">1.48E-07</td>
</tr>
<tr>
<td align="left">REACTOME_PRC2_METHYLATES_HISTONES_AND_DNA</td>
<td align="char" char=".">70</td>
<td align="center">5.40E-10</td>
<td align="center">1.69E-07</td>
</tr>
<tr>
<td align="left">REACTOME_TRANSCRIPTIONAL_REGULATION_OF_GRANULOPOIESIS</td>
<td align="char" char=".">88</td>
<td align="center">6.79E-10</td>
<td align="center">1.91E-07</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Between the three methods (pathwayMultiomics, mitch, and Venn diagram), there was only modest overlap (<xref ref-type="fig" rid="F4">Figure&#x20;4</xref>). A total of 32 pathways (11%) reached 5% FDR by both pathwayMultiomics and mitch methods. PathwayMultiomics identified all seven pathways that were significant in more than one omics data type based on the Venn diagram method. There was no overlap between the significant pathways identified by mitch and by the Venn diagram method, except for one pathway (T&#x20;cell Receptor pathway), which was identified by all three methods.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>A comparison of FDR significant pathways identified by pathwayMultiomics, mitch, and Venn diagram analyses. At 5% FDR, pathwayMultiomics and mitch identified 74 and 237 pathways, respectively. The Venn diagram method identified 7 pathways that reached 5% FDR in more than one omics data type. There was only modest overlap between the three methods. A total of 32 pathways (11%) were significant in both pathwayMultiomics and mitch methods. PathwayMultiomics identified all the pathways found significant by the Venn diagram method. There was no overlap between the significant pathways identified by mitch and by the Venn diagram method, except for one pathway (T&#x20;cell Receptor pathway), which was identified by all three methods.</p>
</caption>
<graphic xlink:href="fgene-12-783713-g004.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<p>To identify pathways dysregulated in multiple types of omics datasets, we developed the pathwayMultiomics R package. PathwayMultiomics is flexible and only requires pathway <italic>p-</italic>values for individual omics data types as input, thus making it possible to take advantage of pathway analysis tools that are specially designed for each omics data type. In addition, pathwayMultiomics is computationally efficient, does not require matched samples from multi-omics data, and is applicable in&#x20;situations when raw omics data are not available, such as when aggregating summary statistics from meta-analyses related to the same disease. PathwayMultiomics is also informative; the individual omics data type that contributed to pathwayMultiomics significance can be used to distinguish pathways with potentially different underlying regulatory mechanisms, such as the pathways for which gene expressions are regulated by DNA methylation versus pathways for which gene expressions are mainly regulated by genetic variants.</p>
<p>We performed a comprehensive simulation study to assess the statistical properties of our method. To emulate correlation patterns in real omics datasets, we generated simulation datasets using real TCGA multi-omics datasets as input. We showed that pathwayMultiomics significantly outperforms currently available multi-omics methods with improved power and well-controlled false-positive rates. A challenge with analyzing multi-omics datasets is that many of the samples with data recorded for one molecular type did not have matching data from other data types. Therefore, methods that require matched samples across all data types (e.g., mSCCA) would only analyze a subset of the samples, which would result in reduced statistical power. Also, often only a subset of genes is measured by multiple omics platforms. Therefore, methods that require the same set of genes measured on all omics data types (e.g., MFA) may also exclude important biological signals, leading to reduced power. Finally, unsupervised methods (e.g., NMF, sCCA, and iProFun) might also lose power because they do not leverage information in the phenotypes. In contrast, pathwayMultiomics gains power by leveraging information in all samples (including the un-matched samples), and all features (e.g., genes) mapped to the pathways, as well as phenotype information along with multi-omics&#x20;data.</p>
<p>To further assess the performance of pathwayMultiomics on real datasets, we also compared it with two alternative approaches using the Venn diagram and mitch. When multiple types of omics data are available, a commonly used strategy is to test for marginal associations between each type of omics data with phenotype first, and then use a Venn diagram to intersect significant pathways or genes that overlap in different omics data types. Although a good visualization tool, Venn diagrams do not provide prioritization or any statistical assessment for pathways. In addition, this approach might be overly stringent because when several types of omics data are considered, often few (if any) pathways pass the threshold of statistical significance in all omics data types. In contrast, pathwayMultiomics provides prioritization and statistical assessment for pathways with moderate to strong association signals in multiple omics data types. In our analysis of multi-omics AD datasets, at 5% FDR, pathwayMultiomics identified 67 pathways in addition to the seven FDR-significant pathways in more than one type of omics data as identified by the Venn diagram method. The discrepancy in multi-omics analysis results by pathwayMultiomics and mitch is not unexpected. In addition to the differences in underlying algorithms, an important reason might also be the different hypotheses these methods test. While mitch tests the competitive null hypothesis that the genes in a pathway show the same magnitude of associations with the disease phenotype compared with genes in the rest of the genome, pathwayMultiomics tests the self-contained null hypothesis that the genes in a pathway are not associated with the disease phenotype (<xref ref-type="bibr" rid="B38">Tian et&#x20;al., 2005</xref>). Therefore, mitch and pathwayMultiomics analysis complement each other in the analysis of multi-omics datasets. 
PathwayMultiomics is available as an R package and can be accessed at <ext-link ext-link-type="uri" xlink:href="https://github.com/TransBioInfoLab/pathwayMultiomics">https://github.com/TransBioInfoLab/pathwayMultiomics</ext-link>.</p>
</sec>
<sec sec-type="conclusions" id="s5">
<title>Conclusions</title>
<p>In summary, we have presented the pathwayMultiomics method, which can be used to analyze multi-omics data with any type of outcome variables (e.g., categorical, continuous, or survival phenotypes). We have shown that pathwayMultiomics significantly outperforms currently available multi-omics methods with improved power and well-controlled false-positive rates. In addition, we also analyzed multi-omics datasets in Alzheimer&#x2019;s disease to show that pathwayMultiomics was able to recover known biology, as well as nominate novel biologically meaningful pathways. We expect pathwayMultiomics to be a useful tool for integrative analysis of multiple types of omics&#x20;data.</p>
</sec>
</body>
<back>
<sec id="s6">
<title>Data Availability Statement</title>
<p>The TCGA cancer datasets can be accessed from the LinkedOmics repository <ext-link ext-link-type="uri" xlink:href="http://linkedomics.org/login.php">http://linkedomics.org/login.php</ext-link>; the Alzheimer&#x2019;s GWAS summary statistics can be accessed from <ext-link ext-link-type="uri" xlink:href="https://www.niagads.org/igap-rv-summary-stats-kunkle-p-value-data">https://www.niagads.org/igap-rv-summary-stats-kunkle-p-value-data</ext-link> (file &#x201c;Kunkle_et&#x20;al._Stage1_results.txt&#x201d;); and the ROSMAP RNASeq dataset can be accessed from AMP-AD (accession: syn3388564). The pathwayMultiomics software can be accessed at <ext-link ext-link-type="uri" xlink:href="https://github.com/TransBioInfoLab/pathwayMultiomics">https://github.com/TransBioInfoLab/pathwayMultiomics</ext-link>. The scripts for the analysis performed in this study can be accessed at&#x20;<ext-link ext-link-type="uri" xlink:href="https://github.com/TransBioInfoLab/pathwayMultiomics_manuscript_supplement">https://github.com/TransBioInfoLab/pathwayMultiomics_manuscript_supplement</ext-link>. The benchmark dataset used in the simulation study is available at <ext-link ext-link-type="uri" xlink:href="https://zenodo.org/record/5683002">https://zenodo.org/record/5683002&#x23;.YZF5SGDMKUk</ext-link>.</p>
</sec>
<sec id="s7">
<title>Author Contributions</title>
<p>GO, LW, XC, AC, and TS designed the computational analysis. GO, AC, TS, and LW analyzed the data. GO, LW, XC, and AC contributed to the interpretation of the results. GO and LW wrote the paper, and all authors participated in the review and revision of the manuscript. LW conceived the original idea and supervised the project.</p>
</sec>
<sec id="s8">
<title>Funding</title>
<p>This work was supported by the National Institutes of Health (R01CA158472 (XC), R01CA200987 (XC), P30CA240139 (XC), R01AG061127 (LW), R01AG062634 (LW), and R21AG060459 (LW)). The ROSMAP study data were provided by the Rush Alzheimer&#x2019;s Disease Center, Rush University Medical Center, Chicago. Data collection was supported through funding by NIA grants P30AG10161, R01AG15819, R01AG17917, R01AG30146, R01AG36836, U01AG32984, U01AG46152, the Illinois Department of Public Health, and the Translational Genomics Research Institute.</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ack>
<p>The authors would like to thank Dr. Bing Zhang for helpful discussions.</p>
</ack>
<sec id="s11">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2021.783713/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2021.783713/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet1.xlsx" id="SM1" mimetype="application/xlsx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Benjamini</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hochberg</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing</article-title>. <source>J.&#x20;R. Stat. Soc. Ser. B (Methodological)</source> <volume>57</volume>, <fpage>289</fpage>&#x2013;<lpage>300</lpage>. <pub-id pub-id-type="doi">10.1111/j.2517-6161.1995.tb02031.x</pub-id> </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Braak</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Braak</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Staging of Alzheimer&#x27;s Disease-Related Neurofibrillary Changes</article-title>. <source>Neurobiol. Aging</source> <volume>16</volume>, <fpage>271</fpage>&#x2013;<lpage>278</lpage>. <pub-id pub-id-type="doi">10.1016/0197-4580(95)00021-6</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chibnik</surname>
<given-names>L. B.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Eaton</surname>
<given-names>M. L.</given-names>
</name>
<name>
<surname>Srivastava</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Schneider</surname>
<given-names>J.&#x20;A.</given-names>
</name>
<name>
<surname>Kellis</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Alzheimer&#x27;s Loci: Epigenetic Associations and Interaction with Genetic Factors</article-title>. <source>Ann. Clin. Transl Neurol.</source> <volume>2</volume>, <fpage>636</fpage>&#x2013;<lpage>647</lpage>. <pub-id pub-id-type="doi">10.1002/acn3.201</pub-id> </citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cunningham</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Microglia and Neurodegeneration: the Role of Systemic Inflammation</article-title>. <source>Glia</source> <volume>61</volume>, <fpage>71</fpage>&#x2013;<lpage>90</lpage>. <pub-id pub-id-type="doi">10.1002/glia.22350</pub-id> </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>De Jager</surname>
<given-names>P. L.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Mccabe</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Vardarajan</surname>
<given-names>B. N.</given-names>
</name>
<name>
<surname>Felsky</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>A Multi-Omic Atlas of the Human Frontal Cortex for Aging and Alzheimer&#x27;s Disease Research</article-title>. <source>Sci. Data</source> <volume>5</volume>, <fpage>180142</fpage>. <pub-id pub-id-type="doi">10.1038/sdata.2018.142</pub-id> </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>De Jager</surname>
<given-names>P. L.</given-names>
</name>
<name>
<surname>Srivastava</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Lunnon</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Burgess</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Schalkwyk</surname>
<given-names>L. C.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Alzheimer&#x27;s Disease: Early Alterations in Brain DNA Methylation at ANK1, BIN1, RHBDF2 and Other Loci</article-title>. <source>Nat. Neurosci.</source> <volume>17</volume>, <fpage>1156</fpage>&#x2013;<lpage>1163</lpage>. <pub-id pub-id-type="doi">10.1038/nn.3786</pub-id> </citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dray</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Dufour</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>The Ade4 Package: Implementing the Duality Diagram for Ecologists</article-title>. <source>J.&#x20;Stat. Softw.</source> <volume>22</volume>, <fpage>1</fpage>&#x2013;<lpage>20</lpage>. <pub-id pub-id-type="doi">10.18637/jss.v022.i04</pub-id> </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Foat</surname>
<given-names>B. C.</given-names>
</name>
<name>
<surname>Bussemaker</surname>
<given-names>H. J.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Defining Transcriptional Networks through Integrative Modeling of mRNA Expression and Transcription Factor Binding Data</article-title>. <source>BMC Bioinformatics</source> <volume>5</volume>, <fpage>31</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-5-31</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gate</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Saligrama</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Leventhal</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Unger</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Middeldorp</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Clonally Expanded CD8 T&#x20;Cells Patrol the Cerebrospinal Fluid in Alzheimer&#x27;s Disease</article-title>. <source>Nature</source> <volume>577</volume>, <fpage>399</fpage>&#x2013;<lpage>404</lpage>. <pub-id pub-id-type="doi">10.1038/s41586-019-1895-7</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Gentle</surname>
<given-names>J.&#x20;E.</given-names>
</name>
</person-group> (<year>2009</year>). <source>Computational Statistics</source>. <publisher-loc>Berlin/Heidelberg, Germany</publisher-loc>: <publisher-name>Springer</publisher-name>. </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Heneka</surname>
<given-names>M. T.</given-names>
</name>
<name>
<surname>Carson</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Khoury</surname>
<given-names>J.&#x20;E.</given-names>
</name>
<name>
<surname>Landreth</surname>
<given-names>G. E.</given-names>
</name>
<name>
<surname>Brosseron</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Feinstein</surname>
<given-names>D. L.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Neuroinflammation in Alzheimer&#x27;s Disease</article-title>. <source>Lancet Neurol.</source> <volume>14</volume>, <fpage>388</fpage>&#x2013;<lpage>405</lpage>. <pub-id pub-id-type="doi">10.1016/s1474-4422(15)70016-5</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chaudhary</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Garmire</surname>
<given-names>L. X.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>More Is Better: Recent Progress in Multi-Omics Data Integration Methods</article-title>. <source>Front. Genet.</source> <volume>8</volume>, <fpage>84</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2017.00084</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jones</surname>
<given-names>M. C.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Kumaraswamy&#x27;s Distribution: A Beta-type Distribution with Some Tractability Advantages</article-title>. <source>Stat. Methodol.</source> <volume>6</volume>, <fpage>70</fpage>&#x2013;<lpage>81</lpage>. <pub-id pub-id-type="doi">10.1016/j.stamet.2008.04.001</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jorda</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Campos-Campos</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Iradi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Aldasoro</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Aldasoro</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Vila</surname>
<given-names>J.&#x20;M.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>The Role of Chemokines in Alzheimer&#x27;s Disease</article-title>. <source>Endocr. Metab. Immune Disord. Drug Targets</source> <volume>20</volume>, <fpage>1383</fpage>&#x2013;<lpage>1390</lpage>. <pub-id pub-id-type="doi">10.2174/1871530320666200131110744</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kanehisa</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Goto</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sato</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Furumichi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Tanabe</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>KEGG for Integration and Interpretation of Large-Scale Molecular Data Sets</article-title>. <source>Nucleic Acids Res.</source> <volume>40</volume>, <fpage>D109</fpage>&#x2013;<lpage>D114</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkr988</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kaspi</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ziemann</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Mitch: Multi-Contrast Pathway Enrichment for Multi-Omics and Single-Cell Profiling Data</article-title>. <source>BMC Genomics</source> <volume>21</volume>, <fpage>447</fpage>. <pub-id pub-id-type="doi">10.1186/s12864-020-06856-9</pub-id> </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Klein</surname>
<given-names>H.-U.</given-names>
</name>
<name>
<surname>Bennett</surname>
<given-names>D. A.</given-names>
</name>
<name>
<surname>De Jager</surname>
<given-names>P. L.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>The Epigenome in Alzheimer&#x27;s Disease: Current State and Approaches for a New Path to Gene Discovery and Understanding Disease Mechanism</article-title>. <source>Acta Neuropathol.</source> <volume>132</volume>, <fpage>503</fpage>&#x2013;<lpage>514</lpage>. <pub-id pub-id-type="doi">10.1007/s00401-016-1612-7</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Korotkevich</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Sukhov</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Budin</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Shpak</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Artyomov</surname>
<given-names>M. N.</given-names>
</name>
<name>
<surname>Sergushichev</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Fast Gene Set Enrichment Analysis</article-title>. <source>bioRxiv</source>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.biorxiv.org/content/10.1101/060012v060013.full.pdf">https://www.biorxiv.org/content/10.1101/060012v060013.full.pdf</ext-link>
</comment>. </citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kunkle</surname>
<given-names>B. W.</given-names>
</name>
<name>
<surname>Grenier-Boley</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Sims</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Bis</surname>
<given-names>J.&#x20;C.</given-names>
</name>
<name>
<surname>Damotte</surname>
<given-names>V.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Genetic Meta-Analysis of Diagnosed Alzheimer&#x27;s Disease Identifies New Risk Loci and Implicates A&#x3b2;, Tau, Immunity and Lipid Processing</article-title>. <source>Nat. Genet.</source> <volume>51</volume>, <fpage>414</fpage>&#x2013;<lpage>430</lpage>. <pub-id pub-id-type="doi">10.1038/s41588-019-0358-2</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kutalik</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Beckmann</surname>
<given-names>J.&#x20;S.</given-names>
</name>
<name>
<surname>Bergmann</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>A Modular Approach for Integrative Analysis of Large-Scale Gene-Expression and Drug-Response Data</article-title>. <source>Nat. Biotechnol.</source> <volume>26</volume>, <fpage>531</fpage>&#x2013;<lpage>539</lpage>. <pub-id pub-id-type="doi">10.1038/nbt1397</pub-id> </citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Landreth</surname>
<given-names>G. E.</given-names>
</name>
<name>
<surname>Reed-Geaghan</surname>
<given-names>E. G.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Toll-like Receptors in Alzheimer&#x27;s Disease</article-title>. <source>Curr. Top. Microbiol. Immunol.</source> <volume>336</volume>, <fpage>137</fpage>&#x2013;<lpage>153</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-642-00549-7_8</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>L&#xea; Cao</surname>
<given-names>K.-A.</given-names>
</name>
<name>
<surname>Martin</surname>
<given-names>P. G.</given-names>
</name>
<name>
<surname>Robert-Grani&#xe9;</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Besse</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Sparse Canonical Methods for Biological Data Integration: Application to a Cross-Platform Study</article-title>. <source>BMC Bioinformatics</source> <volume>10</volume>, <fpage>34</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-10-34</pub-id> </citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>A Focus on CXCR4 in Alzheimer&#x27;s Disease</article-title>. <source>Brain Circ.</source> <volume>3</volume>, <fpage>199</fpage>&#x2013;<lpage>203</lpage>. <pub-id pub-id-type="doi">10.4103/bc.bc_13_17</pub-id> </citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Calhoun</surname>
<given-names>V. D.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>H.-W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.-P.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Group Sparse Canonical Correlation Analysis for Genomic Data Integration</article-title>. <source>BMC Bioinformatics</source> <volume>14</volume>, <fpage>245</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-14-245</pub-id> </citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Saffi</surname>
<given-names>G. T.</given-names>
</name>
<name>
<surname>Vasefi</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Kruk</surname>
<given-names>J.&#x20;S.</given-names>
</name>
<name>
<surname>Ahmed</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2018a</year>). <article-title>Amyloid-&#x3b2; Inhibits PDGF&#x3b2; Receptor Activation and Prevents PDGF-BB-Induced Neuroprotection</article-title>. <source>Curr. Alzheimer Res.</source> <volume>15</volume>, <fpage>618</fpage>&#x2013;<lpage>627</lpage>. <pub-id pub-id-type="doi">10.2174/1567205015666180110110321</pub-id> </citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>P.-P.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Y.-J.</given-names>
</name>
<name>
<surname>Teng</surname>
<given-names>Z.-Q.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C.-M.</given-names>
</name>
</person-group> (<year>2018b</year>). <article-title>Polycomb Repressive Complex 2: Emerging Roles in the Central Nervous System</article-title>. <source>Neuroscientist</source> <volume>24</volume>, <fpage>208</fpage>&#x2013;<lpage>220</lpage>. <pub-id pub-id-type="doi">10.1177/1073858417747839</pub-id> </citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meng</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Kuster</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Culhane</surname>
<given-names>A. C.</given-names>
</name>
<name>
<surname>Gholami</surname>
<given-names>A. M.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>A Multivariate Approach to the Integration of Multi-Omics Datasets</article-title>. <source>BMC Bioinformatics</source> <volume>15</volume>, <fpage>162</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-15-162</pub-id> </citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meng</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zeleznik</surname>
<given-names>O. A.</given-names>
</name>
<name>
<surname>Thallinger</surname>
<given-names>G. G.</given-names>
</name>
<name>
<surname>Kuster</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Gholami</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Culhane</surname>
<given-names>A. C.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Dimension Reduction Techniques for the Integrative Analysis of Multi-Omics Data</article-title>. <source>Brief. Bioinform.</source> <volume>17</volume>, <fpage>628</fpage>&#x2013;<lpage>641</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbv108</pub-id> </citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Odom</surname>
<given-names>G. J.</given-names>
</name>
<name>
<surname>Ban</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Colaprico</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Silva</surname>
<given-names>T. C.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>PathwayPCA: an R/Bioconductor Package for Pathway Based Integrative Analysis of Multi-Omics Data</article-title>. <source>Proteomics</source> <volume>20</volume>, <fpage>e1900409</fpage>. <pub-id pub-id-type="doi">10.1002/pmic.201900409</pub-id> </citation>
</ref>
<ref id="B30">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Oliveros</surname>
<given-names>J.&#x20;C.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Venny. An Interactive Tool for Comparing Lists with Venn&#x27;s Diagrams</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://bioinfogp.cnb.csic.es/tools/venny/index.html">https://bioinfogp.cnb.csic.es/tools/venny/index.html</ext-link>
</comment>. </citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Parkhomenko</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Tritchler</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Beyene</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Sparse Canonical Correlation Analysis with Application to Genomic Data Integration</article-title>. <source>Stat. Appl. Genet. Mol. Biol.</source> <volume>8</volume>, <fpage>Article1</fpage>. <pub-id pub-id-type="doi">10.2202/1544-6115.1406</pub-id> </citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Phipson</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Maksimovic</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Oshlack</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>missMethyl: an R Package for Analyzing Data from Illumina&#x27;s HumanMethylation450 Platform</article-title>. <source>Bioinformatics</source> <volume>32</volume>, <fpage>286</fpage>&#x2013;<lpage>288</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btv560</pub-id> </citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pucher</surname>
<given-names>B. M.</given-names>
</name>
<name>
<surname>Zeleznik</surname>
<given-names>O. A.</given-names>
</name>
<name>
<surname>Thallinger</surname>
<given-names>G. G.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Comparison and Evaluation of Integrative Methods for the Analysis of Multilevel Omics Data: a Study Based on Simulated and Experimental Cancer Data</article-title>. <source>Brief. Bioinform.</source> <volume>20</volume>, <fpage>671</fpage>&#x2013;<lpage>681</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bby027</pub-id> </citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shukla</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Tekwani</surname>
<given-names>B. L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Histone Deacetylases Inhibitors in Neurodegenerative Diseases, Neuroprotection and Neuronal Differentiation</article-title>. <source>Front. Pharmacol.</source> <volume>11</volume>, <fpage>537</fpage>. <pub-id pub-id-type="doi">10.3389/fphar.2020.00537</pub-id> </citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Ji</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gleason</surname>
<given-names>K. J.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Martignetti</surname>
<given-names>J.&#x20;A.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>L. S.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Insights into Impact of DNA Copy Number Alteration and Methylation on the Proteogenomic Landscape of Human Ovarian Cancer via a Multi-Omics Integrative Analysis</article-title>. <source>Mol. Cell Proteomics</source> <volume>18</volume>, <fpage>S52</fpage>&#x2013;<lpage>S65</lpage>. <pub-id pub-id-type="doi">10.1074/mcp.ra118.001220</pub-id> </citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Subramanian</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Tamayo</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Mootha</surname>
<given-names>V. K.</given-names>
</name>
<name>
<surname>Mukherjee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ebert</surname>
<given-names>B. L.</given-names>
</name>
<name>
<surname>Gillette</surname>
<given-names>M. A.</given-names>
</name>
<etal/>
</person-group> (<year>2005</year>). <article-title>Gene Set Enrichment Analysis: a Knowledge-Based Approach for Interpreting Genome-wide Expression Profiles</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>102</volume>, <fpage>15545</fpage>&#x2013;<lpage>15550</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0506580102</pub-id> </citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thauerer</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zur Nedden</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Baier-Bitterlich</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Protein Kinase C-Related Kinase (PKN/PRK). Potential Key-Role for PKN1 in Protection of Hypoxic Neurons</article-title>. <source>Curr. Neuropharmacol.</source> <volume>12</volume>, <fpage>213</fpage>&#x2013;<lpage>218</lpage>. <pub-id pub-id-type="doi">10.2174/1570159x11666131225000518</pub-id> </citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tian</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Greenberg</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Kong</surname>
<given-names>S. W.</given-names>
</name>
<name>
<surname>Altschuler</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kohane</surname>
<given-names>I. S.</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>P. J.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Discovering Statistically Significant Pathways in Expression Profiling Studies</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>102</volume>, <fpage>13544</fpage>&#x2013;<lpage>13549</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0506577102</pub-id> </citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Van Iterson</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Van Zwet</surname>
<given-names>E. W.</given-names>
</name>
<collab>BIOS Consortium</collab>
<name>
<surname>Heijmans</surname>
<given-names>B. T.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Controlling Bias and Inflation in Epigenome- and Transcriptome-wide Association Studies Using the Empirical Null Distribution</article-title>. <source>Genome Biol.</source> <volume>18</volume>, <fpage>19</fpage>. <pub-id pub-id-type="doi">10.1186/s13059-016-1131-9</pub-id> </citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vasaikar</surname>
<given-names>S. V.</given-names>
</name>
<name>
<surname>Straub</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>LinkedOmics: Analyzing Multi-Omics Data within and across 32 Cancer Types</article-title>. <source>Nucleic Acids Res.</source> <volume>46</volume>, <fpage>D956</fpage>&#x2013;<lpage>D963</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkx1090</pub-id> </citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Waaijenborg</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zwinderman</surname>
<given-names>A. H.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Sparse Canonical Correlation Analysis for Identifying, Connecting and Completing Gene-Expression Networks</article-title>. <source>BMC Bioinformatics</source> <volume>10</volume>, <fpage>315</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-10-315</pub-id> </citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Jia</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Wolfinger</surname>
<given-names>R. D.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Grayson</surname>
<given-names>B. L.</given-names>
</name>
<name>
<surname>Aune</surname>
<given-names>T. M.</given-names>
</name>
<etal/>
</person-group> (<year>2011</year>). <article-title>An Efficient Hierarchical Generalized Linear Mixed Model for Pathway Analysis of Genome-wide Association Studies</article-title>. <source>Bioinformatics</source> <volume>27</volume>, <fpage>686</fpage>&#x2013;<lpage>692</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btq728</pub-id> </citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Witten</surname>
<given-names>D. M.</given-names>
</name>
<name>
<surname>Tibshirani</surname>
<given-names>R. J.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Extensions of Sparse Canonical Correlation Analysis with Applications to Genomic Data</article-title>. <source>Stat. Appl. Genet. Mol. Biol.</source> <volume>8</volume>, <fpage>Article28</fpage>. <pub-id pub-id-type="doi">10.2202/1544-6115.1470</pub-id> </citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Silva</surname>
<given-names>T. C.</given-names>
</name>
<name>
<surname>Young</surname>
<given-names>J.&#x20;I.</given-names>
</name>
<name>
<surname>Gomez</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Schmidt</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Hamilton-Nelson</surname>
<given-names>K. L.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Epigenome-wide Meta-Analysis of DNA Methylation Differences in Prefrontal Cortex Implicates the Immune Processes in Alzheimer&#x27;s Disease</article-title>. <source>Nat. Commun.</source> <volume>11</volume>, <fpage>6114</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-020-19791-w</pub-id> </citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Young</surname>
<given-names>J.&#x20;I.</given-names>
</name>
<name>
<surname>Gomez</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Silva</surname>
<given-names>T. C.</given-names>
</name>
<name>
<surname>Schmidt</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Sex-specific DNA Methylation Differences in Alzheimer&#x27;s Disease Pathology</article-title>. <source>Acta Neuropathol. Commun.</source> <volume>9</volume>, <fpage>77</fpage>. <pub-id pub-id-type="doi">10.1186/s40478-021-01177-8</pub-id> </citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C.-C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Laird</surname>
<given-names>P. W.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>X. J.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Discovery of Multi-Dimensional Modules by Integrative Analysis of Cancer Genomic Data</article-title>. <source>Nucleic Acids Res.</source> <volume>40</volume>, <fpage>9379</fpage>&#x2013;<lpage>9391</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gks725</pub-id> </citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Rocha</surname>
<given-names>N. P.</given-names>
</name>
<name>
<surname>Salem</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Diniz</surname>
<given-names>B. S.</given-names>
</name>
<name>
<surname>Teixeira</surname>
<given-names>A. L.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>The Association between Systemic Lupus Erythematosus and Dementia: A Meta-Analysis</article-title>. <source>Dement. Neuropsychol.</source> <volume>12</volume>, <fpage>143</fpage>&#x2013;<lpage>151</lpage>. <pub-id pub-id-type="doi">10.1590/1980-57642018dn12-020006</pub-id> </citation>
</ref>
</ref-list>
</back>
</article>
