<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Pharmacol.</journal-id>
<journal-title>Frontiers in Pharmacology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Pharmacol.</abbrev-journal-title>
<issn pub-type="epub">1663-9812</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1089217</article-id>
<article-id pub-id-type="doi">10.3389/fphar.2022.1089217</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Pharmacology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Specific gene module pair-based target identification and drug discovery</article-title>
<alt-title alt-title-type="left-running-head">Li et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fphar.2022.1089217">10.3389/fphar.2022.1089217</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Li</surname>
<given-names>Peng</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/502629/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Bai</surname>
<given-names>Chujie</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhan</surname>
<given-names>Lingmin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2128092/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Haoran</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Yuanyuan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Wuxia</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/961882/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Yingdong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhao</surname>
<given-names>Jinzhong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1336385/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Shanxi key lab for modernization of TCVM</institution>, <institution>College of Basic Sciences</institution>, <institution>Shanxi Agricultural University</institution>, <addr-line>Jinzhong</addr-line>, <addr-line>Shanxi</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Orthopedic Oncology</institution>, <institution>Peking University Cancer Hospital &#x26; Institute</institution>, <institution>Key Laboratory of Carcinogenesis and Translational Research (Ministry of Education)</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/556980/overview">Jo&#xe3;o Manuel Cunha Rodrigues</ext-link>, Universidade da Madeira, Portugal</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/230565/overview">Dezso Modos</ext-link>, Quadram Institute, United Kingdom</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2106606/overview">Visvaldas Kairys</ext-link>, Vilnius University, Lithuania</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Peng Li, <email>lip@sxau.edu.cn</email>
</corresp>
<fn fn-type="equal" id="fn1">
<label>
<sup>&#x2020;</sup>
</label>
<p>These authors have contributed equally to this work</p>
</fn>
<fn fn-type="other">
<p>This article was submitted to Experimental Pharmacology and Drug Discovery, a section of the journal Frontiers in Pharmacology</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>16</day>
<month>01</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>1089217</elocation-id>
<history>
<date date-type="received">
<day>04</day>
<month>11</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>28</day>
<month>12</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Li, Bai, Zhan, Zhang, Zhang, Zhang, Wang and Zhao.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Li, Bai, Zhan, Zhang, Zhang, Zhang, Wang and Zhao</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Identification of the biological targets of a compound is of paramount importance for the exploration of the mechanism of action of drugs and for the development of novel drugs. The concept of the Connectivity Map (CMap) was previously proposed to connect genes, drugs, and disease states based on the common gene-expression signatures. For a new query compound, the CMap-based method can infer its potential targets by searching for similar drugs with known targets (reference drugs) and measuring the similarities in the specific transcriptional responses between the query compound and those reference drugs. However, the available methods are often inefficient due to the requirement of the reference drugs as a medium to link the query agent and targets. Here, we developed a general procedure to extract target-induced consensus gene modules from the transcriptional profiles induced by the treatment of perturbagens of a target. A specific transcriptional gene module pair (GMP) was automatically identified for each target and could be used as a direct target signature. Based on the GMPs, we built the target network and identified some target gene clusters with similar biological mechanisms. Moreover, a gene module pair-based target identification (GMPTI) approach was proposed to predict novel compound&#x2013;target interactions. Using this method, we have discovered novel inhibitors for three PI3K pathway proteins PI3K&#x3b1;/&#x3b2;/&#x3b4;, including PU-H71, alvespimycin, reversine, astemizole, raloxifene HCl, and tamoxifen.</p>
</abstract>
<kwd-group>
<kwd>transcriptome</kwd>
<kwd>gene module pair</kwd>
<kwd>drug target prediction</kwd>
<kwd>drug discovery</kwd>
<kwd>drug&#x2013;target association</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>When the sequencing of the human genome identifies risk-associated loci or genetic mutation for diseases, understanding the biological function and effects of the corresponding genes (proteins) is the top priority in the life science study. Similarly, for drugs with unknown molecular mechanisms, identification of their mechanistic targets is of paramount importance for the development of novel drugs. Truly understanding the biological effects of drugs requires monitoring the molecular pathways targeted by drugs and the subsequent impacts, such as the overall gene expression profiles. Evidently, omics techniques are naturally suited for capturing these systemic effects, such as transcriptomics, proteomics, and metabonomics (<xref ref-type="bibr" rid="B29">Trapotsi and Hosseini-Gerami, 2022</xref>). Until now, there have been many large-scale databases that integrate different types of omics data induced by genetic or compound perturbation on biological samples (<xref ref-type="bibr" rid="B1">Barrett et al., 2013</xref>; <xref ref-type="bibr" rid="B25">Sj&#xf6;stedt and Zhong, 2020</xref>; <xref ref-type="bibr" rid="B31">Wishart et al., 2022</xref>). Among them, the low-cost transcriptomics is the most useful for detecting functional associations between drugs and genes, as the constructed compendia of comprehensive and uniform-quality genetic and compound-induced gene expression data, such as the Connectivity Map (CMap) (<xref ref-type="bibr" rid="B13">Lamb et al., 2006</xref>; <xref ref-type="bibr" rid="B27">Subramanian et al., 2017</xref>). The CMap-based concept is a potential solution and has established systematic, large-scale compendia of the cellular effects of pharmacological and genetic perturbation. CMap-based approaches explore the actions of compounds by comparing their induced gene-expression profiles with the gene-expression profiles of perturbagens with known mechanisms. 
For example, if a query compound has expression profiles similar to those of the landmarked compounds with known mechanisms of action or genetic perturbagens, we can infer that the compound has upstream targets or pathways similar to those of the landmarked compounds and genetic perturbagens (<xref ref-type="bibr" rid="B23">Qu and Rajpal, 2012</xref>; <xref ref-type="bibr" rid="B21">Musa et al., 2018</xref>).</p>
<p>Until now, two versions of CMap have been built. The pilot (old) CMap database contains 6,100 gene-expression profiles obtained by the treatment of a set of 1,309 different molecules (<xref ref-type="bibr" rid="B13">Lamb et al., 2006</xref>). Since then, CMap-based methods have been widely used for discovering the drug&#x2019;s mode of action and drug repositioning. For example, based on CMap, Brum et al. found that parbendazole can induce osteogenic differentiation and explored withaferin A, calcium folinate, and amylocaine as potential osteogenic drugs (<xref ref-type="bibr" rid="B4">Brum et al., 2015</xref>; <xref ref-type="bibr" rid="B3">Brum et al., 2018</xref>). <xref ref-type="bibr" rid="B19">Manzotti et al. (2015</xref>) found that amantadine is associated with monocyte&#x2013;macrophage-like differentiation of myeloid leukemia cell lines. <xref ref-type="bibr" rid="B18">Liu et al. (2015</xref>) identified celastrol as a leptin sensitizer that can be used to treat obesity. In recent years, in view of the small scale of the pilot CMap dataset, the L1000 platform expands the CMap resource in different dimensions including the number of perturbations, cell lines, doses, and times (<xref ref-type="bibr" rid="B27">Subramanian et al., 2017</xref>). The new version of CMap can further accelerate the discovery of drug actions. For example, <xref ref-type="bibr" rid="B5">Chen et al. (2021</xref>) used the L1000 platform to discover hyperforin as a stimulator of thermogenesis by stimulating AMPK and PGC-1&#x3b1; <italic>via</italic> a Ucp1-dependent pathway. <xref ref-type="bibr" rid="B30">van Leeuwen et al. (2022</xref>) integrated the L1000 data and identified drugs that potentiate the anti-breast cancer activity of statins. In addition, the large-scale transcriptomic data of genetic and chemical perturbations from the CMap database also provide opportunities for updating current computational pharmacogenomics and drug design methodologies. 
For example, <xref ref-type="bibr" rid="B32">Zhang and Gant (2008</xref>) proposed a novel pattern-matching algorithm named statistically significant connectivity map (ssCMap) to help reduce noise effects in CMap-based approaches. <xref ref-type="bibr" rid="B7">Fortney et al. (2015</xref>) presented a method, CMapBatch, which is adapted to process multiple gene signatures in parallel. The L1000CDS<sup>2</sup> search engine optimized CMap data and methods to improve the ability of knowledge extraction from the CMap platform (<xref ref-type="bibr" rid="B6">Duan et al., 2016</xref>).</p>
<p>The CMap-based methods explored connections among drugs, pathways, and diseases by measuring the gene-expression signature similarity. However, the perturbagens as a medium are indispensable for these efforts to discover the biological connections. If we want to predict a potential drug&#x2013;target interaction, the query drug has to be linked to targets mediated by perturbagens in the CMap database. Because of the diversity of treatment conditions, the same perturbagens might connect to the query drug with sharply different scores and make it hard for users to determine which one is suitable. To solve this problem, we developed a general procedure to capture target-induced consensus gene modules hidden in the transcriptional profiles following the treatment of a target&#x2019;s perturbagens across multiple cell lines and dosages. A specific transcriptional gene module pair (GMP) was automatically extracted for each target and can be used as a gene signature to represent the target. Based on the GMPs of targets, we built the target network by calculating the similarity among GMPs of all targets and identified some target gene clusters with similar biological mechanisms. Moreover, the gene module pair-based target identification (GMPTI) approach was proposed to predict novel compound&#x2013;target interactions based on a compound-induced gene expression profile.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>Materials and methods</title>
<sec id="s2-1">
<title>Data source and preprocessing</title>
<p>All LINCS-funded CMap L1000 data are available from GEO. Both LINCS Phase 1 data in GEO Series GSE92742 and LINCS Phase 2 data in GEO series GSE70138 were combined. The L1000 platform carries out a rigorous five-step data-processing pipeline to transform raw data from Luminex scanners to replicate consensus signatures. The final LEVEL 5 data were used in this work. In total, it contains 591,697 signatures (118,050 in GSE70138 and 473,647 in GSE92742). The L1000 assay directly measures 978 landmark genes and infers an additional 11,350 genes. Of the inferred genes, 9,196 are well inferred. Our work only used the high-fidelity 10,174 genes, including 978 measured landmarks and 9,196 well-inferred genes.</p>
<p>We collated gene targets for all perturbagens from the cloud-based computing environment termed CLUE (Connectivity Map Linked User Environment), available at <ext-link ext-link-type="uri" xlink:href="https://clue.io/">https://clue.io/</ext-link>. Genetic perturbagens refer to two types of perturbation, knockdown (KD) and overexpression (OE), of targeted genes. The effects of compounds on targets were manually annotated. These perturbagens with clear targets were then mapped to the LEVEL 5 data to extract corresponding signatures. As a result, 138,310 signatures for 5,852 perturbagens with 4,540 gene targets were retained for this study.</p>
</sec>
<sec id="s2-2">
<title>Distance between two signatures</title>
<p>The distance between two signatures was measured by a modified gene set enrichment analysis (GSEA)-based method (<xref ref-type="bibr" rid="B12">Iorio et al., 2010</xref>). Given two signatures <italic>X</italic> and <italic>Y</italic>, following the work of Iorio et al., we selected 250 upregulated genes <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mn>250</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and downregulated genes <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mn>250</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> to represent each signature. The distance between two signatures was defined as follows:<disp-formula id="equ1">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>where<disp-formula id="equ2">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>Y</mml:mi>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>Y</mml:mi>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:mfrac>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>Here, <inline-formula id="inf3">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, defining the distance from <italic>X</italic> to <italic>Y</italic>, is the inverse total enrichment score of the signature <italic>X</italic> gene sets {<italic>up</italic>, <italic>dn</italic>}, with respect to the signature of <italic>Y</italic>. <inline-formula id="inf4">
<mml:math id="m6">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>Y</mml:mi>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> (with <italic>r</italic> <inline-formula id="inf5">
<mml:math id="m7">
<mml:mrow>
<mml:mo>&#x2208;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> {<italic>up</italic>, <italic>dn</italic>}) is the enrichment score of the signature of <italic>X</italic> (the upregulated part and the downregulated one) with respect to the signature of <italic>Y</italic>. Similarly, <inline-formula id="inf6">
<mml:math id="m8">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> describes the distance from <italic>Y</italic> to <italic>X</italic>.<disp-formula id="equ3">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>X</mml:mi>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>X</mml:mi>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:mfrac>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>Then, we performed a hierarchical cluster analysis for all signatures using the calculated distances.</p>
</sec>
<sec id="s2-3">
<title>Cluster analysis of signatures for each target</title>
<p>For a target, its signatures denote all signatures of perturbagens of this target. We clustered signatures for each target based on their pairwise distance values and plotted the dendrogram. Then, signatures cut by a pre-defined threshold of 0.8 in the dendrogram were considered outliers and removed from the dendrogram of each target.</p>
<p>The distance threshold value (i.e., 0.8) was determined by the following considerations. First, a significant threshold was estimated by a multiple random sampling approach. In all 138,310 signatures, we randomly selected 1,000 signatures and calculated pairwise distances between them, resulting in <inline-formula id="inf7">
<mml:math id="m10">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mn>1000</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>2</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> &#x3d; 499,500 distance values. The empirical probability distribution function (pdf) of these data was used to estimate a significance threshold for the distance. The upper bound of the 5% quantile of this empirical pdf was chosen as the distance significance threshold value. This procedure was repeated 1,000 times, and the mean of the 1,000 threshold values (approximately 0.8) was retained as the significance threshold. Based on the calculated threshold value, we manually inspected each cluster tree of the 4,540 targets and selected 0.8 as the threshold to remove outliers. Finally, 4,461 targets with at least three signatures were retained.</p>
</sec>
<sec id="s2-4">
<title>Co-expression analysis</title>
<p>It was hypothesized that on-target gene expression effects of different perturbagens for the same target should be similar and co-expressed. To find co-expression module genes induced by one target, we performed a co-expression analysis for signatures of each target using the weighted correlation network analysis (WGCNA) method (<xref ref-type="bibr" rid="B15">Langfelder and Horvath, 2008</xref>).</p>
</sec>
<sec id="s2-5">
<title>Target-specific gene modules</title>
<p>After the co-expression analysis, those genes that were not in any co-expressed modules were removed from signatures of each target. To extract the target-specific gene modules from co-expressed genes, the Borda merging method implementing a majority voting system was used to sort genes according to their values in each signature:<disp-formula id="equ4">
<mml:math id="m11">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ5">
<mml:math id="m12">
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>m</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:msubsup>
<mml:mi>v</mml:mi>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mi>j</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>where <italic>G</italic> is a ranked gene list of size <italic>n</italic> by sorting the corresponding merging value <inline-formula id="inf8">
<mml:math id="m13">
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> for each gene <inline-formula id="inf9">
<mml:math id="m14">
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, in decreasing order. <inline-formula id="inf10">
<mml:math id="m15">
<mml:mrow>
<mml:msub>
<mml:mi>v</mml:mi>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the sum (merging value) of <inline-formula id="inf11">
<mml:math id="m16">
<mml:mrow>
<mml:msubsup>
<mml:mi>v</mml:mi>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mi>j</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> in signatures 1 to <italic>m</italic>. <inline-formula id="inf12">
<mml:math id="m17">
<mml:mrow>
<mml:msubsup>
<mml:mi>v</mml:mi>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mi>j</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the value of gene <inline-formula id="inf13">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> in signature <italic>j</italic>.</p>
<p>At this step, each target corresponds to a gene list <italic>G</italic>, from which specific gene modules for this target can be extracted. We selected the top 250 genes (<italic>t</italic>
<sub>
<italic>up</italic>
</sub>) of each gene list and the bottom 250 ones (<italic>t</italic>
<sub>
<italic>down</italic>
</sub>) as the target-specific gene module pair (<italic>t</italic>
<sub>
<italic>up</italic>
</sub>, <italic>t</italic>
<sub>
<italic>down</italic>
</sub>).</p>
</sec>
<sec id="s2-6">
<title>Characterization of the target-specific gene module pair in human gene networks</title>
<p>InWeb_Inbiomap (Inbiomap) focuses on scored physical protein&#x2013;protein interactions (<xref ref-type="bibr" rid="B16">Li et al., 2017</xref>), available from <ext-link ext-link-type="uri" xlink:href="https://www.lagelab.org/resources/">https://www.lagelab.org/resources/</ext-link>. Pathway Commons (Pathcom) was downloaded from <ext-link ext-link-type="uri" xlink:href="http://www.pathwaycommons.org/">http://www.pathwaycommons.org/</ext-link>. Pathcom concentrates on biological pathways integrated from public pathway and gene interactions (<xref ref-type="bibr" rid="B24">Rodchenkov et al., 2020</xref>). The Search Tool for Recurring Instances of Neighboring Genes (STRING; <ext-link ext-link-type="uri" xlink:href="https://string-db.org">https://string-db.org</ext-link>) quantitatively integrates different studies and interaction types into a single integrated score for each gene pair based on the total weight of evidence (<xref ref-type="bibr" rid="B26">Snel et al., 2000</xref>). The Genome-scale Integrated Analysis of gene Networks in Tissues (GIANT; <ext-link ext-link-type="uri" xlink:href="https://hb.flatironinstitute.org/">https://hb.flatironinstitute.org/</ext-link>) network covers functional association genes and inferred functional relations (<xref ref-type="bibr" rid="B11">Huang et al., 2018</xref>).</p>
<p>We analyzed the enrichment of the module gene members in the network by calculating the ratio of protein&#x2013;protein connections among the fully connected network. When both top and bottom modules were analyzed together, the fully connected network has <inline-formula id="inf14">
<mml:math id="m19">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mn>500</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>2</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>124,750</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> links. When the top and bottom modules were analyzed separately, the fully connected network has <inline-formula id="inf15">
<mml:math id="m20">
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mn>250</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>2</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>31,125</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> links. The significance of the enrichment was measured by comparing the actual ratio with that of a random model. In the random model, a collection of genes with the same number as the module genes was randomly selected from the network, and then the connection ratio was calculated. This step was repeated 1,000 times, and a null distribution was constructed.</p>
</sec>
<sec id="s2-7">
<title>Target network</title>
<p>The similarity between two targets is estimated by the number of intersection genes between the two targets&#x2019; specific module pairs. The more overlapping the genes are, the more similar the two targets are. Then, we considered each target as a node in the network and connected two nodes with a weighted edge if the <italic>p</italic>-value of their similarity is below a significance threshold value. To evaluate the significance of the linkage between targets, we generated a null distribution for each target by randomly permuting top and bottom transcriptional modules and repeated the calculation 1,000 times for target connections. This null model uses the gene module-based permutation test procedure and preserves gene&#x2013;gene correlations of the gene expression data, providing a more biologically reasonable assessment of significance than would be obtained by permuting genes. The edge weight is proportional to the similarity, that is, the number of intersection genes of two targets&#x2019; specific module pairs, where the significance threshold is computed by the hypergeometric test (<italic>p</italic> &#x3c; 0.05).</p>
</sec>
<sec id="s2-8">
<title>Target community identification</title>
<p>The affinity propagation algorithm is used to identify target communities in the target network (<xref ref-type="bibr" rid="B8">Frey and Dueck, 2007</xref>; <xref ref-type="bibr" rid="B2">Bodenhofer et al., 2011</xref>). This algorithm takes in the target pairwise similarity matrix and outputs a set of clusters. Each cluster is represented by a cluster center data point called exemplar, whose features best interpolate the features of all the other points in the cluster.</p>
</sec>
<sec id="s2-9">
<title>Specific gene module pair-based target identification</title>
<p>GMPTI considers experiments with gene-expression profiles from a collection of samples belonging to two classes, for example, drug treated vs. control. The genes can be ordered in a ranked list <italic>L</italic>, according to their differential expression between the classes. Given the defined GMP for each target, the goal of GMPTI is to compare <italic>L</italic> to each target-specific GMP using a similarity metric slightly adjusted from that used in gene set enrichment analysis (<xref ref-type="bibr" rid="B28">Subramanian et al., 2005</xref>). We defined the raw similarity score as follows:<disp-formula id="equ6">
<mml:math id="m21">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>t</mml:mi>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf16">
<mml:math id="m22">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the enrichment of <italic>t</italic>
<sub>
<italic>up</italic>
</sub> for <italic>L</italic>, and <inline-formula id="inf17">
<mml:math id="m23">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the enrichment of <italic>t</italic>
<sub>
<italic>down</italic>
</sub> for <italic>L</italic>. <inline-formula id="inf18">
<mml:math id="m24">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>t</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> denotes the total correlation score of the GMP (<italic>t</italic>
<sub>
<italic>up</italic>
</sub>, <italic>t</italic>
<sub>
<italic>down</italic>
</sub>) of one target, with respect to signature <italic>L</italic>. The total correlation score (<italic>TCS</italic>) ranges between &#x2212;2 and 2. It measures the degree of similarity between query <italic>L</italic> and target-induced gene-expression profiles. It will be positive for targets that are positively related to <italic>L</italic>, negative for those that are inversely similar, and near zero for signatures that are unrelated. A zero value is assigned when both <inline-formula id="inf19">
<mml:math id="m25">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf20">
<mml:math id="m26">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> have the same sign.</p>
</sec>
<sec id="s2-10">
<title>Normalization of similarity scores</title>
<p>To allow for the comparison of similarity scores across multiple expression datasets, the scores are normalized to account for differences in query ranked gene lists. GMPTI normalizes the <inline-formula id="inf21">
<mml:math id="m27">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>t</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> values within each ranked gene list as follows:<disp-formula id="equ7">
<mml:math id="m28">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>t</mml:mi>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>t</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf22">
<mml:math id="m29">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>t</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <italic>&#x03BC;</italic> are, respectively, the normalized correlation score and the absolute mean of <inline-formula id="inf23">
<mml:math id="m30">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>t</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> (the mean of absolute values) for all target-specific module pairs corresponding to the query gene list. By normalizing <inline-formula id="inf24">
<mml:math id="m31">
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, GMPTI accounts for differences in correlations between GMPs and the expression dataset; therefore, the normalized correlation scores (<italic>NCS</italic>) can be used to compare the analysis results across different expression profiles.</p>
</sec>
<sec id="s2-11">
<title>Estimating significance</title>
<p>We assess the significance of an actual NCS value by comparing it with the set of scores NCS<sub>NULL</sub> computed with random permutations of both top and bottom gene modules for each target. 1) We generated a random GMP for each target by randomly permuting top and bottom transcriptional modules in our target space. 2) Step 1 was repeated for 1,000 permutations, and a histogram of the corresponding similarity scores NCS<sub>NULL</sub> was created for a query gene list. 3) A nominal <italic>p</italic>-value for the NCS<italic>i</italic> of a target <italic>i</italic> was estimated by using the portion of the NCS<sub>NULL</sub> distribution above the actual NCS<italic>i</italic> as follows:<disp-formula id="equ8">
<mml:math id="m32">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
</mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>U</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x2265;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
<mml:mn>1000</mml:mn>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf25">
<mml:math id="m33">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>(</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>U</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the absolute value of all correlation scores for random GMPs with respect to a query gene list <italic>L</italic>. <inline-formula id="inf26">
<mml:math id="m34">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the absolute value of the similarity score of target <italic>i</italic> with respect to <italic>L</italic>.</p>
</sec>
<sec id="s2-12">
<title>PI3K&#x3b1;/&#x3b2;/&#x3b4; kinase assay</title>
<p>The test compounds including varenicline tartrate, PU-H71, alvespimycin, reversine, astemizole, raloxifene HCl, and tamoxifen were purchased from Shanghai Aladdin Biochemical Technology Co., Ltd. PI3K&#x3b1;/&#x3b2;/&#x3b4; were purchased from Carna Biosciences. The effect of the test compounds on PI3K&#x3b1;/&#x3b2;/&#x3b4; enzyme activity was determined using an ADP-Glo-based biochemical assay (Vendor: Promega, Cat&#x23;: V9102), following the manufacturer&#x2019;s instructions. The classical PI3K inhibitor wortmannin was used as a positive control. The luminescence signal (RLU) was detected for each well by using a multimode plate reader (Vendor: BioTek, Cat&#x23;: Synergy4) and converted to % inhibition. Then, the IC50 values were calculated by fitting the % inhibition values and the log of compound concentrations to the Hill equation with a variable slope (the variable slope model, or four-parameter dose-response curve), and the log (inhibitor) vs. response curve was built by GraphPad Prism version 7.0 (GraphPad Software). Data are presented as mean &#xb1; SEM, with n &#x3d; 3 for each drug dose.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<sec id="s3-1">
<title>Target-specific gene module pair</title>
<p>It was hypothesized that on-target gene-expression effects of different perturbagens for the same target should be similar and co-expressed. For a gene target, its specific GMP indicates two gene sets that are specifically expressed at the top and bottom of the gene-expression profiles induced by perturbing this target. To extract the GMP for each target, we exploited a library of gene transcriptional responses to different perturbagens (e.g., small-molecule compounds and shRNAs): the newly expanded Connectivity Map (CMap) containing 476,251 gene expression profiles (consolidating replicates) obtained by the treatment of 77 different human cell lines at different dosages with a set of 27,927 perturbagens (<xref ref-type="bibr" rid="B27">Subramanian et al., 2017</xref>). We collected gene targets of all perturbagens from CLUE. Then, each target was mapped to its transcriptional signatures that are the differential gene profiles induced by the perturbagens of the target including both small-molecule compounds and shRNAs. As a result, 138,310 signatures for 5,852 perturbagens with 4,540 gene targets were retained.</p>
<p>Based on these data, we proposed a novel method to extract the GMP for a target (<xref ref-type="fig" rid="F1">Figure 1A</xref>). First, we integrated co-expression genes for each target by performing the WGCNA on its signatures. For a target&#x2019;s signatures, there may be some outliers that are distinct from most signatures and are difficult to reflect transcriptional activities induced by perturbing this target. To reduce the influence of these outliers in the construction of GMPs, we clustered signatures for each target on their pairwise distances and removed outlier signatures in the dendrogram by a pre-defined threshold (see Materials and Methods). The distance between two signatures was measured by a modified GSEA-based method (<xref ref-type="bibr" rid="B12">Iorio et al., 2010</xref>). In order to equally weight the contribution of all signatures to the co-expressed genes, the Borda merging method, implementing a majority voting system, was used to sort the co-expressed genes according to their ranks in each signature. The GMP including the two top/bottom gene sets was extracted from the merged gene list by selecting the first 250 genes at the top of the gene list (most overexpressed) and the last 250 ones at the bottom of the gene list (most downregulated) following a previous work (<xref ref-type="bibr" rid="B12">Iorio et al., 2010</xref>). Finally, the GMPs were successfully constructed for 3,505 targets. Out of these, we noted that the GMPs for 229 targets were integrated from only a small number of multi-target perturbagens. For example, the GMP of adiponectin receptor protein 2 (ADIPOR2) was concluded by 70 signatures of the compound parthenolide, which is not only an adiponectin receptor agonist but also an NF-&#x3ba;B inhibitor. For these targets, it is hard to judge the specificity of their GMPs; thus, they were removed from the target space. 
The remaining 3,275 targets were considered reliable, and their GMPs capture the consensus transcriptional response of the targets across different perturbations, reducing non-relevant effects due to off-target, dosage, or cell line (<xref ref-type="sec" rid="s10">Supplementary Table S1</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>
<bold>(A)</bold> Procedure to extract the gene module pair (GMP) of a target. <bold>(B)</bold> Characterization of the target-specific GMP in human gene networks. Four types of gene networks were collected from Pathcom, Inbiomap, STRING, and GIANT. We evaluated the functional enrichment of module genes in each network by calculating the ratio of the number of protein&#x2013;protein interactions to the number of links in a fully connected network (PPI ratio). The actual PPI ratio is compared with that of a random model to assess the significance of the enrichment. In the random model, a collection of genes with the same number as the module genes was randomly selected from the network, and then the PPI ratio was calculated. The blue and red distributions show the random and observed data, respectively. Rows 1&#x2013;4 correspond to the analysis of the Pathcom, Inbiomap, STRING, and GIANT networks, respectively. Columns 1&#x2013;3 correspond to analysis of gene module pairs, top modules, and bottom modules, respectively.</p>
</caption>
<graphic xlink:href="fphar-13-1089217-g001.tif"/>
</fig>
</sec>
<sec id="s3-2">
<title>Characterization of the target-specific gene module pair in human gene networks</title>
<p>To check the functional coherence of target-induced transcriptional modules, we compared their gene members in four genome-wide interaction networks with different gene interaction types. Among these networks, Inbiomap focuses on scored physical protein&#x2013;protein interactions (<xref ref-type="bibr" rid="B16">Li et al., 2017</xref>). Pathcom concentrates on biological pathways integrated from public pathway and gene interactions (<xref ref-type="bibr" rid="B24">Rodchenkov et al., 2020</xref>). STRING quantitatively integrates different studies and interaction types into a single integrated score for each gene pair based on the total weight of evidence (<xref ref-type="bibr" rid="B26">Snel et al., 2000</xref>). The GIANT network covers functional association genes and inferred functional relations (<xref ref-type="bibr" rid="B9">Greene et al., 2015</xref>). These networks, differing in both interaction type and coverage (<xref ref-type="sec" rid="s10">Supplementary Table S2</xref>), allow a systematic evaluation of the functional relations of the target-induced gene modules in this study.</p>
<p>We first analyzed the gene members of both top/bottom modules together. In the four networks, we observed that Pathcom enriched a minimum of 758 (&#x223c;22%) GMPs compared with its null model (nominal <italic>p</italic>-value &#x3c;0.05; <xref ref-type="fig" rid="F1">Figure 1B</xref>), though this number is evidently less than that in other networks. The three networks, Inbiomap, STRING, and GIANT, significantly cover more gene relations than their corresponding null models on at least 2,200 GMPs (49%), while 1,180 GMPs (26%) were enriched in all the three networks (nominal <italic>p</italic>-value &#x3c;0.05; <xref ref-type="fig" rid="F1">Figure 1B</xref>). Moreover, the top and bottom modules were analyzed. In agreement with functional analyses of the combined co-expression modules, except Pathcom, all networks enriched a large amount of modules (from 1,000 to 1,981 upregulated modules and from 1,000 to 2,331 downregulated modules) (nominal <italic>p</italic>-value &#x3c;0.05; <xref ref-type="fig" rid="F1">Figure 1B</xref>). These results indicate that gene members in target-induced transcriptional modules are mostly functionally relevant and cover a diversity of molecular interaction types.</p>
</sec>
<sec id="s3-3">
<title>Gene module pair-based target gene map</title>
<p>GMPs reflecting the transcriptional response of targets&#x2019; perturbation can be used to relate different target genes. The similarity between collections of GMPs allowed us to calculate a target map connecting target genes together through sequential linkage. The similarity was estimated by the quantity of intersections between two GMPs. Then, we considered each target as a node in the network and connected two nodes with a weighted edge if the <italic>p</italic>-value of their similarity was below a significance threshold. To evaluate the significance of the linkage between targets, we generated a null distribution for each target by randomly permuting top and bottom transcriptional modules and repeated the calculation 1,000 times for target connections. This null model uses the gene module-based permutation test procedure and preserves gene&#x2013;gene correlations of the gene expression data, providing a more biologically reasonable assessment of significance than would be obtained by permuting genes (see Methods). It can be seen that 2,593 (&#x223c;82.5%) targets are connected in a map with 221,275 edges (permutation-based <italic>p</italic>-value &#x3c; 0.05; <xref ref-type="fig" rid="F2">Figure 2A</xref>; <xref ref-type="sec" rid="s10">Supplementary Table S3</xref>), corresponding to &#x223c;4% of a fully connected network with all 3,275 targets (5,361,175 edges).</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Target network. Part of the target map with 5,000 links is displayed (see more details in <xref ref-type="sec" rid="s10">Supplementary Table S4</xref>). Clusters 17, 96, 175, and 178 are magnified (see more details in <xref ref-type="sec" rid="s10">Supplementary Table S5</xref>).</p>
</caption>
<graphic xlink:href="fphar-13-1089217-g002.tif"/>
</fig>
<p>To further detect the target relations, the affinity propagation algorithm is used to identify target clusters in the target map. This algorithm takes in the target pairwise similarity matrix and outputs a set of clusters. Each cluster is represented by a cluster center data point called exemplar, whose features best interpret the features of all the other points in the cluster. We identified 225 clusters with at least two target nodes in the target map (<xref ref-type="fig" rid="F2">Figure 2A</xref>; <xref ref-type="sec" rid="s10">Supplementary Table S4</xref>). Each cluster was coded with a numerical identifier. As only gene expression information is used to calculate the cross-target similarity, each cluster should reflect a similar transcriptional regulatory activity of biologically related targets. As expected, we observed that targets with similar functions cluster together in the map. For example, 11 histone deacetylases gather in Cluster <bold>96</bold>. Likewise, sodium/potassium-transporting ATPase proteins stay together in Cluster <bold>17</bold>. Also, target genes within a pathway should co-localize and intra-connect in the map as their similar transcriptional regulatory activity. Thus, PI3 and PI4 kinase sets localize with other kinases including ATM, ATR, PLK1, PLK3, and MTOR.</p>
</sec>
<sec id="s3-4">
<title>Gene module pair-based target identification</title>
<p>GMPTI considers experiments with gene-expression profiles from a collection of samples belonging to two classes, for example, drug-treated vs. control cells. The agent-induced gene expression profiles can be ordered in a ranked list, according to some metrics (e.g., the differential expression values between the two classes). Given the defined GMPs, the goal of our strategy is to compare the correlation of the query gene list with the GMPs of targets (<xref ref-type="fig" rid="F3">Figure 3A</xref>). A strong correlation indicates a similar transcriptional response induced by the agent and the target. The TCS is measured by a method adjusted from that used in the GSEA (see Methods). A positive TCS indicated a similar transcriptional response induced by the agent and the target, and a negative TCS indicated a reversed transcriptional response induced by the agent and the target. To allow for the comparison of scores across multiple queries, we normalized them by dividing a query&#x2019;s score by the absolute mean of the raw scores for all GMPs (the mean of their absolute values) and calculated an NCS with respect to the query. The significance of the normalized score was assessed by comparing it with a null distribution of scores computed by random permutations of top and bottom transcriptional modules in the target space.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Gene module pair-based target identification<bold>. (A)</bold> A ranked gene list between two classes is compared with GMPs of all targets. A total correlation score (<italic>TCS</italic>
<sub>
<italic>i</italic>
</sub>) is used to quantify the correlation between the gene list and each GMP by an adjusted gene set enrichment approach. Then, the TCS is divided by absolute means of the TCS scores for all GMPs to get a normalized correlation score (<italic>NCS</italic>
<sub>
<italic>i</italic>
</sub>) with respect to the query. <bold>(B)</bold> Mean of NCS for all transcriptional signatures of each target is calculated for all GMPs and ranked. Because there may be multiple transcriptional signatures for a target in the L1000 database, we calculated the mean of multiple NCS values for each target relative to all GMPs. Then, for each target, NCS mean values for all GMPs were ranked in descending order by the absolute values, and the rank of the GMP of the target itself is extracted and displayed. The horizontal axis displays the 3,275 targets. The vertical axis is the rank of the NCS mean value between a target and the GMP of the target itself. <bold>(C)</bold> Precision at different NCS cutoffs. <bold>(D)</bold> Recall at different NCS cutoffs.</p>
</caption>
<graphic xlink:href="fphar-13-1089217-g003.tif"/>
</fig>
<p>In addition, we manually tidied the effects of perturbagens for each target including both inhibition and activation that signifies the GMPs were concluded from the transcriptional profiles of inhibitors and agonists of targets, respectively. Out of the 3,275 targets, we found that 3,119 (95.2%) were inhibited, 26 (0.8%) were stimulated, and 131 were undetermined. From these data, we could determine how the query agent induces the corresponding gene-expressional profile in GMPTI. For example, when the transcriptional profile induced by an agent strongly positively correlated with the GMP of a target labeled &#x201c;inhibited&#x201d; in the target space, we speculated the agent might induce its gene expression by inhibiting the mechanism related to the target.</p>
<p>The quality of GMPs for each target is of paramount importance for prediction of targets by GMPTI. To assess the quality of GMPs, we used L1000 data as queries to examine whether the GMP of a target can be enriched into the transcriptional signatures of the target itself more than other GMPs. This means the transcriptional signatures of the target will have a greater NCS on its GMPs than other GMPs. We observed that signatures of 3,137 targets (&#x223c;95.8%) have maximal NCS on its corresponding GMPs. The signatures of other 138 targets and their own GMPs display slightly lower NCSs ranked from 2nd to 18th in all NCSs (<xref ref-type="fig" rid="F3">Figure 3B</xref>; <xref ref-type="sec" rid="s10">Supplementary Table S1</xref>). Manual inspection of these 138 targets indicated that GMPs with NCSs larger than their own GMPs are mostly corresponding to those targets that have similar biological mechanisms to their own targets. These results indicate that the GMPs of most targets are more correlated with transcriptional signatures of their own or other targets with similar biological mechanisms, confirming the quality of GMPs.</p>
<p>When a drug-induced gene expression profile is known, GMPTI can quantify the functional associations between the drugs and targets with GMPs by using NCS values and the corresponding nominal <italic>p</italic> values. For a drug&#x2013;target association, the NCS absolute value measures the extent of functional association between the drug and target. The larger the NCS absolute value, the stronger the drug&#x2013;target functional association. The nominal <italic>p</italic>-value &#x3c;0.05 means that more than 95% NCS values from the random model are less than the real NCS. We can find drug&#x2013;target associations by both <italic>P</italic> and NCS values. Generally, a nominal <italic>p</italic>-value &#x3c;0.05 can be regarded as the minimum standard for filtering potential drug&#x2013;target associations, which can be further refined by the ranked NCS values. To examine the influence of NCS on the prediction, we regarded NCS as cutoff values and monitored the distribution of the positive predictive value (precision) and true positive rate (recall). As shown in <xref ref-type="fig" rid="F3">Figures 3C,D</xref>, when raising the NCS values, the precision values sharply increase to the maximum, and correspondingly, the recall values gradually decrease, indicating the NCS values can be used to filter drug&#x2013;target associations.</p>
</sec>
<sec id="s3-5">
<title>Discovery of novel targets of drugs</title>
<p>We focused on identifying ligands that act on the PI3K signaling pathway, a key biological process involved in cancer and inflammatory diseases by GMPTI. This pathway has three target genes PI3K&#x3b1;/&#x3b2;/&#x3b4; in the target space and is suitable to be taken as an example of this test. First, GMPTI was used to screen 5,520 small-molecular compounds from the L1000 dataset. For each target, we assessed whether it connected to the 5,520 compounds. When these compounds were listed in descending order by NCS values, it was observed that most known ligands for the three targets were top ranked with significant scores. For PI3K&#x3b1;, 308 compounds exhibited the expected interaction, and out of all 15 known PI3K&#x3b1; ligands in the L1000 dataset, 14 ligands such as LY-294002, wortmannin, and NVP-BEZ235 were included in the top ranks (nominal <italic>p</italic>-value &#x3c;0.05, <xref ref-type="sec" rid="s10">Supplementary Table S6</xref>). Similarly, 351 compounds exhibited the expected interaction for PI3K&#x3b2;, and out of all 13 known PI3K&#x3b2; ligands in the L1000 dataset, 11 ligands were included in the top ranks (nominal <italic>p</italic>-value &#x3c;0.05, <xref ref-type="sec" rid="s10">Supplementary Table S6</xref>). For PI3K&#x3b4;, 321 compounds exhibited the expected interaction, and out of all 14 known PI3K&#x3b4; ligands in the L1000 dataset, 13 ligands were included in the top ranks (nominal <italic>p</italic>-value &#x3c;0.05, <xref ref-type="sec" rid="s10">Supplementary Table S6</xref>). Based on the NCS for the three kinases, we selected three potential compounds, PU-H71, alvespimycin, and reversine, to measure their affinity by direct-binding assay. 
In this test, we used the classical PI3K inhibitor wortmannin as a positive control, and the results showed that wortmannin inhibits PI3K&#x3b1;, PI3K&#x3b2;, and PI3K&#x3b4; with IC50 of 1.2&#xa0;nM, 1.2&#xa0;nM, and 4.5&#xa0;nM, respectively, confirming the specifications of the binding assay (<xref ref-type="fig" rid="F4">Figures 4A1-A3</xref>). Reversine has been known as a novel class of ATP-competitive Aurora kinase (Aurora A, Aurora B, and Aurora C) inhibitor and induces cell cycle arrest and apoptosis. GMPTI showed that reversine might also be a potential PI3K pathway inhibitor with IC50 of 12&#xa0;&#xb5;M and 3.5&#xa0;&#xb5;M for PI3K&#x3b1; and PI3K&#x3b4;, respectively (<xref ref-type="fig" rid="F4">Figures 4B1, B2</xref>). For alvespimycin and PU-H71, it has been known that both compounds are potent heat shock protein 90 (HSP90) inhibitors. Our model demonstrated that they also have potential to inhibit the PI3K pathway. Among them, alvespimycin slightly inhibited PI3K&#x3b1;, PI3K&#x3b2;, and PI3K&#x3b4; with IC50 of 93&#xa0;&#xb5;M, 69&#xa0;&#xb5;M and 28&#xa0;&#xb5;M, respectively (<xref ref-type="fig" rid="F4">Figures 4C1&#x2013;C3</xref>). PU-H71 has IC50 of 20&#xa0;&#xb5;M to antagonize the activation of PI3K&#x3b4; (<xref ref-type="fig" rid="F4">Figure 4D</xref>).</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Experimental validation of interaction between the test compound and PI3K&#x3b1;/&#x3b2;/&#x3b4;. We used the classical PI3K inhibitor wortmannin as the positive control. <bold>(A1&#x2013;3)</bold>: Wortmannin and PI3K&#x3b1;/&#x3b2;/&#x3b4;; <bold>(B1&#x2013;2)</bold>: Reversine and PI3K&#x3b1;/&#x3b4;; <bold>(C1&#x2013;3)</bold>: Alvespimycin and PI3K&#x3b1;/&#x3b2;/&#x3b4;; <bold>(D)</bold>: PU-H71 and PI3K&#x3b4;; <bold>(E1&#x2013;3)</bold>: Astemizole and PI3K&#x3b1;/&#x3b2;/&#x3b4;; <bold>(F1&#x2013;3)</bold>: Raloxifene HCL and PI3K&#x3b1;/&#x3b2;/&#x3b4;; <bold>(G1&#x2013;3)</bold>: Tamoxifen and PI3K&#x3b1;/&#x3b2;/&#x3b4;. Data are presented as mean &#xb1; SEM, with n &#x3d; 3 for each drug dose.</p>
</caption>
<graphic xlink:href="fphar-13-1089217-g004.tif"/>
</fig>
<p>The aforementioned prediction is based on the L1000 dataset and might improve the prediction ability of GMPTI. To further test the validity of GMPTI for predicting novel compound&#x2013;target interactions on external data, we collected the old version CMap dataset that includes 1,309 compounds and their induced gene expression profiles. For each compound, its signature was created by differential genes and was used as a query for GMPTI. For a better comparison, we also predicted the activity of these compounds against the PI3K pathway. GMPTI predicted 410, 374, and 408 compounds were significantly related to PI3K&#x3b1;, PI3K&#x3b2;, and PI3K&#x3b4;, respectively (nominal <italic>p</italic>-value &#x3c;0.05, <xref ref-type="sec" rid="s10">Supplementary Table S7</xref>). In the result, we can see the top three predicted compounds, LY294002, sirolimus, and wortmannin, are known PI3K inhibitors. In addition, we experimentally tested three well-known drugs, astemizole, raloxifene HCl, and tamoxifen, that were repositioned to PI3K inhibitors by GMPTI. Astemizole is known as a second-generation H1-receptor antagonist for use in relieving allergy symptoms, including rhinitis and conjunctivitis. The binding assay confirmed that astemizole inhibits PI3K&#x3b1;, PI3K&#x3b2;, and PI3K&#x3b4; with IC50 of 18&#xa0;&#xb5;M, 8.7&#xa0;&#xb5;M, and 6&#xa0;&#xb5;M, respectively (<xref ref-type="fig" rid="F4">Figures 4E1&#x2013;E3</xref>). Raloxifene is a selective estrogen receptor modulator and is indicated for the treatment of osteoporosis in postmenopausal women and corticosteroid-induced bone loss. We here verified its inhibitory effect on PI3K&#x3b1;, PI3K&#x3b2;, and PI3K&#x3b4; with IC50 of 16&#xa0;&#xb5;M, 8.4&#xa0;&#xb5;M, and 4.3&#xa0;&#xb5;M, respectively (<xref ref-type="fig" rid="F4">Figures 4F1&#x2013;F3</xref>). Tamoxifen, a well-known competitive inhibitor for the estrogen receptor, has been used to treat estrogen receptor-positive metastatic breast cancer. 
It was also found to be a PI3K inhibitor with IC50 of 19&#xa0;&#xb5;M, 14&#xa0;&#xb5;M, and 10&#xa0;&#xb5;M for PI3K&#x3b1;, PI3K&#x3b2;, and PI3K&#x3b4;, respectively (<xref ref-type="fig" rid="F4">Figures 4G1&#x2013;G3</xref>).</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<p>Discovery of molecular mechanisms targeted by a compound is a top priority for the development and application of novel drugs. Direct prediction based on the chemical structure information of drugs usually finds a large number of redundant targets that are unrelated to the pharmacological effects of drugs. CMap-based methods explored connections among drugs, pathways, and diseases using a large collection of transcriptional responses following compound treatments (<xref ref-type="bibr" rid="B14">Lamb, 2007</xref>). The L1000 platform expands the CMap resource in different dimensions including the number of perturbations, cell lines, doses, and times (<xref ref-type="bibr" rid="B27">Subramanian et al., 2017</xref>). However, the perturbagens as a medium are indispensable for the CMap methods to discover the biological connections. This makes the exploration of the drugs&#x2019; mode of action fuzzy and sometimes requires more empirical judgment. We developed a general procedure to capture target-induced consensus gene modules hidden in the transcriptional profiles following the treatment of the target&#x2019;s perturbagens across multiple cell lines and dosages. Finally, a specific transcriptional GMP was automatically extracted for each target and can be used as a gene signature to represent the target. Based on the GMPs of targets, we built the target network by calculating the similarity among GMPs of all targets and identified some target gene clusters with similar biological mechanisms.</p>
<p>Our approach has the ability to infer mechanisms of queries with known gene-expression profiles. Three proteins PI3K&#x3b1;/&#x3b2;/&#x3b4; in the PI3K pathway were taken as an example. We found novel ligands of the three proteins not only in L1000 compounds but also in the external dataset. We have experimentally validated three potential compounds PU-H71, alvespimycin, and reversine in the L1000 dataset and three well-known drugs astemizole, raloxifene HCl, and tamoxifen in the old CMap dataset by the direct-binding assay. It should be noted that these drug&#x2013;target interactions have affinities in the micromolar range in the experimental test and might be nonspecific effects. However, the analysis of the binding efficiencies of natural products and marketed drugs indicates that therapeutic efficacy is not necessarily associated with high binding affinity (<xref ref-type="bibr" rid="B20">Mestres and Gregori-Puigjan&#xe9;, 2009</xref>). For instance, memantine, a drug for Alzheimer&#x2019;s disease, is an uncompetitive, low-affinity (in the micromolar range), non-selective N-methyl-D-aspartic acid (NMDA) receptor antagonist, and has fewer side effects than high-affinity (nanomolar or higher) drugs (<xref ref-type="bibr" rid="B17">Lipton, 2007</xref>). In addition, drugs that interact with multiple targets might also have the chance to improve efficiency (<xref ref-type="bibr" rid="B10">Hopkins et al., 2006</xref>; <xref ref-type="bibr" rid="B22">Ohlson, 2008</xref>).</p>
<p>The major limitation of our approach is the limited quantity and quality of perturbagens for a target. The key to our approach is concentrating on the commonalities preserved in the transcriptional responses of different perturbagens for the same target. If the number of perturbagens is too small to cover most transcriptional features of the target, the extracted GMPs are hardly sufficient to represent the target. The L1000 platform made this possible as a comprehensive, large-scale compendium of functional perturbations of the gene expression resource under various conditions. Certainly, it should be noted that the expression of most genes was not directly measured but inferred in the L1000 assay, although the reliability of the inferred transcripts was theoretically confirmed (<xref ref-type="bibr" rid="B27">Subramanian et al., 2017</xref>). In addition, we should note that it is inevitable for a target to have perturbagens with inconsistent effects in different situations (for example, different cell lines, doses, and times); merging gene expression profiles from distinct perturbagens might dilute the biological effects of the target. For example, it is well-known that gene expression is drastically affected by drug dosages. The extraction of GMPs from the LINCS level 5 data without considering the impact of dosages could cause dose-dependent biases. Nevertheless, our approach makes a unique identifier for each target by merging profiles from multiple conditions, which gives the opportunity to directly build links between targets, drugs, and diseases at the gene transcriptional level.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found in the article/<xref ref-type="sec" rid="s10">Supplementary Material</xref>.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>PL designed research, performed experiments, analyzed data, and wrote the manuscript. CB, LZ, HZ, YZ, WZ, and YW performed experiments and collected and analyzed data. PL and JZ supervised the work and reviewed the manuscript.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>This research was supported by the National Natural Science Fund of China (No. 82274363, No. 81703945), the Fundamental Research Program of Shanxi Province (No. 20210302124129), and the Distinguished and Excellent Young Scholars Cultivation Project of Shanxi Agricultural University (No. 2022YQPYGC09).</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors, and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fphar.2022.1089217/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fphar.2022.1089217/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Table2.XLSX" id="SM1" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table3.XLSX" id="SM2" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet3.CSV" id="SM3" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table4.XLSX" id="SM4" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table1.XLSX" id="SM5" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet1.CSV" id="SM6" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="DataSheet2.CSV" id="SM7" mimetype="application/CSV" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barrett</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wilhite</surname>
<given-names>S. E.</given-names>
</name>
<name>
<surname>Ledoux</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Evangelista</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>I. F.</given-names>
</name>
<name>
<surname>Tomashevsky</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>NCBI GEO: Archive for functional genomics data sets-update</article-title>. <source>Nucleic Acids Res.</source> <volume>41</volume>, <fpage>D991</fpage>&#x2013;<lpage>D995</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gks1193</pub-id>
<comment>(Database issue)</comment>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bodenhofer</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Kothmeier</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Hochreiter</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>APCluster: an R package for affinity propagation clustering</article-title>. <source>Bioinformatics</source> <volume>27</volume> (<issue>17</issue>), <fpage>2463</fpage>&#x2013;<lpage>2464</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btr406</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brum</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>van de Peppel</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Nguyen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Aliev</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Schreuders-Koedam</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Gajadien</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Using the connectivity map to discover compounds influencing human osteoblast differentiation</article-title>. <source>J. Cell Physiol.</source> <volume>233</volume> (<issue>6</issue>), <fpage>4895</fpage>&#x2013;<lpage>4906</lpage>. <pub-id pub-id-type="doi">10.1002/jcp.26298</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Brum</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>van de Peppel</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>van der Leije</surname>
<given-names>C. S.</given-names>
</name>
<name>
<surname>Schreuders-Koedam</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Eijken</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>van der Eerden</surname>
<given-names>B. C.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Connectivity map-based discovery of parbendazole reveals targetable human osteogenic pathway</article-title>. <source>Proc. Natl. Acad. Sci. U. S. A.</source> <volume>112</volume> (<issue>41</issue>), <fpage>12711</fpage>&#x2013;<lpage>12716</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1501597112</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>The phytochemical hyperforin triggers thermogenesis in adipose tissue via a Dlat-AMPK signaling axis to curb obesity</article-title>. <source>Cell Metab.</source> <volume>33</volume> (<issue>3</issue>), <fpage>565</fpage>&#x2013;<lpage>580.e7</lpage>. <pub-id pub-id-type="doi">10.1016/j.cmet.2021.02.007</pub-id>
<comment>e567</comment>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Duan</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Reid</surname>
<given-names>S. P.</given-names>
</name>
<name>
<surname>Clark</surname>
<given-names>N. R.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Fernandez</surname>
<given-names>N. F.</given-names>
</name>
<name>
<surname>Rouillard</surname>
<given-names>A. D.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>L1000CDS(2): LINCS L1000 characteristic direction signatures search engine</article-title>. <source>NPJ Syst. Biol. Appl.</source> <volume>2</volume>, <fpage>16015</fpage>. <pub-id pub-id-type="doi">10.1038/npjsba.2016.15</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fortney</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Griesman</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kotlyar</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pastrello</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Angeli</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Sound-Tsao</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Prioritizing therapeutics for lung cancer: an integrative meta-analysis of cancer gene signatures and chemogenomic data</article-title>. <source>PLoS Comput. Biol.</source> <volume>11</volume> (<issue>3</issue>), <fpage>e1004068</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1004068</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Frey</surname>
<given-names>B. J.</given-names>
</name>
<name>
<surname>Dueck</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Clustering by passing messages between data points</article-title>. <source>Science</source> <volume>315</volume> (<issue>5814</issue>), <fpage>972</fpage>&#x2013;<lpage>976</lpage>. <pub-id pub-id-type="doi">10.1126/science.1136800</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Greene</surname>
<given-names>C. S.</given-names>
</name>
<name>
<surname>Krishnan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Ricciotti</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Zelaya</surname>
<given-names>R. A.</given-names>
</name>
<name>
<surname>Himmelstein</surname>
<given-names>D. S.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Understanding multicellular function and disease with human tissue-specific networks</article-title>. <source>Nat. Genet.</source> <volume>47</volume> (<issue>6</issue>), <fpage>569</fpage>&#x2013;<lpage>576</lpage>. <pub-id pub-id-type="doi">10.1038/ng.3259</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hopkins</surname>
<given-names>A. L.</given-names>
</name>
<name>
<surname>Mason</surname>
<given-names>J. S.</given-names>
</name>
<name>
<surname>Overington</surname>
<given-names>J. P.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Can we rationally design promiscuous drugs?</article-title> <source>Curr. Opin. Struct. Biol.</source> <volume>16</volume> (<issue>1</issue>), <fpage>127</fpage>&#x2013;<lpage>136</lpage>. <pub-id pub-id-type="doi">10.1016/j.sbi.2006.01.013</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>J. K.</given-names>
</name>
<name>
<surname>Carlin</surname>
<given-names>D. E.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>M. K.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Kreisberg</surname>
<given-names>J. F.</given-names>
</name>
<name>
<surname>Tamayo</surname>
<given-names>P.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Systematic evaluation of molecular networks for discovery of disease genes</article-title>. <source>Cell Syst.</source> <volume>6</volume> (<issue>4</issue>), <fpage>484</fpage>&#x2013;<lpage>495</lpage>. <pub-id pub-id-type="doi">10.1016/j.cels.2018.03.001</pub-id>
<comment>&#x2b;</comment>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Iorio</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Bosotti</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Scacheri</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Belcastro</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Mithbaokar</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Ferriero</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2010</year>). <article-title>Discovery of drug mode of action and drug repositioning from transcriptional responses</article-title>. <source>Proc. Natl. Acad. Sci. U. S. A.</source> <volume>107</volume> (<issue>33</issue>), <fpage>14621</fpage>&#x2013;<lpage>14626</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1000138107</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lamb</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Crawford</surname>
<given-names>E. D.</given-names>
</name>
<name>
<surname>Peck</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Modell</surname>
<given-names>J. W.</given-names>
</name>
<name>
<surname>Blat</surname>
<given-names>I. C.</given-names>
</name>
<name>
<surname>Wrobel</surname>
<given-names>M. J.</given-names>
</name>
<etal/>
</person-group> (<year>2006</year>). <article-title>The connectivity map: using gene-expression signatures to connect small molecules, genes, and disease</article-title>. <source>Science</source> <volume>313</volume> (<issue>5795</issue>), <fpage>1929</fpage>&#x2013;<lpage>1935</lpage>. <pub-id pub-id-type="doi">10.1126/science.1132939</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lamb</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>The connectivity map: a new tool for biomedical research</article-title>. <source>Nat. Rev. Cancer</source> <volume>7</volume> (<issue>1</issue>), <fpage>54</fpage>&#x2013;<lpage>60</lpage>. <pub-id pub-id-type="doi">10.1038/nrc2044</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Langfelder</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Horvath</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>WGCNA: an R package for weighted correlation network analysis</article-title>. <source>BMC Bioinforma.</source> <volume>9</volume>, <fpage>559</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-9-559</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>T. B.</given-names>
</name>
<name>
<surname>Wernersson</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Hansen</surname>
<given-names>R. B.</given-names>
</name>
<name>
<surname>Horn</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Mercer</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Slodkowicz</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>A scored human protein-protein interaction network to catalyze genomic interpretation</article-title>. <source>Nat. Methods</source> <volume>14</volume> (<issue>1</issue>), <fpage>61</fpage>&#x2013;<lpage>64</lpage>. <pub-id pub-id-type="doi">10.1038/nmeth.4083</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lipton</surname>
<given-names>S. A.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>Pathologically-activated therapeutics for neuroprotection: mechanism of NMDA receptor block by memantine and S-nitrosylation</article-title>. <source>Curr. Drug Targets</source> <volume>8</volume> (<issue>5</issue>), <fpage>621</fpage>&#x2013;<lpage>632</lpage>. <pub-id pub-id-type="doi">10.2174/138945007780618472</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Salazar Hernandez</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Mazitschek</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Ozcan</surname>
<given-names>U.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Treatment of obesity with celastrol</article-title>. <source>Cell</source> <volume>161</volume> (<issue>5</issue>), <fpage>999</fpage>&#x2013;<lpage>1011</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2015.05.011</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Manzotti</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Parenti</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ferrari-Amorotti</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Soliera</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>Cattelani</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Montanari</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Monocyte-macrophage differentiation of acute myeloid leukemia cell lines by small molecules identified through interrogation of the connectivity map database</article-title>. <source>Cell Cycle</source> <volume>14</volume> (<issue>16</issue>), <fpage>2578</fpage>&#x2013;<lpage>2589</lpage>. <pub-id pub-id-type="doi">10.1080/15384101.2015.1033591</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mestres</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gregori-Puigjan&#xe9;</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Conciliating binding efficiency and polypharmacology</article-title>. <source>Trends Pharmacol. Sci.</source> <volume>30</volume> (<issue>9</issue>), <fpage>470</fpage>&#x2013;<lpage>474</lpage>. <pub-id pub-id-type="doi">10.1016/j.tips.2009.07.004</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Musa</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ghoraie</surname>
<given-names>L. S.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S. D.</given-names>
</name>
<name>
<surname>Glazko</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Yli-Harja</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Dehmer</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>A review of connectivity map and computational approaches in pharmacogenomics</article-title>. <source>Briefings Bioinforma.</source> <volume>19</volume> (<issue>3</issue>), <fpage>506</fpage>&#x2013;<lpage>523</lpage>. <pub-id pub-id-type="doi">10.1093/bib/bbw112</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ohlson</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Designing transient binding drugs: A new concept for drug discovery</article-title>. <source>Drug Discov. Today</source> <volume>13</volume> (<issue>9-10</issue>), <fpage>433</fpage>&#x2013;<lpage>439</lpage>. <pub-id pub-id-type="doi">10.1016/j.drudis.2008.02.001</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qu</surname>
<given-names>X. A.</given-names>
</name>
<name>
<surname>Rajpal</surname>
<given-names>D. K.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Applications of connectivity map in drug discovery and development</article-title>. <source>Drug Discov. Today</source> <volume>17</volume> (<issue>23-24</issue>), <fpage>1289</fpage>&#x2013;<lpage>1298</lpage>. <pub-id pub-id-type="doi">10.1016/j.drudis.2012.07.017</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rodchenkov</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Babur</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Luna</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Aksoy</surname>
<given-names>B. A.</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>J. V.</given-names>
</name>
<name>
<surname>Fong</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Pathway commons 2019 update: integration, analysis and exploration of pathway data</article-title>. <source>Nucleic Acids Res.</source> <volume>48</volume> (<issue>D1</issue>), <fpage>D489</fpage>&#x2013;<lpage>D497</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkz946</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sj&#xf6;stedt</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Fagerberg</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Karlsson</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mitsios</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Adori</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>An atlas of the protein-coding genes in the human, pig, and mouse brain</article-title>. <source>Science</source> <volume>367</volume> (<issue>6482</issue>), <fpage>eaay5947</fpage>. <pub-id pub-id-type="doi">10.1126/science.aay5947</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Snel</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Lehmann</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Bork</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Huynen</surname>
<given-names>M. A.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>STRING: A web-server to retrieve and display the repeatedly occurring neighbourhood of a gene</article-title>. <source>Nucleic Acids Res.</source> <volume>28</volume> (<issue>18</issue>), <fpage>3442</fpage>&#x2013;<lpage>3444</lpage>. <pub-id pub-id-type="doi">10.1093/nar/28.18.3442</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Subramanian</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Narayan</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Corsello</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Peck</surname>
<given-names>D. D.</given-names>
</name>
<name>
<surname>Natoli</surname>
<given-names>T. E.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>X. D.</given-names>
</name>
<etal/>
</person-group> (<year>2017</year>). <article-title>A next generation connectivity map: L1000 platform and the first 1,000,000 profiles</article-title>. <source>Cell</source> <volume>171</volume> (<issue>6</issue>), <fpage>1437</fpage>&#x2013;<lpage>1452</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2017.10.049</pub-id>
<comment>&#x2b;</comment>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Subramanian</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Tamayo</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Mootha</surname>
<given-names>V. K.</given-names>
</name>
<name>
<surname>Mukherjee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ebert</surname>
<given-names>B. L.</given-names>
</name>
<name>
<surname>Gillette</surname>
<given-names>M. A.</given-names>
</name>
<etal/>
</person-group> (<year>2005</year>). <article-title>Gene set enrichment analysis: A knowledge-based approach for interpreting genome-wide expression profiles</article-title>. <source>Proc. Natl. Acad. Sci. U. S. A.</source> <volume>102</volume> (<issue>43</issue>), <fpage>15545</fpage>&#x2013;<lpage>15550</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.0506580102</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Trapotsi</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Hosseini-Gerami</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Bender</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Computational analyses of mechanism of action (MoA): data, methods and integration</article-title>. <source>RSC Chem. Biol.</source> <volume>3</volume> (<issue>2</issue>), <fpage>170</fpage>&#x2013;<lpage>200</lpage>. <pub-id pub-id-type="doi">10.1039/d1cb00069a</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>van Leeuwen</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Ba-Alawi</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Branchard</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Cruickshank</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Schormann</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Longo</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Computational pharmacogenomic screen identifies drugs that potentiate the anti-breast cancer activity of statins</article-title>. <source>Nat. Commun.</source> <volume>13</volume> (<issue>1</issue>), <fpage>6323</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-022-33144-9</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wishart</surname>
<given-names>D. S.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Oler</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Anjum</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Peters</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>HMDB 5.0: The human metabolome database for 2022</article-title>. <source>Nucleic Acids Res.</source> <volume>50</volume> (<issue>D1</issue>), <fpage>D622</fpage>&#x2013;<lpage>D631</lpage>. <pub-id pub-id-type="doi">10.1093/nar/gkab1062</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>S. D.</given-names>
</name>
<name>
<surname>Gant</surname>
<given-names>T. W.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>A simple and robust method for connecting small-molecule drugs using gene-expression signatures</article-title>. <source>BMC Bioinforma.</source> <volume>9</volume>, <fpage>258</fpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-9-258</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>