<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article article-type="methods-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1217414</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2023.1217414</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Methods</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Feature selection translates drug response predictors from cell lines to patients</article-title>
<alt-title alt-title-type="left-running-head">Yuan et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2023.1217414">10.3389/fgene.2023.1217414</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Yuan</surname>
<given-names>Shinsheng</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/42284/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Yen-Chou</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="fn" rid="fn1">
<sup>&#x2020;</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Tsai</surname>
<given-names>Chi-Hsuan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Huei-Wen</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Shieh</surname>
<given-names>Grace S.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/13074/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Institute of Statistical Science</institution>, <institution>Academia Sinica</institution>, <addr-line>Taipei</addr-line>, <country>Taiwan</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Bioinformatics Program</institution>, <institution>Taiwan International Graduate Program</institution>, <institution>Academia Sinica</institution>, <addr-line>Taipei</addr-line>, <country>Taiwan</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>College of Medicine</institution>, <institution>Graduate Institute of Toxicology</institution>, <institution>National Taiwan University</institution>, <addr-line>Taipei</addr-line>, <country>Taiwan</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Genome and Systems Biology Degree Program</institution>, <institution>Academia Sinica and National Taiwan University</institution>, <addr-line>Taipei</addr-line>, <country>Taiwan</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Data Science Degree Program</institution>, <institution>Academia Sinica and National Taiwan University</institution>, <addr-line>Taipei</addr-line>, <country>Taiwan</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/34320/overview">Li Zhang</ext-link>, University of California, San Francisco, United States</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/489941/overview">Tian Tian</ext-link>, Children&#x2019;s Hospital of Philadelphia, United States</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2315152/overview">Phi Le</ext-link>, University of California, San Francisco, United States</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Grace S. Shieh, <email>gshieh@stat.sinica.edu.tw</email>
</corresp>
<fn fn-type="equal" id="fn1">
<label>
<sup>&#x2020;</sup>
</label>
<p>These authors have contributed equally to this work</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>14</day>
<month>07</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1217414</elocation-id>
<history>
<date date-type="received">
<day>05</day>
<month>05</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>26</day>
<month>06</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Yuan, Chen, Tsai, Chen and Shieh.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Yuan, Chen, Tsai, Chen and Shieh</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Targeted therapies and chemotherapies are prevalent in cancer treatment. Identification of predictive markers to stratify cancer patients who will respond to these therapies remains challenging because patient drug response data are limited. As large amounts of drug response data have been generated by cell lines, methods to efficiently translate cell-line-trained predictors to human tumors will be useful in clinical practice. Here, we propose versatile feature selection procedures that can be combined with any classifier. For demonstration, we combined the feature selection procedures with a (linear) logit model and a (non-linear) K-nearest neighbor and trained these on cell lines to result in LogitDA and KNNDA, respectively. We show that LogitDA/KNNDA significantly outperforms existing methods, e.g., a logistic model and a deep learning method trained by thousands of genes, in prediction AUC (0.70&#x2013;1.00 for seven of the ten drugs tested) and is interpretable. This may be due to the fact that sample sizes are often limited in the area of drug response prediction. We further derive a novel adjustment on the prediction cutoff for LogitDA to yield a prediction accuracy of 0.70&#x2013;0.93 for seven drugs, including erlotinib and cetuximab, whose pathways relevant to anti-cancer therapies are also uncovered. These results indicate that our methods can efficiently translate cell-line-trained predictors into tumors.</p>
</abstract>
<kwd-group>
<kwd>cancer</kwd>
<kwd>domain adaptation</kwd>
<kwd>drug response</kwd>
<kwd>feature selection</kwd>
<kwd>machine learning</kwd>
<kwd>prediction</kwd>
</kwd-group>
<contract-num rid="cn001">AS-104-TP-A07 AS-SUMMIT</contract-num>
<contract-num rid="cn002">NSTC 109-2118-M-001-001-MY2 111-2118-M-001-009-MY2 106-2314-B-001-005</contract-num>
<contract-sponsor id="cn001">Academia Sinica<named-content content-type="fundref-id">10.13039/501100001869</named-content>
</contract-sponsor>
<contract-sponsor id="cn002">National Science and Technology Council<named-content content-type="fundref-id">10.13039/501100020950</named-content>
</contract-sponsor>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Statistical Genetics and Methodology</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>Targeted therapies and chemotherapies are prevalent in cancer treatments. About 7% (11.1%) of US patients with advanced or metastatic cancer benefited from genome-targeted (genome-informed) therapy in 2021 (<xref ref-type="bibr" rid="B14">Haslam et al., 2021</xref>), with 13.6% (27.3%) estimated eligibility. Thus, identification of the characteristics of cancer patients who will respond to chemotherapies or targeted therapies using their molecular profiles is important for precision medicine. Given that patient drug response data relative to cell lines are limited, obtaining this information is challenging. However, large-scale drug sensitivity screens of cell lines have identified clinically meaningful gene&#x2013;drug interactions (<xref ref-type="bibr" rid="B3">Barretina et al., 2012</xref>; <xref ref-type="bibr" rid="B10">Garnett et al., 2012</xref>; <xref ref-type="bibr" rid="B4">Basu et al., 2013</xref>; <xref ref-type="bibr" rid="B28">Seashore-Ludlow et al., 2015</xref>). In particular, the Cancer Cell Line Encyclopedia (CCLE) database consists of the transcriptomic profiles, chromosomal copy number, and mutational profiles of 947 human cancer cell lines screened with 24 targeted therapies. Moreover, Iorio et al. published valuable results and the Genomics of Drug Sensitivity in Cancer (GDSC) dataset (<xref ref-type="bibr" rid="B15">Iorio et al., 2016</xref>), consisting of the multi-omics profiles and drug sensitivity scores (IC<sub>50</sub>) of 1,001 cancer cell lines screened with 265 anti-cancer compounds, which may be used to train predictors to improve drug response prediction in patients. <xref ref-type="bibr" rid="B11">Geeleher et al. 
(2014</xref>) adopted this approach and showed that the trained ridge regression models using whole-genome gene expression and the response of &#x223c;700 cell lines in the Cancer Genome Project (<xref ref-type="bibr" rid="B10">Garnett et al., 2012</xref>) resulted in equally good or better predictions of human tumors than gene signatures derived directly from three clinical datasets.</p>
<p>Recently, a deep neural network-based method known as multi-omics late integration (MOLI) was proposed (<xref ref-type="bibr" rid="B29">Sharifi-Noghabi et al., 2019</xref>). This method was used to predict drug response by first embedding each multi-omics data type separately and then concatenating all embeddings into one representation, which was optimized via a cost function. The performance of MOLI was validated on patient-derived xenograft (PDX)/human tumor datasets of five chemotherapies and two targeted therapies. Moreover, data from cell lines screened with drugs targeting the same pathway, pan-drug data, were also integrated into MOLI to significantly improve its performance on targeted therapies.</p>
<p>Gene expression data are the most effective of the four omic data types for pan-cancer drug response prediction (<xref ref-type="bibr" rid="B15">Iorio et al., 2016</xref>). Thus, we used gene expression data in this study. Furthermore, when transferring the trained predictors from cell lines (the source domain) to human tumors or PDXs (the target domain), it is assumed that the features (genes in this study), which are used to train the predictors, behave similarly in these domains. However, cell lines and human tumors/PDXs are known to be different in the following respects (<xref ref-type="bibr" rid="B12">Gillet et al., 2013</xref>): there is no tumor micro-environment and vasculature in cell lines and no immune system in cell lines/PDXs. Nevertheless, strong positive correlations for mutational and transcriptomic profiles were found between cell lines and tumors (<xref ref-type="bibr" rid="B3">Barretina et al., 2012</xref>). Thus, it is reasonable to assume that there is a subset of genes that behave similarly between cancer cell lines and primary tumors (the two domains). PRECISE, a domain adaptation-based method, was developed to capture information shared among the preclinical models and human tumors (<xref ref-type="bibr" rid="B23">Mourragui et al., 2019</xref>). The resulting domain-invariant predictors were shown to reliably recover known associations between biomarkers and the corresponding drugs in human tumors. PRECISE assumes that the conditional distributions for drug response are the same in both domains (<inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>), similar to other existing methods. However, when this assumption is not met, a negative transfer will occur (<xref ref-type="bibr" rid="B25">Peres da Silva et al., 2021</xref>), e.g., drugs that are effective <italic>in vitro</italic> but not effective in clinical trials/practice.</p>
<p>To alleviate this shortcoming, we adopted supervised domain adaptation (DA) (<xref ref-type="bibr" rid="B17">Koniusz et al., 2017</xref>; <xref ref-type="bibr" rid="B22">Motiian et al., 2017</xref>). Although DA uses information of all labels in a test set, it has been shown to outperform numerous baselines on real-world datasets in active learning, e.g., the MNIST and USPS datasets containing images of digits from 0 to 9 analyzed by <xref ref-type="bibr" rid="B22">Motiian et al. (2017</xref>) and the office dataset, which is a benchmark for visual domain adaptation (<xref ref-type="bibr" rid="B22">Motiian et al., 2017</xref>). This study is the first application of DA to the area of drug response prediction. Here, we propose feature selection procedures combined with a regular logistic ridge regression model (called LogitDA) or with a non-linear classifier K-nearest neighbor (called KNNDA), which have the following desirable properties: 1) our assumption is weaker than that of the existing methods, and if the given training and test datasets satisfy the assumption, the proposed predictors achieve a high area under the receiver operating characteristic curve (AUC; Results), even when the training dataset is relatively small; 2) we devise an adjustment of the prediction probability cutoff for LogitDA, which leads to high prediction accuracy no matter whether datasets meet the assumption or not; and 3) the proposed method can be combined with any linear or non-linear classifier to be trained, thus being versatile.</p>
<p>We used the labels of the test sets only in the feature selection procedures. Specifically, we selected genes (<inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>&#x27;s) that have similar conditional distributions across the domains, <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2248;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <italic>S</italic> and <italic>T</italic> denote the source (training) and target (test) domains, respectively. This approach falls into one category of inductive transfer learning (<xref ref-type="bibr" rid="B24">Pan and Yang, 2010</xref>), in which the target and source domains are labeled, but the domains and the tasks are different. In the area of drug response prediction, the source domain consists of the gene expression data of cell lines and the associated probability distribution, and its task is to predict the drug response in terms of IC<sub>50</sub> scores. The target domain consists of gene expression data of patients/PDX and the associated probability distribution, and its task is to predict the drug response of patients/PDX, which is measured by changes in tumor volumes or months-to-progress of patients. As there is no tumor micro-environment or immune system in cell lines, we assume that the probability distributions of the source and target domains are not the same. Thus, the domains and tasks are different in the area of drug response prediction. The remaining steps of our feature selection are prioritizing genes by their differential expression in sensitive <italic>versus</italic> resistant cell lines, keeping the top-ranked 1,000 genes for explainable features, and ranking these genes by a measure of their power to separate sensitive from resistant cell lines. Next, for each of the ten drugs, we trained a regular logit model and K-nearest neighbor (KNN) using expression data of the top-ranked <italic>p</italic> (<italic>p</italic> &#x2264; 1,000) genes of cell lines in GDSC via 5-fold cross-validation (CV); see Methods for details. Subsequently, we compared the performance of LogitDA/KNNDA to that of the baseline ridge regression in the work of <xref ref-type="bibr" rid="B11">Geeleher et al. (2014</xref>) and MOLI (<xref ref-type="bibr" rid="B29">Sharifi-Noghabi et al., 2019</xref>) for ten test sets in human tumors and PDXs. The scheme of the proposed approach is shown in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>The scheme of the proposed approach.</p>
</caption>
<graphic xlink:href="fgene-14-1217414-g001.tif"/>
</fig>
<p>Notably, LogitDA and KNNDA turned out to be very powerful for drug response prediction. For example, LogitDA (KNNDA), trained by the top-ranked 50 (220) selected genes of the 370 cell lines screened with erlotinib, resulted in a prediction AUC of 0.94 (0.90) for NSCLC tumors. The high prediction power of LogitDA (KNNDA) for targeted therapies suggests that these predictors may help physicians stratify patients with NSCLC who will respond to erlotinib and spare those who do not from adverse effects, illustrating that these predictors have clinical implications. Finally, we uncovered the pathways of the top-fitted genes of LogitDA/KNNDA for erlotinib and cetuximab, which include pathways relevant to anti-cancer therapies and several metabolic pathways. These results indicate that our methods can efficiently translate cell-line-trained predictors into human tumors.</p>
</sec>
<sec sec-type="methods" id="s2">
<title>Methods</title>
<sec id="s2-1">
<title>Datasets</title>
<p>The gene expression data and drug response of 1,001 cell lines screened with 265 drugs in the GDSC dataset (<xref ref-type="bibr" rid="B15">Iorio et al., 2016</xref>) were used as training sets, and ten sets of gene expression data and the response of PDX/patients treated with chemotherapies and targeted therapies were used for testing. The test sets consisted of three clinical trial datasets for docetaxel, erlotinib, and sorafenib (<xref ref-type="bibr" rid="B11">Geeleher et al., 2014</xref>), four sets from PDX Encyclopedia datasets (<xref ref-type="bibr" rid="B9">Gao et al., 2015</xref>), and three sets of TCGA patients (<xref ref-type="bibr" rid="B37">Weinstein et al., 2013</xref>; <xref ref-type="bibr" rid="B5">Ding et al., 2016</xref>). The sources for both training and testing datasets are detailed in the Data Availability Statement. All datasets are publicly available.</p>
<p>Gene expression profiles of cell lines, which were RMA-normalized, log-transformed, and aggregated to the level of genes, were downloaded from the GDSC database. The gene expression profiles of the first three test datasets were preprocessed by <xref ref-type="bibr" rid="B11">Geeleher et al. (2014</xref>), and those of the remaining seven test sets (from MOLI) were converted to TPM and log-transformed by <xref ref-type="bibr" rid="B29">Sharifi-Noghabi et al. (2019</xref>).</p>
</sec>
<sec id="s2-2">
<title>Pre-processing of gene expression data</title>
<p>The gene expression data (GED) of cell lines from the GDSC dataset were first standardized by the mean and standard error (s.e.) of each gene. Next, the GED of each cell line was normalized by the house-keeping gene GAPDH across cell lines and homogenized with the GED of test sets by the ComBat() function from the sva library in R (<xref ref-type="bibr" rid="B11">Geeleher et al., 2014</xref>). The details of each dataset, such as the drug name, the number of samples, and the number of genes in the training and test sets, are provided in <xref ref-type="table" rid="T1">Table 1</xref>. Similar to existing methods (<xref ref-type="bibr" rid="B11">Geeleher et al., 2014</xref>; <xref ref-type="bibr" rid="B29">Sharifi-Noghabi et al., 2019</xref>), we included only genes present in both training and test sets for the subsequent analysis; the four columns from the right-hand side of <xref ref-type="table" rid="T1">Table 1</xref> show the number of overlapping genes.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Information about the training and test datasets of the studied drugs.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center"/>
<th colspan="3" align="center">Training set (GDSC)</th>
<th colspan="3" align="center">Test set</th>
<th colspan="2" align="center">No. of overlapping genes</th>
<th colspan="2" align="center">Geeleher et al.</th>
</tr>
<tr>
<th align="left">Drug (test dataset)</th>
<th align="center">No. of cell lines</th>
<th align="center">NS<xref ref-type="table-fn" rid="Tfn1">
<sup>a</sup>
</xref>
</th>
<th align="center">NR<xref ref-type="table-fn" rid="Tfn1">
<sup>a</sup>
</xref>
</th>
<th align="center">No. of samples</th>
<th align="center">NS</th>
<th align="center">NR</th>
<th align="center">In training and test sets<xref ref-type="table-fn" rid="Tfn2">
<sup>b</sup>
</xref>
</th>
<th align="center">Sifted by DA<xref ref-type="table-fn" rid="Tfn3">
<sup>c</sup>
</xref>
</th>
<th align="center">No. of genes in common<xref ref-type="table-fn" rid="Tfn2">
<sup>b</sup>
</xref>
</th>
<th align="center">No. of the remaining genes<xref ref-type="table-fn" rid="Tfn4">
<sup>d</sup>
</xref>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Docetaxel (GSE6434)</td>
<td align="center">850</td>
<td align="center">564</td>
<td align="center">286</td>
<td align="center">24</td>
<td align="center">10</td>
<td align="center">14</td>
<td align="center">7,963</td>
<td align="center">5,173</td>
<td align="center">7,964</td>
<td align="center">6,371</td>
</tr>
<tr>
<td align="left">Erlotinib (GSE33072)</td>
<td align="center">370</td>
<td align="center">28</td>
<td align="center">342</td>
<td align="center">25</td>
<td align="center">11</td>
<td align="center">14</td>
<td align="center">16,898</td>
<td align="center">12,264</td>
<td align="center">16,760</td>
<td align="center">13,408</td>
</tr>
<tr>
<td align="left">Sorafenib (GSE33072)</td>
<td align="center">403</td>
<td align="center">117</td>
<td align="center">286</td>
<td align="center">37</td>
<td align="center">21</td>
<td align="center">16</td>
<td align="center">16,898</td>
<td align="center">12,961</td>
<td align="center">16,760</td>
<td align="center">13,408</td>
</tr>
<tr>
<td align="left">Cetuximab (PDX)</td>
<td align="center">877</td>
<td align="center">40</td>
<td align="center">837</td>
<td align="center">60</td>
<td align="center">5</td>
<td align="center">55</td>
<td align="center">16,191</td>
<td align="center">7,509</td>
<td align="center">15,121</td>
<td align="center">12,096</td>
</tr>
<tr>
<td align="left">Erlotinib (PDX)</td>
<td align="center">370</td>
<td align="center">28</td>
<td align="center">342</td>
<td align="center">21</td>
<td align="center">3</td>
<td align="center">18</td>
<td align="center">16,190</td>
<td align="center">10,343</td>
<td align="center">18,232</td>
<td align="center">14,585</td>
</tr>
<tr>
<td align="left">Gemcitabine (PDX)</td>
<td align="center">866</td>
<td align="center">680</td>
<td align="center">186</td>
<td align="center">25</td>
<td align="center">7</td>
<td align="center">18</td>
<td align="center">16,190</td>
<td align="center">10,115</td>
<td align="center">18,232</td>
<td align="center">14,585</td>
</tr>
<tr>
<td align="left">Paclitaxel (PDX)</td>
<td align="center">399</td>
<td align="center">284</td>
<td align="center">115</td>
<td align="center">43</td>
<td align="center">5</td>
<td align="center">38</td>
<td align="center">16,190</td>
<td align="center">8,548</td>
<td align="center">18,232</td>
<td align="center">14,585</td>
</tr>
<tr>
<td align="left">Cisplatin (TCGA)</td>
<td align="center">850</td>
<td align="center">275</td>
<td align="center">575</td>
<td align="center">66</td>
<td align="center">60</td>
<td align="center">6</td>
<td align="center">16,026</td>
<td align="center">8,550</td>
<td align="center">18,216</td>
<td align="center">14,572</td>
</tr>
<tr>
<td align="left">Docetaxel (TCGA)</td>
<td align="center">850</td>
<td align="center">564</td>
<td align="center">286</td>
<td align="center">16</td>
<td align="center">8</td>
<td align="center">8</td>
<td align="center">16,168</td>
<td align="center">12,968</td>
<td align="center">18,216</td>
<td align="center">14,572</td>
</tr>
<tr>
<td align="left">Gemcitabine (TCGA)</td>
<td align="center">866</td>
<td align="center">680</td>
<td align="center">186</td>
<td align="center">57</td>
<td align="center">21</td>
<td align="center">36</td>
<td align="center">16,003</td>
<td align="center">9,728</td>
<td align="center">18,216</td>
<td align="center">14,572</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="Tfn1">
<label>
<sup>a</sup>
</label>
<p>NS and NR denote the number of sensitive (responder) and resistant (non-responder) samples.</p>
</fn>
<fn id="Tfn2">
<label>
<sup>b</sup>
</label>
<p>The number of overlapping genes between the training and test sets. The initial input genes of the work of <xref ref-type="bibr" rid="B11">Geeleher et al. (2014)</xref> were the same as those of our method, as we could not access the input genes of the former.</p>
</fn>
<fn id="Tfn3">
<label>
<sup>c</sup>
</label>
<p>The number of genes that were distributed similarly, namely, filtered by supervised domain adaptation (<italic>p</italic> &#x3e; 0.05; Kolmogorov&#x2013;Smirnov test).</p>
</fn>
<fn id="Tfn4">
<label>
<sup>d</sup>
</label>
<p>The number of genes remaining after removing genes with the lowest 20% variability in expression across all samples.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s2-3">
<title>Feature selection procedures</title>
<p>For a given drug <italic>d</italic>, let <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> consist of <inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold-italic">R</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> expression profiles of <inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> genes of <inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> cell lines (PDXs or human tumors) and drug response values <inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="{" close="}" separators=",">
<mml:mn>0</mml:mn>
<mml:mn>1</mml:mn>
</mml:mfenced>
</mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> for drug <italic>d</italic>. Here, we dichotomized the drug response of cell lines (IC<sub>50</sub>) into 0 (resistant) if the IC<sub>50</sub> was greater than the maximum drug concentration (given in the GDSC website) and into 1 (sensitive) if it was less than or equal to that concentration (<xref ref-type="bibr" rid="B15">Iorio et al., 2016</xref>).</p>
<p>The proposed feature selection consisted of three procedures: 1) supervised DA (<xref ref-type="bibr" rid="B23">Mourragui et al., 2019</xref>), 2) differential expression between sensitive and resistant cells, and 3) the ratio of &#x201c;between-group to within-group sums of squares&#x201d; (the BW ratio) (<xref ref-type="bibr" rid="B8">Dudoit et al., 2002</xref>), where the two groups refer to sensitive cell lines and resistant cell lines for each drug. As cell lines (the training sets) are different from patients and mouse models in the test sets, we applied DA to sift genes whose conditional distributions given the label <italic>Y</italic> across domains were not significantly different. The intuition for feature selection procedures 2) and 3) is stated in Supplementary methods. In a pilot study, we also studied Logit (KNN) trained by genes sifted by unsupervised DA in combination with the latter two proposed feature selection procedures. However, the predictors employing unsupervised DA performed in a manner considerably inferior to those employing supervised DA, given that the remaining procedures were kept the same. Thus, we used supervised DA in this study.</p>
<p>Specifically, for each drug and gene <italic>X</italic>, the Kolmogorov&#x2013;Smirnov (KS) test for equality of the conditional distribution of selected genes <bold>
<italic>X</italic>
</bold> given the label <italic>Y</italic> in the source and target domains, <inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf10">
<mml:math id="m10">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, was conducted at <italic>P</italic> <inline-formula id="inf11">
<mml:math id="m11">
<mml:mrow>
<mml:mo>&#x2265;</mml:mo>
<mml:mi mathvariant="normal">&#x3b1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf12">
<mml:math id="m12">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3b1;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.6</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.7</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.8</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, and 0.9. It should be noted that DA used all the information about the responses of the samples in both domains. In this study, we used stringent cutoffs <italic>P</italic> <inline-formula id="inf13">
<mml:math id="m13">
<mml:mrow>
<mml:mo>&#x2265;</mml:mo>
<mml:mi mathvariant="normal">&#x3b1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> with <inline-formula id="inf14">
<mml:math id="m14">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3b1;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.6</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.7</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
<italic>,</italic> 0.8, and 0.9, as the source (cell lines) and target (PDX/patients) domains are quite different. Let <inline-formula id="inf15">
<mml:math id="m15">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denote <inline-formula id="inf16">
<mml:math id="m16">
<mml:mrow>
<mml:mi mathvariant="normal">g</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> It should be noted that the aforementioned KS test is equivalent to using the distance measure <inline-formula id="inf17">
<mml:math id="m17">
<mml:mrow>
<mml:mi>sup</mml:mi>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf18">
<mml:math id="m18">
<mml:mrow>
<mml:mi>sup</mml:mi>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf19">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denote the conditional distribution of <inline-formula id="inf20">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> given <italic>Y</italic> in the source and target domains, respectively.</p>
<p>Given the features <inline-formula id="inf21">
<mml:math id="m21">
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, we define <inline-formula id="inf22">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf23">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> to be the population proportion of responders in the source and target domains, respectively. Furthermore, let <inline-formula id="inf24">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>P</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x7c;</mml:mo>
<mml:mi mathvariant="bold-italic">X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf25">
<mml:math id="m25">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>P</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x7c;</mml:mo>
<mml:mi mathvariant="bold-italic">X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> be the estimated proportions of responders given <bold>
<italic>X</italic>
</bold> in the source and target domains, respectively, and <inline-formula id="inf26">
<mml:math id="m26">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mo>/</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>/</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mo>/</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> be the odds ratio between these two domains. Proposition 1 states that if the selected features satisfy the required DA condition and the odds ratio equals 1, then the features are invariant across the source and target domains, and <italic>vice versa</italic>.</p>
<p>
<statement content-type="proposition" id="Proposition_1">
<label>Proposition 1</label>
<p>Assume that the features <inline-formula id="inf27">
<mml:math id="m27">
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> satisfy the DA condition and that the marginal conditional distributions of <inline-formula id="inf28">
<mml:math id="m28">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> are independent for <italic>i &#x3d;1, &#x2026;, p</italic>. Then, <inline-formula id="inf29">
<mml:math id="m29">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x7c;</mml:mo>
<mml:mi mathvariant="bold-italic">X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x7c;</mml:mo>
<mml:mi mathvariant="bold-italic">X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> if and only if <italic>r</italic> &#x3d; 1.</p>
<p>The proof is given in Supplementary information.</p>
<p>Genes that passed the DA selection were then prioritized by their differential expression among all overlapping genes in sensitive cell lines <italic>versus</italic> resistant cell lines. To reveal explainable classifiers, we kept at most the top-ranked 1,000 genes with the smallest false discovery rate (FDR) values obtained from the two-sample t-test and sorted these genes by the BW ratio. The BW ratio for a gene <italic>j</italic> of the cell line <italic>i</italic> in group <italic>k</italic> is defined as follows:<disp-formula id="equ1">
<mml:math id="m30">
<mml:mrow>
<mml:mi mathvariant="bold">B</mml:mi>
<mml:mi mathvariant="bold">W</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold">j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi mathvariant="bold">i</mml:mi>
</mml:munder>
<mml:mrow>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:munder>
<mml:mrow>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo>.</mml:mo>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi mathvariant="bold">i</mml:mi>
</mml:munder>
<mml:mrow>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:munder>
<mml:mrow>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">y</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">k</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf30">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo>.</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf31">
<mml:math id="m32">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denote the average expression level of gene <italic>j</italic> across all cell lines and the cell lines of group <italic>k</italic> only, respectively.</p>
</statement>
</p>
</sec>
<sec id="s2-4">
<title>Training models using GDSC datasets</title>
<p>For fixed top-ranked <italic>p</italic> genes of the cell lines, where <italic>p</italic> ranged from 50 to 200 with step size 10 (denoted by 50 (10) 200), from 200 to 400 with step size 20 (200 (20) 400), and from 400 to 1,000 with step size 100 (400 (100) 1,000), we trained the hyperparameter &#x3bb; (the penalty constant of logit regression) and <italic>p</italic> using 5-fold CV with ten repeats. We used grid search to tune the hyperparameter as follows. First, let <inline-formula id="inf32">
<mml:math id="m33">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3bb;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:msub>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf33">
<mml:math id="m34">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>3</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> with step size 10<sup>0.3</sup>, i.e., we ran 5-fold CV of LogitDA with grid points 10<sup>&#x2013;3</sup>, 10<sup>&#x2013;2.7</sup>, &#x2026;, and 10<sup>0</sup> and found the grid point whose associated CV score was the maximum, which was termed <inline-formula id="inf34">
<mml:math id="m35">
<mml:mrow>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:msub>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, e.g., <inline-formula id="inf35">
<mml:math id="m36">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2.7</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. Second, we further evaluated LogitDA with grid points in <inline-formula id="inf36">
<mml:math id="m37">
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> and step size 10<sup>0.01</sup>. The grid point <inline-formula id="inf37">
<mml:math id="m38">
<mml:mrow>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:msub>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="italic">max</mml:mi>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, whose corresponding CV score is the maximum, determines the tuned hyperparameter <inline-formula id="inf38">
<mml:math id="m39">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3bb;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:msub>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="italic">max</mml:mi>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. The logit model with the highest averaged CV AUC determined <italic>p</italic> and <inline-formula id="inf39">
<mml:math id="m40">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3bb;</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> which yielded one LogitDA. Then, we applied the LogitDA to a test set to determine its prediction AUC. The aforementioned procedures were repeated ten times (with different seeds for CV) to obtain the mean and s.e. of the prediction AUC.</p>
<p>For the KNN classifier, we used the distance measure 1 &#x2212; rho, where rho is Spearman&#x2019;s rho between any two cell lines computed on the selected <italic>p</italic> genes, because the default Euclidean distance did not work well for the GED of any two cell lines in our pilot study. For each drug and each fixed number <italic>p</italic> of top-ranked genes of the cell lines, where <italic>p</italic> &#x3d; 50 (10) 200, 200 (20) 400, and 400 (100) 1,000, we first trained the hyperparameter K of KNN via 5-fold CV with ten repeats, using the GED and drug response of cell lines, and computed the AUC in the cross-validation experiments. The hyperparameter K was determined by the experiment with the highest averaged CV score. We then fitted all data into this KNN classifier with each set of top-ranked <italic>p</italic> genes. Of all the KNN classifiers trained on the top-ranked <italic>p</italic> genes, the one with the highest averaged CV score determined the value of <italic>p</italic>, which yielded one trained KNNDA predictor. We repeated the aforementioned procedures ten times to yield the mean and s.e. of the prediction AUC of KNNDA.</p>
</sec>
<sec id="s2-5">
<title>Adjustment of the probability cutoff</title>
<p>The following lemma and Proposition 2 establish the theoretical foundation for adjusting the prediction probability cutoff when the drug response rates between cell lines and tumors differ.</p>
<p>Lemma. When <inline-formula id="inf40">
<mml:math id="m41">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf41">
<mml:math id="m42">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x7c;</mml:mo>
<mml:mi mathvariant="bold-italic">X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x7c;</mml:mo>
<mml:mi mathvariant="bold-italic">X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, the prediction probability is overestimated. Similarly, when <inline-formula id="inf42">
<mml:math id="m43">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf43">
<mml:math id="m44">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x7c;</mml:mo>
<mml:mi mathvariant="bold-italic">X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x7c;</mml:mo>
<mml:mi mathvariant="bold-italic">X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, the prediction probability is underestimated.</p>
<p>The proof is given in Supplementary information.</p>
<p>
<statement content-type="proposition" id="Proposition_2">
<label>Proposition 2</label>
<p>Assume that the predictors <inline-formula id="inf44">
<mml:math id="m45">
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> satisfy the DA condition and that the marginal conditional distributions of <inline-formula id="inf45">
<mml:math id="m46">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> are independent for <italic>i &#x3d;1, &#x2026;, p</italic>. When the odds ratio between the source and target domains is <inline-formula id="inf46">
<mml:math id="m47">
<mml:mrow>
<mml:mi mathvariant="normal">r</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, the cutoff of the prediction probability <inline-formula id="inf47">
<mml:math id="m48">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x7c;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> should be adjusted to <inline-formula id="inf48">
<mml:math id="m49">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>/</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>The proof is given in Supplementary information.</p>
<p>As Proposition 2 suggests, the prediction probability cutoff will deviate from 0.5 when the population proportions of responders in the target and source domains differ. Due to a lack of information on the ratio of responders in the target domain (patients), we estimated it using test data directly in this study. However, a better estimate of the ratio can be obtained once more responses to these drugs are released. We note that Proposition 2 considers a continuous prediction probability function. It does not apply to the KNN classifier, which makes a prediction on a test sample based on the majority voting for its K-nearest neighbors.</p>
</statement>
</p>
</sec>
<sec id="s2-6">
<title>External validation of the trained classifiers</title>
<p>Finally, we applied the trained predictors LogitDA with <italic>&#x3b1; &#x3d;</italic> 0.7 and KNNDA with <italic>&#x3b1; &#x3d;</italic> 0.7 to the ten external test sets. To compare with the results of the baseline ridge regression (<xref ref-type="bibr" rid="B11">Geeleher et al., 2014</xref>) and MOLI complete (<xref ref-type="bibr" rid="B29">Sharifi-Noghabi et al., 2019</xref>), we repeated the experiments ten times for estimating the s.e. of the predictors for each drug.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<sec id="s3-1">
<title>Experimental design</title>
<p>In this study, we aimed to investigate the following questions: Do logistic ridge regression and KNN with adequately selected features outperform a deep learning-based predictor, MOLI complete (<xref ref-type="bibr" rid="B29">Sharifi-Noghabi et al., 2019</xref>), in terms of prediction AUC on external test sets (PDX and patient data)? Do the proposed predictors, LogitDA and KNNDA, work well for targeted therapies and/or chemotherapies? Information about the training and test datasets of the ten studied drugs is provided in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<p>After the features (namely, genes in this study) were selected by the proposed procedures (see Methods for details), we trained logistic ridge regression (KNN) with 5-fold CV using the GED of the prioritized features of GDSC cell lines screened with seven drugs, which included docetaxel, erlotinib, sorafenib, cetuximab, gemcitabine, paclitaxel, and cisplatin, in a total of ten sets. These drugs were chosen because we planned to compare LogitDA and KNNDA to the baseline logistic ridge regression (<xref ref-type="bibr" rid="B11">Geeleher et al., 2014</xref>) and MOLI complete (<xref ref-type="bibr" rid="B29">Sharifi-Noghabi et al., 2019</xref>).</p>
</sec>
<sec id="s3-2">
<title>Training our predictors LogitDA and KNNDA</title>
<p>As the training set (GDSC cell lines) is quite different from the test sets (PDX and patient data) (<xref ref-type="bibr" rid="B29">Sharifi-Noghabi et al., 2019</xref>), the cutoff for domain adaptation should be strict. Nevertheless, this threshold should allow sufficient features to pass so that a classifier can be adequately trained; see Methods for details. Therefore, we trained LogitDA with features (<inline-formula id="inf49">
<mml:math id="m50">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) that passed the KS test for equality of <inline-formula id="inf50">
<mml:math id="m51">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf51">
<mml:math id="m52">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> with <italic>P</italic> <inline-formula id="inf52">
<mml:math id="m53">
<mml:mrow>
<mml:mo>&#x2265;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> <inline-formula id="inf53">
<mml:math id="m54">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3b1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, where <italic>&#x3b1;</italic> &#x3d; 0.6, 0.7, 0.8, and 0.9, and we denote the resulting predictor as LogitDA<sub>_&#x3b1;</sub>, where <inline-formula id="inf54">
<mml:math id="m55">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3b1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> <italic>&#x3d;</italic> 0.6<italic>,</italic> 0.7<italic>,</italic> 0.8<italic>,</italic> and 0.9.</p>
<p>
<xref ref-type="sec" rid="s10">Supplementary Table S1</xref> shows that the CV scores of LogitDA<sub>_0.6</sub> were equivalent to those of LogitDA<sub>_0.7</sub> for the ten drugs. However, to satisfy the DA condition required by Proposition 1, namely, the marginal conditional distribution of selected genes <bold>
<italic>X</italic>
</bold> given the label <italic>Y</italic> in both domains is equal, the value of <italic>&#x3b1;</italic> should be large, so we chose LogitDA<sub>_0.7</sub>. Proposition 1 shows that features that satisfy the conditions will be domain-invariant. That is, if the features perform well in the source domain, they will also perform well in the target domain.</p>
<p>Moreover, we observed that each averaged CV score of LogitDA<sub>_0.7</sub> was higher than that of LogitDA<sub>_0.8</sub> and LogitDA<sub>_0.9</sub>, except that they had the same CV score for cetuximab (PDX). Thus, among the LogitDA predictors, we suggest using LogitDA<sub>_0.7</sub> for the prediction of the drug response of patients/PDX and denote it by LogitDA, henceforth, for simplicity; for details, we refer to <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Cross-validation result of LogitDA<sub>_&#x3b1;</sub> with various cutoffs of the KS test.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="right">Method</th>
<th colspan="3" align="center">LogitDA<sub>_0.70</sub>
</th>
<th colspan="3" align="center">LogitDA<sub>_0.80</sub>
</th>
<th colspan="3" align="center">LogitDA<sub>_0.90</sub>
</th>
</tr>
<tr>
<th align="center">
<italic>p</italic>
<xref ref-type="table-fn" rid="Tfn5">
<sup>a</sup>
</xref>
</th>
<th rowspan="2" align="center">
<inline-formula id="inf55">
<mml:math id="m56">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3bb;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th rowspan="2" align="center">CV score</th>
<th align="center">
<italic>p</italic>
</th>
<th rowspan="2" align="center">
<inline-formula id="inf56">
<mml:math id="m57">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3bb;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th rowspan="2" align="center">CV score</th>
<th align="center">
<italic>p</italic>
</th>
<th rowspan="2" align="center">
<inline-formula id="inf57">
<mml:math id="m58">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3bb;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th rowspan="2" align="center">CV score</th>
</tr>
<tr>
<th align="left">Drug (test set)</th>
<th align="center">Genes<xref ref-type="table-fn" rid="Tfn6">
<sup>b</sup>
</xref>
</th>
<th align="center">Genes</th>
<th align="center">Genes</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="2" align="left">Docetaxel (GSE6434) n &#x3d; 24</td>
<td align="center">170</td>
<td rowspan="2" align="center">0.447</td>
<td rowspan="2" align="center">0.76</td>
<td align="center">170</td>
<td rowspan="2" align="center">0.242</td>
<td rowspan="2" align="center">0.75</td>
<td align="center">50</td>
<td rowspan="2" align="center">0.424</td>
<td rowspan="2" align="center">0.660</td>
</tr>
<tr>
<td align="center">437</td>
<td align="center">223</td>
<td align="center">59</td>
</tr>
<tr>
<td rowspan="2" align="left">Erlotinib (GSE30072) n &#x3d; 25</td>
<td align="center">50</td>
<td rowspan="2" align="center">1.039</td>
<td rowspan="2" align="center">0.80</td>
<td align="center">100</td>
<td rowspan="2" align="center">0.962</td>
<td rowspan="2" align="center">0.78</td>
<td align="center">50</td>
<td rowspan="2" align="center">1.122</td>
<td rowspan="2" align="center">0.677</td>
</tr>
<tr>
<td align="center">860</td>
<td align="center">391</td>
<td align="center">90</td>
</tr>
<tr>
<td rowspan="2" align="left">Sorafenib (GSE33072) n &#x3d; 37</td>
<td align="center">110</td>
<td rowspan="2" align="center">1.122</td>
<td rowspan="2" align="center">0.68</td>
<td align="center">90</td>
<td rowspan="2" align="center">1.122</td>
<td rowspan="2" align="center">0.67</td>
<td align="center">50</td>
<td rowspan="2" align="center">1.122</td>
<td rowspan="2" align="center">0.633</td>
</tr>
<tr>
<td align="center">1,000</td>
<td align="center">752</td>
<td align="center">207</td>
</tr>
<tr>
<td rowspan="2" align="left">Cetuximab (PDX) n &#x3d; 60</td>
<td align="center">130</td>
<td rowspan="2" align="center">0.019</td>
<td rowspan="2" align="center">0.86</td>
<td align="center">150</td>
<td rowspan="2" align="center">0.041</td>
<td rowspan="2" align="center">0.84</td>
<td align="center">50</td>
<td rowspan="2" align="center">0.048</td>
<td rowspan="2" align="center">0.802</td>
</tr>
<tr>
<td align="center">827</td>
<td align="center">440</td>
<td align="center">157</td>
</tr>
<tr>
<td rowspan="2" align="left">Erlotinib (PDX) n &#x3d; 21</td>
<td align="center">50</td>
<td rowspan="2" align="center">0.495</td>
<td rowspan="2" align="center">0.85</td>
<td align="center">50</td>
<td rowspan="2" align="center">0.521</td>
<td rowspan="2" align="center">0.82</td>
<td align="center">50</td>
<td rowspan="2" align="center">1.122</td>
<td rowspan="2" align="center">0.786</td>
</tr>
<tr>
<td align="center">877</td>
<td align="center">499</td>
<td align="center">163</td>
</tr>
<tr>
<td rowspan="2" align="left">Gemcitabine (PDX) n &#x3d; 25</td>
<td align="center">100</td>
<td rowspan="2" align="center">0.414</td>
<td rowspan="2" align="center">0.71</td>
<td align="center">70</td>
<td rowspan="2" align="center">0.521</td>
<td rowspan="2" align="center">0.69</td>
<td align="center">50</td>
<td rowspan="2" align="center">0.242</td>
<td rowspan="2" align="center">0.666</td>
</tr>
<tr>
<td align="center">1,000</td>
<td align="center">541</td>
<td align="center">151</td>
</tr>
<tr>
<td rowspan="2" align="left">Paclitaxel (PDX) n &#x3d; 43</td>
<td align="center">50</td>
<td rowspan="2" align="center">0.221</td>
<td rowspan="2" align="center">0.69</td>
<td align="center">50</td>
<td rowspan="2" align="center">1.122</td>
<td rowspan="2" align="center">0.64</td>
<td align="center">50</td>
<td rowspan="2" align="center">1.039</td>
<td rowspan="2" align="center">0.493</td>
</tr>
<tr>
<td align="center">643</td>
<td align="center">311</td>
<td align="center">78</td>
</tr>
<tr>
<td rowspan="2" align="left">Cisplatin (TCGA) n &#x3d; 66</td>
<td align="center">130</td>
<td rowspan="2" align="center">0.224</td>
<td rowspan="2" align="center">0.71</td>
<td align="center">110</td>
<td rowspan="2" align="center">0.192</td>
<td rowspan="2" align="center">0.67</td>
<td align="center">50</td>
<td rowspan="2" align="center">1.066</td>
<td rowspan="2" align="center">0.598</td>
</tr>
<tr>
<td align="center">628</td>
<td align="center">293</td>
<td align="center">79</td>
</tr>
<tr>
<td rowspan="2" align="left">Docetaxel (TCGA) n &#x3d; 16</td>
<td align="center">650</td>
<td rowspan="2" align="center">0.521</td>
<td rowspan="2" align="center">0.79</td>
<td align="center">240</td>
<td rowspan="2" align="center">0.424</td>
<td rowspan="2" align="center">0.77</td>
<td align="center">80</td>
<td rowspan="2" align="center">0.236</td>
<td rowspan="2" align="center">0.741</td>
</tr>
<tr>
<td align="center">1,000</td>
<td align="center">691</td>
<td align="center">193</td>
</tr>
<tr>
<td rowspan="2" align="left">Gemcitabine (TCGA) n &#x3d; 57</td>
<td align="center">140</td>
<td rowspan="2" align="center">0.447</td>
<td rowspan="2" align="center">0.74</td>
<td align="center">70</td>
<td rowspan="2" align="center">0.192</td>
<td rowspan="2" align="center">0.72</td>
<td align="center">50</td>
<td rowspan="2" align="center">0.414</td>
<td rowspan="2" align="center">0.659</td>
</tr>
<tr>
<td align="center">841</td>
<td align="center">399</td>
<td align="center">94</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="Tfn5">
<label>
<sup>a</sup>
</label>
<p>
<italic>p</italic> denotes the top-<italic>p</italic> genes sifted by the feature selection procedures.</p>
</fn>
<fn id="Tfn6">
<label>
<sup>b</sup>
</label>
<p>Genes denotes the number of genes that passed DA screening across the training and test domains for each drug.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Taking the training result of LogitDA into account, we trained the non-linear KNNDA with <italic>&#x3b1;</italic> &#x3d; 0.7, 0.8, and 0.9 for the KS test and summarized the 5-fold CV result in <xref ref-type="table" rid="T3">Table 3</xref>. KNNDA<sub>_0.7</sub> performed better than KNNDA<sub>_0.8</sub>, as the former had higher (lower) averaged CV scores for five (two) drugs than the latter; the differences ranged from 1% to 4%. Moreover, KNNDA<sub>_0.8</sub> outperformed KNNDA<sub>_0.9</sub> in terms of higher averaged CV scores for nine of the ten drugs. Thus, we suggest using KNNDA<sub>_0.7</sub> among these non-linear predictors for the test sets. For simplicity, we denote KNNDA<sub>_0.7</sub> by KNNDA henceforth.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>CV result of KNNDA<sub>_&#x3b1;</sub> with various cutoffs of the KS test.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left">Method</th>
<th colspan="3" align="center">KNNDA<sub>_0.70</sub>
</th>
<th colspan="3" align="center">KNNDA<sub>_0.80</sub>
</th>
<th colspan="3" align="center">KNNDA<sub>_0.90</sub>
</th>
</tr>
<tr>
<th align="center">
<italic>p</italic>
<xref ref-type="table-fn" rid="Tfn7">
<sup>a</sup>
</xref>
</th>
<th rowspan="2" align="center">Best K</th>
<th rowspan="2" align="center">CV score</th>
<th align="center">
<italic>p</italic>
</th>
<th rowspan="2" align="center">Best K</th>
<th rowspan="2" align="center">CV score</th>
<th align="center">
<italic>p</italic>
</th>
<th rowspan="2" align="center">Best K</th>
<th rowspan="2" align="center">CV score</th>
</tr>
<tr>
<th align="left">Drug</th>
<th align="center">Genes<xref ref-type="table-fn" rid="Tfn8">
<sup>b</sup>
</xref>
</th>
<th align="center">Genes</th>
<th align="center">Genes</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="2" align="left">Docetaxel (GSE6434) n &#x3d; 24</td>
<td align="center">110</td>
<td rowspan="2" align="center">23</td>
<td rowspan="2" align="center">0.76</td>
<td align="center">90</td>
<td rowspan="2" align="center">21</td>
<td rowspan="2" align="center">0.74</td>
<td align="center">59</td>
<td rowspan="2" align="center">19</td>
<td rowspan="2" align="center">0.69</td>
</tr>
<tr>
<td align="center">437</td>
<td align="center">223</td>
<td align="center">59</td>
</tr>
<tr>
<td rowspan="2" align="left">Erlotinib (GSE33072) n &#x3d; 25</td>
<td align="center">220</td>
<td rowspan="2" align="center">9</td>
<td rowspan="2" align="center">0.80</td>
<td align="center">140</td>
<td rowspan="2" align="center">15</td>
<td rowspan="2" align="center">0.78</td>
<td align="center">50</td>
<td rowspan="2" align="center">15</td>
<td rowspan="2" align="center">0.72</td>
</tr>
<tr>
<td align="center">860</td>
<td align="center">391</td>
<td align="center">90</td>
</tr>
<tr>
<td rowspan="2" align="left">Sorafenib (GSE33072) n &#x3d; 37</td>
<td align="center">120</td>
<td rowspan="2" align="center">17</td>
<td rowspan="2" align="center">0.64</td>
<td align="center">80</td>
<td rowspan="2" align="center">19</td>
<td rowspan="2" align="center">0.65</td>
<td align="center">150</td>
<td rowspan="2" align="center">19</td>
<td rowspan="2" align="center">0.60</td>
</tr>
<tr>
<td align="center">1,000</td>
<td align="center">752</td>
<td align="center">207</td>
</tr>
<tr>
<td rowspan="2" align="left">Cetuximab (PDX) n &#x3d; 60</td>
<td align="center">110</td>
<td rowspan="2" align="center">23</td>
<td rowspan="2" align="center">0.80</td>
<td align="center">50</td>
<td rowspan="2" align="center">29</td>
<td rowspan="2" align="center">0.76</td>
<td align="center">50</td>
<td rowspan="2" align="center">29</td>
<td rowspan="2" align="center">0.76</td>
</tr>
<tr>
<td align="center">827</td>
<td align="center">440</td>
<td align="center">157</td>
</tr>
<tr>
<td rowspan="2" align="left">Erlotinib (PDX) n &#x3d; 21</td>
<td align="center">60</td>
<td rowspan="2" align="center">19</td>
<td rowspan="2" align="center">0.83</td>
<td align="center">200</td>
<td rowspan="2" align="center">19</td>
<td rowspan="2" align="center">0.83</td>
<td align="center">110</td>
<td rowspan="2" align="center">17</td>
<td rowspan="2" align="center">0.81</td>
</tr>
<tr>
<td align="center">877</td>
<td align="center">499</td>
<td align="center">163</td>
</tr>
<tr>
<td rowspan="2" align="left">Gemcitabine (PDX) n &#x3d; 25</td>
<td align="center">240</td>
<td rowspan="2" align="center">17</td>
<td rowspan="2" align="center">0.69</td>
<td align="center">190</td>
<td rowspan="2" align="center">27</td>
<td rowspan="2" align="center">0.68</td>
<td align="center">100</td>
<td rowspan="2" align="center">29</td>
<td rowspan="2" align="center">0.66</td>
</tr>
<tr>
<td align="center">1,000</td>
<td align="center">541</td>
<td align="center">151</td>
</tr>
<tr>
<td rowspan="2" align="left">Paclitaxel (PDX) n &#x3d; 43</td>
<td align="center">100</td>
<td rowspan="2" align="center">9</td>
<td rowspan="2" align="center">0.65</td>
<td align="center">80</td>
<td rowspan="2" align="center">9</td>
<td rowspan="2" align="center">0.63</td>
<td align="center">60</td>
<td rowspan="2" align="center">7</td>
<td rowspan="2" align="center">0.57</td>
</tr>
<tr>
<td align="center">643</td>
<td align="center">311</td>
<td align="center">78</td>
</tr>
<tr>
<td rowspan="2" align="left">Cisplatin (TCGA) n &#x3d; 66</td>
<td align="center">190</td>
<td rowspan="2" align="center">29</td>
<td rowspan="2" align="center">0.66</td>
<td align="center">100</td>
<td rowspan="2" align="center">25</td>
<td rowspan="2" align="center">0.66</td>
<td align="center">79</td>
<td rowspan="2" align="center">19</td>
<td rowspan="2" align="center">0.59</td>
</tr>
<tr>
<td align="center">628</td>
<td align="center">293</td>
<td align="center">79</td>
</tr>
<tr>
<td rowspan="2" align="left">Docetaxel (TCGA) n &#x3d; 16</td>
<td align="center">200</td>
<td rowspan="2" align="center">25</td>
<td rowspan="2" align="center">0.75</td>
<td align="center">90</td>
<td rowspan="2" align="center">19</td>
<td rowspan="2" align="center">0.76</td>
<td align="center">60</td>
<td rowspan="2" align="center">29</td>
<td rowspan="2" align="center">0.74</td>
</tr>
<tr>
<td align="center">1,000</td>
<td align="center">691</td>
<td align="center">193</td>
</tr>
<tr>
<td rowspan="2" align="left">Gemcitabine (TCGA) n &#x3d; 57</td>
<td align="center">180</td>
<td rowspan="2" align="center">29</td>
<td rowspan="2" align="center">0.67</td>
<td align="center">70</td>
<td rowspan="2" align="center">29</td>
<td rowspan="2" align="center">0.67</td>
<td align="center">50</td>
<td rowspan="2" align="center">27</td>
<td rowspan="2" align="center">0.63</td>
</tr>
<tr>
<td align="center">841</td>
<td align="center">399</td>
<td align="center">94</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="Tfn7">
<label>
<sup>a</sup>
</label>
<p>
<italic>p</italic> denotes the top-<italic>p</italic> genes sifted by the feature selection procedures.</p>
</fn>
<fn id="Tfn8">
<label>
<sup>b</sup>
</label>
<p>Genes denotes the number of genes that passed DA screening across the training and test domains for each drug.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-3">
<title>LogitDA and KNNDA predict well for the ten drugs</title>
<p>Next, <xref ref-type="table" rid="T4">Table 4</xref> and <xref ref-type="fig" rid="F2">Figure 2</xref> report the prediction AUC of LogitDA and KNNDA for the ten test sets. The predictor LogitDA achieved a prediction AUC &#x3e;0.8 for five drugs and prediction AUCs of 0.71 and 0.70 for docetaxel and sorafenib, respectively. In particular, LogitDA using the top-ranked 50, 130, 50, 100, and 650 genes resulted in prediction AUCs of 0.94, 0.93, 1.00, 0.83, and 0.81 for erlotinib, cetuximab (PDX), erlotinib (PDX), gemcitabine (PDX), and docetaxel (TCGA), respectively. This result shows that LogitDA may be useful for precision oncology, especially for the targeted therapies erlotinib and cetuximab.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Performance of LogitDA and KNNDA compared to the other methods in terms of prediction AUC across four targeted therapies and six chemotherapies.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Method</th>
<th rowspan="2" align="center">
<xref ref-type="bibr" rid="B11">Geeleher et al. (2014)</xref>
</th>
<th rowspan="2" align="center">MOLI complete (expression data)</th>
<th rowspan="2" align="center">MOLI complete (multi omics data)</th>
<th rowspan="2" align="center">LogitDA</th>
<th rowspan="2" align="center">KNNDA</th>
</tr>
<tr>
<th align="left">Drug (test dataset)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Docetaxel (GSE6434)</td>
<td align="center">0.74<xref ref-type="table-fn" rid="Tfn9">
<sup>a</sup>
</xref>
</td>
<td align="center">0.31<xref ref-type="table-fn" rid="Tfn9">
<sup>a</sup>
</xref>
</td>
<td align="center">X</td>
<td align="center">0.76 &#xb1; 0.019</td>
<td align="center">
<bold>0.87</bold>
<xref ref-type="table-fn" rid="Tfn10">
<sup>b</sup>
</xref> &#xb1; 0.010</td>
</tr>
<tr>
<td align="left">Erlotinib (GSE33072)</td>
<td align="center">0.60</td>
<td align="center">0.73</td>
<td align="center">X</td>
<td align="center">
<bold>0.94</bold> &#xb1; 0.004</td>
<td align="center">0.90 &#xb1; 0.004</td>
</tr>
<tr>
<td align="left">Sorafenib (GSE33072)</td>
<td align="center">0.45</td>
<td align="center">0.65</td>
<td align="center">X</td>
<td align="center">
<bold>0.70</bold> &#xb1; 0.003</td>
<td align="center">
<bold>0.71</bold> &#xb1; 0.044</td>
</tr>
<tr>
<td align="left">Cetuximab (PDX)</td>
<td align="center">0.58</td>
<td align="center">0.51</td>
<td align="center">0.53</td>
<td align="center">0.93 &#xb1; 0.006</td>
<td align="center">
<bold>0.95</bold> &#xb1; 0.018</td>
</tr>
<tr>
<td align="left">Erlotinib (PDX)</td>
<td align="center">0.67</td>
<td align="center">0.39</td>
<td align="center">0.63</td>
<td align="center">
<bold>1.00</bold> &#xb1; 0.000</td>
<td align="center">
<bold>1.00</bold> &#xb1; 0.000</td>
</tr>
<tr>
<td align="left">Gemcitabine (PDX)</td>
<td align="center">0.59</td>
<td align="center">0.52</td>
<td align="center">0.64</td>
<td align="center">
<bold>0.83</bold> &#xb1; 0.015</td>
<td align="center">0.62 &#xb1; 0.006</td>
</tr>
<tr>
<td align="left">Paclitaxel (PDX)</td>
<td align="center">0.52</td>
<td align="center">0.69</td>
<td align="center">
<bold>0.74</bold>
</td>
<td align="center">0.68 &#xb1; 0.022</td>
<td align="center">0.65 &#xb1; 0.073</td>
</tr>
<tr>
<td align="left">Cisplatin (TCGA)</td>
<td align="center">0.62</td>
<td align="center">
<bold>0.75</bold>
</td>
<td align="center">0.66</td>
<td align="center">0.62 &#xb1; 0.012</td>
<td align="center">0.67 &#xb1; 0.028</td>
</tr>
<tr>
<td align="left">Docetaxel (TCGA)</td>
<td align="center">0.59</td>
<td align="center">0.63</td>
<td align="center">0.58</td>
<td align="center">
<bold>0.81</bold> &#xb1; 0.005</td>
<td align="center">0.77 &#xb1; 0.041</td>
</tr>
<tr>
<td align="left">Gemcitabine (TCGA)</td>
<td align="center">0.53</td>
<td align="center">0.64</td>
<td align="center">0.65</td>
<td align="center">0.62 &#xb1; 0.004</td>
<td align="center">
<bold>0.68</bold> &#xb1; 0.031</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="Tfn9">
<label>
<sup>a</sup>
</label>
<p>The initial input genes of the work of <xref ref-type="bibr" rid="B11">Geeleher et al. (2014)</xref> and MOLI complete were the same as those of LogitDA and KNNDA, as we could not access the input genes of the work of <xref ref-type="bibr" rid="B11">Geeleher et al. (2014)</xref>. The parameters of MOLI complete were optimized using the training data.</p>
</fn>
<fn id="Tfn10">
<label>
<sup>b</sup>
</label>
<p>The bold-faced values indicate the highest prediction AUC among the five methods for a drug.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Performance of LogitDA and KNNDA compared to other methods in terms of the prediction AUC for the ten drugs.</p>
</caption>
<graphic xlink:href="fgene-14-1217414-g002.tif"/>
</fig>
<p>Of the ten drugs, the predictor KNNDA achieved a prediction AUC &#x3e;0.8 for four drugs. Specifically, KNNDA using the top-ranked 110, 220, 110, and 60 genes resulted in prediction AUCs of 0.87, 0.90, 0.95, and 1.00 for docetaxel, erlotinib, cetuximab (PDX), and erlotinib (PDX), respectively. This result shows that KNNDA may also be useful for precision oncology.</p>
<p>We further compared LogitDA to KNNDA. Of the ten drugs, LogitDA had a significantly higher prediction AUC (21% higher) than KNNDA for gemcitabine (PDX) and a significantly lower prediction AUC (11% lower) than KNNDA for docetaxel, and it performed equivalently to KNNDA for the remaining eight drugs. Thus, these predictors performed equivalently; we refer to <xref ref-type="table" rid="T4">Table 4</xref> for details.</p>
</sec>
<sec id="s3-4">
<title>Our predictors outperform the deep learning-based MOLI</title>
<p>As shown in <xref ref-type="table" rid="T4">Table 4</xref> and <xref ref-type="fig" rid="F2">Figure 2</xref>, the predictors LogitDA and KNNDA performed significantly better (16%&#x2013;35% and 13%&#x2013;37% higher prediction AUC) than the baseline logistic ridge regression model (<xref ref-type="bibr" rid="B11">Geeleher et al., 2014</xref>) for nine and ten out of the ten drugs, respectively.</p>
<p>Next, we compared LogitDA and KNNDA to the deep neural network (DNN)-based method MOLI (<xref ref-type="bibr" rid="B29">Sharifi-Noghabi et al., 2019</xref>), which outperformed DNNs with early integration, with 5-fold CV and 10 repeats. Of the ten drugs in <xref ref-type="table" rid="T4">Table 4</xref>, LogitDA and KNNDA outperformed MOLI complete (expression data) for seven and eight drugs, respectively. In particular, LogitDA and KNNDA had 31%&#x2013;61% and 44%&#x2013;61% higher prediction AUCs compared to MOLI complete for docetaxel, cetuximab (PDX), erlotinib (PDX), and gemcitabine (PDX). Furthermore, LogitDA and KNNDA also had significantly higher (18%&#x2013;21% and 14%&#x2013;17%) prediction AUCs compared to MOLI complete for erlotinib and docetaxel (TCGA). LogitDA and KNNDA only performed significantly worse than MOLI complete (13% and 8% lower prediction AUC) for cisplatin (TCGA).</p>
<p>The prediction AUC of LogitDA for both cisplatin and gemcitabine (TCGA) was only 62%, which may be because the ratio of sensitive (responders) <italic>versus</italic> resistant (non-responders) samples is reversed from the training to the test sets (from about 1:2 to 10:1 for cisplatin); in other words, our assumption that the ratio of sensitive to resistant samples in both domains is equal was not met.</p>
<p>EGFR expression has been used as a biomarker to treat colorectal cancer (CRC) patients with wild-type KRAS in the US (patients with metastatic CRC and HNSCC in the EU). However, EGFR expression does not predict a response to cetuximab (<xref ref-type="bibr" rid="B21">Messersmith and Ahnen, 2008</xref>). The high prediction AUC of LogitDA for cetuximab (PDX) suggests that the fitted 130 genes may be promising for selecting KRAS wild-type patients with CRC for cetuximab, provided more test sets are validated.</p>
<p>Furthermore, we compared LogitDA (KNNDA) to MOLI complete (multi-omics data). LogitDA (KNNDA) had a significantly higher prediction AUC, by 19%&#x2013;40% (19%&#x2013;42%), in the test sets of cetuximab (PDX), erlotinib (PDX), gemcitabine (PDX, LogitDA only), and docetaxel (TCGA) and performed equivalently (&#x3c;6% differences of test AUC) to MOLI complete (multi-omics data) for the remaining drugs, except that KNNDA had a 9% lower test AUC for paclitaxel; please refer to <xref ref-type="table" rid="T4">Table 4</xref> for details.</p>
<p>For targeted therapies such as erlotinib and cetuximab, Sharifi-Noghabi and colleagues further trained MOLI on multi-omics data of five drugs targeting the EGFR pathway (MOLI complete (pan-drug)), which consisted of &#x3e;3,000 samples. It is interesting that LogitDA (KNNDA) (using merely hundreds of samples) outperformed MOLI (pan-drug) for erlotinib (PDX) and cetuximab (PDX) with 28% and 13% (28% and 15%) higher prediction AUCs, respectively.</p>
<p>In addition to the high prediction AUC for the aforementioned drugs, our approach also has the advantages of being interpretable and using far fewer (50&#x2013;650) genes that are interpretable in comparison with the baseline logistic ridge regression and MOLI, which used more than 12,000 genes, except for docetaxel, for which &#x223c;6,370 were used, as shown in <xref ref-type="table" rid="T1">Table 1</xref>. The use of far fewer genes (features) and hyperparameters may help protect LogitDA and KNNDA against overfitting.</p>
</sec>
<sec id="s3-5">
<title>Prediction accuracy of LogitDA for the ten drugs</title>
<p>For some of the ten drugs whose odds ratios of the source and target domains <inline-formula id="inf58">
<mml:math id="m59">
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mo>/</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>S</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>/</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mo>/</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> deviate substantially from 1 (<xref ref-type="sec" rid="s10">Supplementary Table S2A</xref>), Proposition 2 shows that their cutoff of the predicted probability should be adjusted to r/(r&#x2b;1) to account for the differences in the ratios across domains. Therefore, we adjusted the cutoffs accordingly and obtained the prediction accuracy of the ten drugs in <xref ref-type="table" rid="T5">Table 5</xref>. Notably, for seven of the ten drugs, the resulting prediction accuracy is greater than or equal to 0.70. In particular, for 25 tumors treated with erlotinib, LogitDA achieved a prediction accuracy of 0.76, and its prediction accuracy increased to 0.85 if we focused on the 20 <italic>EGFR</italic> and <italic>KRAS</italic> wild-type patients with NSCLC; LogitDA correctly predicted all 12 resistant tumors and five of eight tumors sensitive to erlotinib (<xref ref-type="sec" rid="s10">Supplementary Table S2B</xref>). To the best of our knowledge, to date, there is no efficient biomarker to predict the response to erlotinib of such patients (<xref ref-type="bibr" rid="B11">Geeleher et al., 2014</xref>), who were estimated to represent &#x223c;30% of Caucasian patients with lung adenocarcinoma (<xref ref-type="bibr" rid="B35">Wang M. et al., 2021</xref>).</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Prediction accuracy of LogitDA with the adjusted cutoffs of the ten drugs.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Drug (resource)</th>
<th align="center">n<xref ref-type="table-fn" rid="Tfn11">
<sup>a</sup>
</xref>
</th>
<th align="center">Cutoff</th>
<th align="center">Prediction accuracy</th>
<th align="center">False positive rate</th>
<th align="center">False negative rate</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Docetaxel (GSE6434)</td>
<td align="center">24</td>
<td align="center">0.73</td>
<td align="center">0.62</td>
<td align="center">0.36</td>
<td align="center">0.40 (4/10)</td>
</tr>
<tr>
<td align="left">Erlotinib (GSE33072)</td>
<td align="center">25</td>
<td align="center">0.09</td>
<td align="center">0.76</td>
<td align="center">0.00</td>
<td align="center">0.55 (6/11)</td>
</tr>
<tr>
<td align="left">Sorafenib (GSE33072)</td>
<td align="center">37</td>
<td align="center">0.24</td>
<td align="center">0.62</td>
<td align="center">0.63<xref ref-type="table-fn" rid="Tfn12">
<sup>b</sup>
</xref>
</td>
<td align="center">0.19 (4/21)</td>
</tr>
<tr>
<td align="left">Cetuximab (PDX)</td>
<td align="center">60</td>
<td align="center">0.34</td>
<td align="center">0.93</td>
<td align="center">0.00</td>
<td align="center">0.80 (4/5)</td>
</tr>
<tr>
<td align="left">Erlotinib (PDX)</td>
<td align="center">21</td>
<td align="center">0.33</td>
<td align="center">0.86</td>
<td align="center">0.00</td>
<td align="center">1.00 (3/3)</td>
</tr>
<tr>
<td align="left">Gemcitabine (PDX)</td>
<td align="center">25</td>
<td align="center">0.90</td>
<td align="center">0.72</td>
<td align="center">0.06</td>
<td align="center">0.86 (6/7)</td>
</tr>
<tr>
<td align="left">Paclitaxel (PDX)</td>
<td align="center">43</td>
<td align="center">0.95</td>
<td align="center">0.88</td>
<td align="center">0.00</td>
<td align="center">1.00 (5/5)</td>
</tr>
<tr>
<td align="left">Cisplatin (TCGA)</td>
<td align="center">66</td>
<td align="center">0.05</td>
<td align="center">0.88</td>
<td align="center">0.83<xref ref-type="table-fn" rid="Tfn12">
<sup>b</sup>
</xref>
</td>
<td align="center">0.05 (3/60)</td>
</tr>
<tr>
<td align="left">Docetaxel (TCGA)</td>
<td align="center">16</td>
<td align="center">0.66</td>
<td align="center">0.69</td>
<td align="center">0.38</td>
<td align="center">0.25 (2/8)</td>
</tr>
<tr>
<td align="left">Gemcitabine (TCGA)</td>
<td align="center">57</td>
<td align="center">0.86</td>
<td align="center">0.70</td>
<td align="center">0.17</td>
<td align="center">0.52 (11/21)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="Tfn11">
<label>
<sup>a</sup>
</label>
<p>n denotes the sample size.</p>
</fn>
<fn id="Tfn12">
<label>
<sup>b</sup>
</label>
<p>The false positive rates of sorafenib (GSE33072) and cisplatin (TCGA) are 10/16 and 5/6, respectively.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-6">
<title>Ablation study</title>
<p>In a pilot study, we found that DA considerably improved the prediction AUC of logistic ridge regression (KNN) combined with feature selection using DE genes and BW ratio. Thus, it was of interest to evaluate the contribution of DA. We trained logistic ridge regression and KNN with the features selected by the two aforementioned feature selection procedures (denoted as LogitDA-DA and KNNDA-DA, respectively) for the ten drugs. <xref ref-type="sec" rid="s10">Supplementary Table S3</xref> shows that LogitDA-DA (KNNDA-DA) used a few hundred genes to achieve test AUCs equivalent to those of the baseline logit model trained with more than 5,100&#x2013;13,400 features for the ten drugs (<xref ref-type="fig" rid="F2">Figure 2</xref>). <xref ref-type="table" rid="T6">Table 6</xref> shows that DA increases the averaged prediction AUC of LogitDA (KNNDA) from 0.55 to 0.79 (from 0.57 to 0.78) over LogitDA-DA (KNNDA-DA), where the average was taken over the ten drugs; the improvements are quite significant.</p>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>Ablation study of the proposed predictors with DA <italic>versus</italic> without DA.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Experimental setting</th>
<th align="center">Averaged prediction AUC (s.e.)<xref ref-type="table-fn" rid="Tfn13">
<sup>a</sup>
</xref>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">LogitDA</td>
<td align="center">0.79 (0.14)</td>
</tr>
<tr>
<td align="left">LogitDA-DA</td>
<td align="center">0.55 (0.11)</td>
</tr>
<tr>
<td align="left">KNNDA</td>
<td align="center">0.78 (0.14)</td>
</tr>
<tr>
<td align="left">KNNDA-DA</td>
<td align="center">0.57 (0.14)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="Tfn13">
<label>
<sup>a</sup>
</label>
<p>The averaged prediction AUC and its s.e. were computed over those of the ten drugs studied.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3-7">
<title>Pathways relevant to erlotinib and cetuximab discovered</title>
<p>As LogitDA and KNNDA perform well in the prediction AUC for erlotinib and cetuximab (PDX), it is of interest to find the pathways in which the fitted genes of these predictors are involved. Thus, we first submitted the top-ranked 220 genes of LogitDA and KNNDA for erlotinib into the database Ingenuity Pathway Analysis (IPA; <ext-link ext-link-type="uri" xlink:href="http://www.ingenuity.com/">http://www.ingenuity.com</ext-link>) and uncovered the relevant pathways in <xref ref-type="sec" rid="s10">Supplementary Table S4</xref>. Interestingly, several important metabolic pathways were discovered, e.g., Purine Nucleotides <italic>de novo</italic> Biosynthesis (<xref ref-type="bibr" rid="B1">Ali et al., 2020</xref>; <xref ref-type="bibr" rid="B31">Taha-Mehlitz et al., 2021</xref>) and Histidine Degradation VI (<xref ref-type="bibr" rid="B33">Tominaga et al., 2019</xref>; <xref ref-type="bibr" rid="B13">Han et al., 2020</xref>). Furthermore, pathways for epigenetic regulation (<xref ref-type="bibr" rid="B38">Yu et al., 2018</xref>; <xref ref-type="bibr" rid="B7">Du et al., 2019</xref>) and DNA repair (nucleotide excision repair enhanced pathway) (<xref ref-type="bibr" rid="B6">Dong et al., 2019</xref>; <xref ref-type="bibr" rid="B36">Wang T. et al., 2021</xref>; <xref ref-type="bibr" rid="B27">Sato et al., 2021</xref>) were also uncovered. The aforementioned pathways play essential roles in tumor malignancy and response to anti-cancer therapies.</p>
<p>The overlap of the aforementioned fitted genes and the uncovered pathways (in the molecules column of <xref ref-type="sec" rid="s10">Supplementary Table S5</xref>) has been reported to contribute to tumor progression (cell proliferation, survival, invasion, and metastasis) and drug resistance. Specifically, LIG1 is an attractive target for personalization of ovarian cancer therapy (<xref ref-type="bibr" rid="B2">Ali et al., 2021</xref>), and decreased eEF2 phosphorylation, mediated by increased PP2A activity, contributes to resistance to HER2 inhibition (<xref ref-type="bibr" rid="B20">McDermott et al., 2014</xref>). ADSL has been suggested as a predictive biomarker of response to 6-mercaptopurine (under the brand name Purinethol) in a pre-clinical setting (<xref ref-type="bibr" rid="B31">Taha-Mehlitz et al., 2021</xref>).</p>
<p>Moreover, MTA3 downregulates SOX2-OT, and the MTA3/SOX2-OT/SOX2 axis has been reported as a potential cancer stratification marker in human esophageal squamous cell carcinomas (<xref ref-type="bibr" rid="B7">Du et al., 2019</xref>). Finally, CAD, a key enzyme of <italic>de novo</italic> pyrimidine biosynthesis essential for cell proliferation, has been found to directly interact with the second-generation EGFR-TKI afatinib, which also targets EGFR in the same pathway as erlotinib (<xref ref-type="bibr" rid="B34">Tu et al., 2021</xref>).</p>
<p>Similarly, we submitted the top-ranked 130 genes of LogitDA for cetuximab (PDX) and uncovered DNA repair, metabolic processes, and lysosome-associated pathways; the details of the pathways are listed in <xref ref-type="sec" rid="s10">Supplementary Table S5</xref>. The overlap of the fitted genes and the uncovered pathways includes CDK7, IGF-1R, and others. In particular, CDK7 is a key regulator of transcription and cell-cycle control, and its deregulation in cancer has been linked to a worse prognosis (<xref ref-type="bibr" rid="B16">Jagomast et al., 2022</xref>). Inhibition of CDK7/12 suppresses the emergence of resistance in response to targeted therapy in lung cancer cells (<xref ref-type="bibr" rid="B26">Rusan et al., 2018</xref>; <xref ref-type="bibr" rid="B32">Terai et al., 2018</xref>). Moreover, cetuximab therapeutically blocks EGFR, and this might concurrently induce the activation of IGF-1R, which could activate EGFR-downstream Akt signaling, thus mediating cetuximab resistance in gastric cancer cells (<xref ref-type="bibr" rid="B18">Li et al., 2015</xref>).</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<p>Our feature selection approach can be used in combination with any classifier or regression model, not restricted to the logistic ridge regression and KNN demonstrated here, to predict the response of cancer patients using gene expression data. In particular, the ablation study shows that DA increases the prediction power by &#x223c;24% (21%) from LogitDA-DA (KNNDA-DA). Following standard practice, we have chosen the K value of KNNDA that yielded the largest average AUC from 5-fold CV. To see the impact of the selection of K, we computed the test AUC of KNNDA with various values of K in <xref ref-type="sec" rid="s10">Supplementary Table S6</xref> (p. 17, <xref ref-type="sec" rid="s10">Supplementary File</xref>). The result shows that excluding the smallest value of K (3), within the neighborhood of the optimized K (say, K <inline-formula id="inf59">
<mml:math id="m60">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> 3), the yielded test AUC of KNNDA deviates from the reported AUC only within 0.05, except for sorafenib (&#x2212;0.08, 0), paclitaxel (&#x2212;0.06, 0.04), and docetaxel (0, 0.09). This may be due to the large s.e. of KNNDA for these drugs, 0.03, 0.07, and 0.04 in the test AUC from 10 repeats (<xref ref-type="table" rid="T4">Table 4</xref>).</p>
<p>LogitDA (KNNDA) performed very well on prediction for five (four) out of the ten targeted therapies and chemotherapies (AUC &#x3e;0.81), i.e., erlotinib (two sets), cetuximab, gemcitabine, and docetaxel. Thus, these predictors may efficiently uncover novel biomarkers and pathways, although large test sets are warranted. In addition to the high prediction AUC for the aforementioned drugs, our approach also has the advantage of using far fewer (50&#x2013;650) genes than the baseline logistic ridge regression and MOLI, which used more than 5,100 or 12,000 genes.</p>
<p>Notably, using the novel adjusted cutoff of prediction probability, LogitDA achieved a prediction accuracy of 0.70 or higher for seven of the ten drugs. In particular, the prediction accuracy of LogitDA for erlotinib increased from 0.56 (using the default cutoff of 0.5) to 0.76 using the adjusted cutoff of 0.09, as Proposition 2 suggests. Moreover, its prediction accuracy increased to 0.80 if we focused on 20 <italic>EGFR</italic> and <italic>KRAS</italic> wild-type patients with NSCLC, for whom there is currently no effective predictive marker of drug response. Thus, LogitDA may be useful for stratifying such NSCLC patients for erlotinib in clinical practice. Although the test AUCs for chemotherapies such as paclitaxel and cisplatin were only 0.68 and 0.62, respectively, their prediction accuracy reached 0.88 for both paclitaxel (PDX) and cisplatin (66 patients in TCGA), using the adjusted cutoffs for the prediction probability.</p>
<p>As LogitDA and KNNDA performed well in predicting responses to erlotinib and cetuximab, we used the fitted genes of the predictors to uncover several important metabolic pathways for these drugs, in addition to DNA repair pathways. The aforementioned pathways play essential roles in tumor malignancy and response to anti-cancer therapies.</p>
<p>It is interesting to point out that LogitDA performed particularly well for certain targeted therapies. LogitDA used 370 cell lines for training and achieved test AUCs of 0.94 and 1.00 for erlotinib (clinical trial and PDX). In contrast, deep learning-based methods such as MOLI aggregated related samples (of drugs targeting the same EGFR pathway) into a larger training set (&#x3e;3,000 cell lines) and used multi-omics data to train the classifier, which increased the test AUC from 0.63 to 0.72 for erlotinib (PDX). These differences may be because our approach prioritizes important features, limiting the number of parameters in the logistic ridge regression to at most 1,000 genes to fit. Nevertheless, MOLI (gene expression) performed very well in the prediction of chemotherapies, e.g., with a prediction AUC of 0.75 for cisplatin (TCGA), which outperformed LogitDA (using 130 genes) and KNNDA (using 190 genes).</p>
<p>This study employed GED, which has been shown to be the most predictive data type among omics data (<xref ref-type="bibr" rid="B15">Iorio et al., 2016</xref>), to predict the drug response of cancer patients; integrating GED and other omics data types to predict the drug response is a natural extension. We postulate that chemotherapies usually target broad biological mechanisms, so predictors for these therapies may require more genes in training to predict well. This suggests a future research direction in which biological domain knowledge (<xref ref-type="bibr" rid="B19">Ma et al., 2021</xref>) is incorporated to integrate samples screened with several therapeutics targeting the same tumorigenesis mechanism to improve the performance of our approach. This research direction is similar to a recent development in which adversarial inductive transfer learning (<xref ref-type="bibr" rid="B24">Pan and Yang, 2010</xref>) is applied to drug response prediction (AITL, <xref ref-type="bibr" rid="B30">Sharifi-Noghabi et al., 2020</xref>). AITL applied adversarial domain adaptation and multi-task learning to tackle discrepancies in the input and output spaces of drug response prediction. Moreover, combining the proposed feature selection method with deep learning-based methods may prove powerful for drug response prediction, as the former has been shown to improve the prediction power of linear and nonlinear predictors for drug response. Finally, applying the concept of few-shot learning (<xref ref-type="bibr" rid="B19">Ma et al., 2021</xref>), namely, applying DA to only partial test samples and keeping the remaining procedures the same, may reveal the minimum number of test labels required for adequate performance of our predictors.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. These data can be found at: <ext-link ext-link-type="uri" xlink:href="http://geeleherlab.org/cgpPrediction">http://geeleherlab.org/cgpPrediction</ext-link>; <ext-link ext-link-type="uri" xlink:href="https://zenodo.org/record/4036592">https://zenodo.org/record/4036592</ext-link>; PDX Encyclopedia datasets; TCGA; and Broad GDAC Firehose.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>GSS designed and supervised the study. SY proved the propositions. Y-CC modified and implemented all of the code. C-HT wrote an earlier version of the code. H-WC analyzed the pathways. SY and GSS interpreted the results and wrote the manuscript. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>This research was supported by Academia Sinica, Taiwan (Tukey Grant to GSS and SY, Mathematics in Biology, AS-104-TP-A07, AS-SUMMIT) and the National Science and Technology Council, Taiwan, Republic of China (NSTC 109-2118-M-001-001-MY2 and 111-2118-M-001-009-MY2 to GSS and NSTC 106-2314-B-001-005 to SY).</p>
</sec>
<ack>
<p>The authors thank Jan-Gowth Chang for the discussions and U-Hou Lok, Ce-Bo Yang, and Brian Kim for computational assistance.</p>
</ack>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors, and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fgene.2023.1217414/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fgene.2023.1217414/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ali</surname>
<given-names>E. S.</given-names>
</name>
<name>
<surname>Sahu</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Villa</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>O&#x2019;Hara</surname>
<given-names>B. P.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Beaudet</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>ERK2 phosphorylates PFAS to mediate posttranslational control of <italic>de novo</italic> purine synthesis</article-title>. <source>Mol. Cell</source> <volume>78</volume> (<issue>6</issue>), <fpage>1178</fpage>&#x2013;<lpage>1191.e6</lpage>. <pub-id pub-id-type="doi">10.1016/j.molcel.2020.05.001</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ali</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Alabdullah</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Algethami</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Alblihy</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Miligy</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Shoqafi</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Ligase 1 is a predictor of platinum resistance and its blockade is synthetically lethal in XRCC1 deficient epithelial ovarian cancers</article-title>. <source>Theranostics</source> <volume>11</volume> (<issue>17</issue>), <fpage>8350</fpage>&#x2013;<lpage>8361</lpage>. <pub-id pub-id-type="doi">10.7150/thno.51456</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barretina</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Caponigro</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Stransky</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Venkatesan</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Margolin</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>The Cancer Cell Line Encyclopedia enables predictive modelling of anticancer drug sensitivity</article-title>. <source>Nature</source> <volume>483</volume> (<issue>7391</issue>), <fpage>603</fpage>&#x2013;<lpage>607</lpage>. <pub-id pub-id-type="doi">10.1038/nature11003</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Basu</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bodycombe</surname>
<given-names>N. E.</given-names>
</name>
<name>
<surname>Cheah</surname>
<given-names>J. H.</given-names>
</name>
<name>
<surname>Price</surname>
<given-names>E. V.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Schaefer</surname>
<given-names>G. I.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>An interactive resource to identify cancer genetic and lineage dependencies targeted by small molecules</article-title>. <source>Cell</source> <volume>154</volume> (<issue>5</issue>), <fpage>1151</fpage>&#x2013;<lpage>1161</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2013.08.003</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ding</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Gu</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Evaluating the molecule-based prediction of clinical drug responses in cancer</article-title>. <source>Bioinformatics</source> <volume>32</volume> (<issue>19</issue>), <fpage>2891</fpage>&#x2013;<lpage>2895</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btw344</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dong</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Histone-related genes are hypermethylated in lung cancer and hypermethylated HIST1H4F could serve as a pan-cancer biomarker</article-title>. <source>Cancer Res.</source> <volume>79</volume> (<issue>24</issue>), <fpage>6101</fpage>&#x2013;<lpage>6112</lpage>. <pub-id pub-id-type="doi">10.1158/0008-5472.CAN-19-1019</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Du</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Gan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>MTA3 represses cancer stemness by targeting the SOX2OT/SOX2 axis</article-title>. <source>iScience</source> <volume>22</volume>, <fpage>353</fpage>&#x2013;<lpage>368</lpage>. <pub-id pub-id-type="doi">10.1016/j.isci.2019.11.009</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dudoit</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Fridlyand</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Speed</surname>
<given-names>T. P.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Comparison of discrimination methods for the classification of tumors using gene expression data</article-title>. <source>J. Am. Stat. Assoc.</source> <volume>97</volume> (<issue>457</issue>), <fpage>77</fpage>&#x2013;<lpage>87</lpage>. <pub-id pub-id-type="doi">10.1198/016214502753479248</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Korn</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Ferretti</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Monahan</surname>
<given-names>J. E.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>High-throughput screening using patient-derived tumor xenografts to predict clinical trial drug response</article-title>. <source>Nat. Med.</source> <volume>21</volume> (<issue>11</issue>), <fpage>1318</fpage>&#x2013;<lpage>1325</lpage>. <pub-id pub-id-type="doi">10.1038/nm.3954</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Garnett</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Edelman</surname>
<given-names>E. J.</given-names>
</name>
<name>
<surname>Heidorn</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>Greenman</surname>
<given-names>C. D.</given-names>
</name>
<name>
<surname>Dastur</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Lau</surname>
<given-names>K. W.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Systematic identification of genomic markers of drug sensitivity in cancer cells</article-title>. <source>Nature</source> <volume>483</volume> (<issue>7391</issue>), <fpage>570</fpage>&#x2013;<lpage>575</lpage>. <pub-id pub-id-type="doi">10.1038/nature11005</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Geeleher</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Cox</surname>
<given-names>N. J.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>R. S.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Clinical drug response can be predicted using baseline gene expression levels and <italic>in vitro</italic> drug sensitivity in cell lines</article-title>. <source>Genome Biol.</source> <volume>15</volume> (<issue>3</issue>), <fpage>R47</fpage>. <pub-id pub-id-type="doi">10.1186/gb-2014-15-3-r47</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gillet</surname>
<given-names>J.-P.</given-names>
</name>
<name>
<surname>Varma</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Gottesman</surname>
<given-names>M. M.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>The clinical relevance of cancer cell lines</article-title>. <source>J. Natl. Cancer Inst.</source> <volume>105</volume> (<issue>7</issue>), <fpage>452</fpage>&#x2013;<lpage>458</lpage>. <pub-id pub-id-type="doi">10.1093/jnci/djt007</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Saed</surname>
<given-names>H. S.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Therapeutic implications of altered cholesterol homeostasis mediated by loss of CYP46A1 in human glioblastoma</article-title>. <source>EMBO Mol. Med.</source> <volume>12</volume> (<issue>1</issue>), <fpage>e10924</fpage>. <pub-id pub-id-type="doi">10.15252/emmm.201910924</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Haslam</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Prasad</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Updated estimates of eligibility for and response to genome-targeted oncology drugs among US cancer patients, 2006-2020</article-title>. <source>Ann. Oncol.</source> <volume>32</volume> (<issue>7</issue>), <fpage>926</fpage>&#x2013;<lpage>932</lpage>. <pub-id pub-id-type="doi">10.1016/j.annonc.2021.04.003</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Iorio</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Knijnenburg</surname>
<given-names>T. A.</given-names>
</name>
<name>
<surname>Vis</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Bignell</surname>
<given-names>G. R.</given-names>
</name>
<name>
<surname>Menden</surname>
<given-names>M. P.</given-names>
</name>
<name>
<surname>Schubert</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2016</year>). <article-title>A landscape of pharmacogenomic interactions in cancer</article-title>. <source>Cell</source> <volume>166</volume> (<issue>3</issue>), <fpage>740</fpage>&#x2013;<lpage>754</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2016.06.017</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jagomast</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Idel</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Klapper</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Kuppler</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Offermann</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Dreyer</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>CDK7 predicts worse outcome in head and neck squamous-cell cancer</article-title>. <source>Cancers</source> <volume>14</volume> (<issue>3</issue>), <fpage>492</fpage>. <pub-id pub-id-type="doi">10.3390/cancers14030492</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Koniusz</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Tas</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Porikli</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Domain adaptation by mixture of alignments of second-or higher-order scatter tensors</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>, <conf-loc>Honolulu, HI, USA</conf-loc>, <conf-date>July 2017</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>4478</fpage>&#x2013;<lpage>4487</lpage>.</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Cetuximab-induced insulin-like growth factor receptor I activation mediates cetuximab resistance in gastric cancer cells</article-title>. <source>Mol. Med. Rep.</source> <volume>11</volume> (<issue>6</issue>), <fpage>4547</fpage>&#x2013;<lpage>4554</lpage>. <pub-id pub-id-type="doi">10.3892/mmr.2015.3245</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Fong</surname>
<given-names>S. H.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bakkenist</surname>
<given-names>C. J.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Mourragui</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Few-shot learning creates predictive models of drug response that translate from high-throughput screens to individual patients</article-title>. <source>Nat. Cancer</source> <volume>2</volume> (<issue>2</issue>), <fpage>233</fpage>&#x2013;<lpage>244</lpage>. <pub-id pub-id-type="doi">10.1038/s43018-020-00169-2</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>McDermott</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Browne</surname>
<given-names>B. C.</given-names>
</name>
<name>
<surname>Conlon</surname>
<given-names>N. T.</given-names>
</name>
<name>
<surname>O&#x2019;Brien</surname>
<given-names>N. A.</given-names>
</name>
<name>
<surname>Slamon</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Henry</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>PP2A inhibition overcomes acquired resistance to HER2 targeted therapy</article-title>. <source>Mol. Cancer</source> <volume>13</volume> (<issue>1</issue>), <fpage>157</fpage>. <pub-id pub-id-type="doi">10.1186/1476-4598-13-157</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Messersmith</surname>
<given-names>W. A.</given-names>
</name>
<name>
<surname>Ahnen</surname>
<given-names>D. J.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Targeting EGFR in colorectal cancer</article-title>. <source>N. Engl. J. Med.</source> <volume>359</volume> (<issue>17</issue>), <fpage>1834</fpage>&#x2013;<lpage>1836</lpage>.</citation>
</ref>
<ref id="B22">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Motiian</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Piccirilli</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Adjeroh</surname>
<given-names>D. A.</given-names>
</name>
<name>
<surname>Doretto</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Unified deep supervised domain adaptation and generalization</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE international conference on computer vision</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>5715</fpage>&#x2013;<lpage>5725</lpage>.</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mourragui</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Loog</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Van De Wiel</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Reinders</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Wessels</surname>
<given-names>L. F.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>PRECISE: A domain adaptation approach to transfer predictors of drug response from pre-clinical models to tumors</article-title>. <source>Bioinformatics</source> <volume>35</volume> (<issue>14</issue>), <fpage>i510</fpage>&#x2013;<lpage>i519</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btz372</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pan</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>A survey on transfer learning</article-title>. <source>IEEE Trans. Knowl. Data Eng.</source> <volume>22</volume>, <fpage>1345</fpage>&#x2013;<lpage>1359</lpage>. <pub-id pub-id-type="doi">10.1109/tkde.2009.191</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peres da Silva</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Suphavilai</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Nagarajan</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>TUGDA: Task uncertainty guided domain adaptation for robust generalization of cancer drug response prediction from <italic>in vitro</italic> to <italic>in vivo</italic> settings</article-title>. <source>Bioinformatics</source> <volume>37</volume> (<issue>1</issue>), <fpage>i76</fpage>&#x2013;<lpage>i83</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btab299</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rusan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Christensen</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Abraham</surname>
<given-names>B. J.</given-names>
</name>
<name>
<surname>Kwiatkowski</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Suppression of adaptive responses to targeted cancer therapy by transcriptional repression</article-title>. <source>Cancer Discov.</source> <volume>8</volume> (<issue>1</issue>), <fpage>59</fpage>&#x2013;<lpage>73</lpage>. <pub-id pub-id-type="doi">10.1158/2159-8290.CD-17-0461</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sato</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Liebau</surname>
<given-names>A. W.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Rabadan</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Gautier</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>The UVSSA complex alleviates MYC-driven transcription stress</article-title>. <source>J. Cell Biol.</source> <volume>220</volume> (<issue>2</issue>), <fpage>e201807163</fpage>. <pub-id pub-id-type="doi">10.1083/jcb.201807163</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Seashore-Ludlow</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Rees</surname>
<given-names>M. G.</given-names>
</name>
<name>
<surname>Cheah</surname>
<given-names>J. H.</given-names>
</name>
<name>
<surname>Cokol</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Price</surname>
<given-names>E. V.</given-names>
</name>
<name>
<surname>Coletti</surname>
<given-names>M. E.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Harnessing connectivity in a large-scale small-molecule sensitivity dataset</article-title>. <source>Cancer Discov.</source> <volume>5</volume> (<issue>11</issue>), <fpage>1210</fpage>&#x2013;<lpage>1223</lpage>. <pub-id pub-id-type="doi">10.1158/2159-8290.CD-15-0235</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sharifi-Noghabi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zolotareva</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Collins</surname>
<given-names>C. C.</given-names>
</name>
<name>
<surname>Ester</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>MOLI: Multi-omics late integration with deep neural networks for drug response prediction</article-title>. <source>Bioinformatics</source> <volume>35</volume> (<issue>14</issue>), <fpage>i501</fpage>&#x2013;<lpage>i509</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btz318</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sharifi-Noghabi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zolotareva</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Collins</surname>
<given-names>C. C.</given-names>
</name>
<name>
<surname>Ester</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>AITL: Adversarial Inductive Transfer Learning with input and output space adaptation for pharmacogenomics</article-title>. <source>Bioinformatics</source> <volume>36</volume> (<issue>1</issue>), <fpage>i380</fpage>&#x2013;<lpage>i388</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa442</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Taha-Mehlitz</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bianco</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Coto-Llerena</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kancherla</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Bantug</surname>
<given-names>G. R.</given-names>
</name>
<name>
<surname>Gallon</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Adenylosuccinate lyase is oncogenic in colorectal cancer by causing mitochondrial dysfunction and independent activation of NRF2 and mTOR-MYC-axis</article-title>. <source>Theranostics</source> <volume>11</volume> (<issue>9</issue>), <fpage>4011</fpage>&#x2013;<lpage>4029</lpage>. <pub-id pub-id-type="doi">10.7150/thno.50051</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Terai</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kitajima</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Potter</surname>
<given-names>D. S.</given-names>
</name>
<name>
<surname>Matsui</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Quiceno</surname>
<given-names>L. G.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>ER stress signaling promotes the survival of cancer "persister cells" tolerant to EGFR tyrosine kinase inhibitors</article-title>. <source>Cancer Res.</source> <volume>78</volume> (<issue>4</issue>), <fpage>1044</fpage>&#x2013;<lpage>1057</lpage>. <pub-id pub-id-type="doi">10.1158/0008-5472.CAN-17-1904</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tominaga</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Minato</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Murayama</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Sasahara</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Nishimura</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Kiyokawa</surname>
<given-names>E.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Semaphorin signaling via MICAL3 induces symmetric cell division to expand breast cancer stem-like cells</article-title>. <source>Proc. Natl. Acad. Sci.</source> <volume>116</volume> (<issue>2</issue>), <fpage>625</fpage>&#x2013;<lpage>630</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1806851116</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tu</surname>
<given-names>H.-F.</given-names>
</name>
<name>
<surname>Ko</surname>
<given-names>C.-J.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>C.-T.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>C.-F.</given-names>
</name>
<name>
<surname>Lan</surname>
<given-names>S.-W.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>H.-H.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Afatinib exerts immunomodulatory effects by targeting the pyrimidine biosynthesis enzyme CAD</article-title>. <source>Cancer Res.</source> <volume>81</volume> (<issue>12</issue>), <fpage>3270</fpage>&#x2013;<lpage>3282</lpage>. <pub-id pub-id-type="doi">10.1158/0008-5472.CAN-20-3436</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Herbst</surname>
<given-names>R. S.</given-names>
</name>
<name>
<surname>Boshoff</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Toward personalized treatment approaches for non-small-cell lung cancer</article-title>. <source>Nat. Med.</source> <volume>27</volume> (<issue>8</issue>), <fpage>1345</fpage>&#x2013;<lpage>1356</lpage>. <pub-id pub-id-type="doi">10.1038/s41591-021-01450-2</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Jing</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Identifying the hub genes in non-small cell lung cancer by integrated bioinformatics methods and analyzing the prognostic values</article-title>. <source>Pathology-Research Pract.</source> <volume>228</volume>, <fpage>153654</fpage>. <pub-id pub-id-type="doi">10.1016/j.prp.2021.153654</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weinstein</surname>
<given-names>J. N.</given-names>
</name>
<name>
<surname>Collisson</surname>
<given-names>E. A.</given-names>
</name>
<name>
<surname>Mills</surname>
<given-names>G. B.</given-names>
</name>
<name>
<surname>Shaw</surname>
<given-names>K. R.</given-names>
</name>
<name>
<surname>Ozenberger</surname>
<given-names>B. A.</given-names>
</name>
<name>
<surname>Ellrott</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2013</year>). <article-title>The Cancer Genome Atlas Pan-Cancer analysis project</article-title>. <source>Nat. Genet.</source> <volume>45</volume> (<issue>10</issue>), <fpage>1113</fpage>&#x2013;<lpage>1120</lpage>. <pub-id pub-id-type="doi">10.1038/ng.2764</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>RBBP7 is a prognostic biomarker in patients with esophageal squamous cell carcinoma</article-title>. <source>Oncol. Lett.</source> <volume>16</volume> (<issue>6</issue>), <fpage>7204</fpage>&#x2013;<lpage>7211</lpage>. <pub-id pub-id-type="doi">10.3892/ol.2018.9543</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>