<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Pharmacol.</journal-id>
<journal-title>Frontiers in Pharmacology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Pharmacol.</abbrev-journal-title>
<issn pub-type="epub">1663-9812</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">670670</article-id>
<article-id pub-id-type="doi">10.3389/fphar.2021.670670</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Pharmacology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Predicting Successes and Failures of Clinical Trials With Outer Product&#x2013;Based Convolutional Neural Network</article-title>
<alt-title alt-title-type="left-running-head">Seo et&#x20;al.</alt-title>
<alt-title alt-title-type="right-running-head">Predicting Failures of Clinical Trials</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Seo</surname>
<given-names>Sangwoo</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1367022/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Kim</surname>
<given-names>Youngmin</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1366974/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Han</surname>
<given-names>Hyo-Jeong</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1367038/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Son</surname>
<given-names>Woo Chan</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1368001/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hong</surname>
<given-names>Zhen-Yu</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1366895/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sohn</surname>
<given-names>Insuk</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1366897/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Shim</surname>
<given-names>Jooyong</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1367016/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Hwang</surname>
<given-names>Changha</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1196509/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<label>
<sup>1</sup>
</label>Department of Data and Knowledge Service Engineering, Dankook University, <addr-line>Gyeonggido</addr-line>, <country>Korea</country>
</aff>
<aff id="aff2">
<label>
<sup>2</sup>
</label>Department of Statistics, Dankook University, <addr-line>Gyeonggido</addr-line>, <country>Korea</country>
</aff>
<aff id="aff3">
<label>
<sup>3</sup>
</label>Department of Pathology, College of Medicine, University of Ulsan, Asan Medical Center, <addr-line>Seoul</addr-line>, <country>Korea</country>
</aff>
<aff id="aff4">
<label>
<sup>4</sup>
</label>Arontier, <addr-line>Seoul</addr-line>, <country>Korea</country>
</aff>
<aff id="aff5">
<label>
<sup>5</sup>
</label>Department of Statistics, Institute of Statistical Information, Inje University, <addr-line>Gyeongsangnamdo</addr-line>, <country>Korea</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/366816/overview">Ke-Vin Chang</ext-link>, National Taiwan University Hospital, Taiwan</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/552766/overview">Tao Huang</ext-link>, Shanghai Institute of Nutrition and Health (CAS), China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/711875/overview">Khanh N. Q. Le</ext-link>, Taipei Medical University, Taiwan</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/246296/overview">Giuseppe Jurman</ext-link>, Bruno Kessler Foundation, Italy</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Youngmin Kim, <email>dudals4051@gmail.com</email>; Woo Chan Son, <email>wcson@amc.seoul.kr</email>; Changha Hwang, <email>chwang@dankook.ac.kr</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Translational Pharmacology, a section of the journal Frontiers in Pharmacology</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>16</day>
<month>06</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>12</volume>
<elocation-id>670670</elocation-id>
<history>
<date date-type="received">
<day>23</day>
<month>02</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>24</day>
<month>05</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2021 Seo, Kim, Han, Son, Hong, Sohn, Shim and Hwang.</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Seo, Kim, Han, Son, Hong, Sohn, Shim and Hwang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these&#x20;terms.</p>
</license>
</permissions>
<abstract>
<p>Despite several improvements in the drug development pipeline over the past decade, drug failures due to unexpected adverse effects have rapidly increased at all stages of clinical trials. To improve the success rate of clinical trials, it is necessary to identify potentially unsuccessful drug candidates that may fail in clinical trials. Therefore, we need to develop reliable models for predicting the outcomes of clinical trials of drug candidates, which have the potential to guide the drug discovery process. In this study, we propose an outer product&#x2013;based convolutional neural network (OPCNN) model which effectively integrates chemical features of drugs and target-based features. The validation results via 10-fold cross-validations on the dataset used for the data-driven approach PrOCTOR proved that our OPCNN model performs quite well in terms of accuracy, F1-score, Matthews correlation coefficient (MCC), precision, recall, area under the curve (AUC) of the receiver operating characteristic, and area under the precision&#x2013;recall curve (AUPRC). In particular, the proposed OPCNN model showed the best performance in terms of MCC, which is widely used in biomedicine as a performance metric and is a more reliable statistical measure. Through 10-fold cross-validation experiments, the accuracy of the OPCNN model is as high as 0.9758, the F1-score is as high as 0.9868, the MCC reaches 0.8451, the precision is as high as 0.9889, the recall is as high as 0.9893, the AUC is as high as 0.9824, and the AUPRC is as high as 0.9979. The results proved that our OPCNN model shows significantly good prediction performance on outcomes of clinical trials and it can be quite helpful in early drug discovery.</p>
</abstract>
<kwd-group>
<kwd>clinical trial</kwd>
<kwd>convolutional neural network</kwd>
<kwd>multimodal learning</kwd>
<kwd>outer product</kwd>
<kwd>imbalance</kwd>
</kwd-group>
<contract-num rid="cn001">2019M3E5D4066897</contract-num>
<contract-sponsor id="cn001">Ministry of Science and ICT, South Korea<named-content content-type="fundref-id">10.13039/501100014188</named-content>
</contract-sponsor>
</article-meta>
</front>
<body>
<sec id="s1">
<title>Introduction</title>
<p>Over the past 30&#xa0;years, failures at all phases of clinical trials have increased rapidly for safety reasons (<xref ref-type="bibr" rid="B12">Ledford, 2011</xref>; <xref ref-type="bibr" rid="B10">Hay et&#x20;al., 2014</xref>; <xref ref-type="bibr" rid="B18">Lysenko et&#x20;al., 2018</xref>; <xref ref-type="bibr" rid="B15">Liu et&#x20;al., 2021</xref>). This phenomenon happens despite significant improvements at all stages of the drug development pipeline (<xref ref-type="bibr" rid="B20">Scannell et&#x20;al., 2012</xref>). There have been many improvements in screening for drugs that are likely to fail clinical trials.</p>
<p>Drug-likeness scores are widely utilized as a useful guideline for eliminating toxic molecules during the early stages of drug development. This concept was first introduced by Lipinski&#x2019;s rule of five (Ro5), which screens molecules with a low probability of useful oral activity due to poor absorption or permeation (<xref ref-type="bibr" rid="B13">Lipinski et&#x20;al., 1997</xref>). That is to say, the Ro5 enhanced the drug discovery process because it helps in distinguishing between drug-like and nondrug-like molecules. However, Lipinski argued that the Ro5 is a very conservative strategy because this rule does not guarantee drug-likeness (<xref ref-type="bibr" rid="B14">Lipinski, 2004</xref>). To enhance the Ro5, Veber&#x2019;s rule and Ghose&#x2019;s rule were proposed (<xref ref-type="bibr" rid="B9">Ghose et&#x20;al., 1999</xref>; <xref ref-type="bibr" rid="B22">Veber et&#x20;al., 2002</xref>). The quantitative estimate for drug-likeness (QED) was also recently proposed as an alternative to rule-based methods (<xref ref-type="bibr" rid="B2">Bickerton et&#x20;al., 2012</xref>).</p>
<p>Despite many advances in identifying potentially toxic drugs, overall failure rates of clinical trials continued to increase (<xref ref-type="bibr" rid="B10">Hay et&#x20;al., 2014</xref>). To deal with this problem, Gayvert et&#x20;al. recently proposed a new data-driven approach PrOCTOR, which predicts the odds of clinical trial outcomes on the basis of random forests that integrate chemical properties of drugs and target-based properties (<xref ref-type="bibr" rid="B8">Gayvert et&#x20;al., 2016</xref>). It was shown that both the chemical features and target-related gene expression values contribute to effective classification. In this study, we will also use the chemical features of drugs and target-based features for predicting successes and failures of clinical trials. Lo et&#x20;al. applied machine learning techniques to predict the outcomes of randomized clinical trials using drug development and clinical trial data (<xref ref-type="bibr" rid="B16">Lo et&#x20;al., 2019</xref>). Munos et&#x20;al. improved the prediction of clinical success using machine learning algorithms based on a large database of projects (<xref ref-type="bibr" rid="B19">Munos et&#x20;al., 2020</xref>).</p>
<p>Modeling the relationship between the chemical structure of a drug and its molecular activity is very important for drug development for precision medicine. In this study, we employ a novel outer product&#x2013;based convolutional neural network (OPCNN) to effectively integrate the chemical features of the drugs, biological network features, genotype-tissue expression (GTEx) features, and target loss frequency. The purpose of this research is to propose a two-dimensional (2D) convolutional neural network (CNN) based on the outer product of a chemical feature vector and a target-based feature vector to predict successes and failures of clinical trials.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>Materials and Methods</title>
<sec id="s2-1">
<title>Dataset</title>
<p>We evaluated our proposed OPCNN using the same dataset as in Gayvert et&#x20;al. (<xref ref-type="bibr" rid="B8">Gayvert et&#x20;al., 2016</xref>), which consists of 757 approved drugs for positive class and 71 failed drugs for negative class. We notice that the dataset is imbalanced. The imbalance ratio of majority to minority compounds is 10.662. The set of 47 input features describing each drug contains 10 molecular properties, 34&#x20;target-based properties, and three drug-likeness rule outcomes for the Lipinski&#x2019;s rule of five, Veber&#x2019;s, and Ghose&#x2019;s rules. There are several missing values for six features. We impute them with relevant median values. Molecular properties represent molecular weight, XLogP, polar surface area, hydrogen bond donor and acceptor counts, formal charge, number of rings, rotatable bond count, refractivity, and logP solubility. For a set of 30&#x20;target-based features, we use the median expression of each drug&#x2019;s known gene targets in 30 different tissues, including the blood, skin, brain, liver, testis, muscle, nerve, and heart, calculated from the GTEx project. For three other target-based features, we use the network connectivity of the target, with the gene degree feature and betweenness feature computed using an aggregated gene&#x2013;gene interaction network. We also use a feature that represents the loss-of-function mutation frequency in the target&#x20;gene.</p>
</sec>
<sec id="s2-2">
<title>Model Development</title>
<sec id="s2-2-1">
<title>The Proposed OPCNN Classifier</title>
<p>The problem of predicting clinical successes and failures of clinical trials is modeled as a binary classification task. For a given drug <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, the target label is a binary variable <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> indicates that the drug is passed and <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> indicates otherwise. Our dataset contains <inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>828</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> drugs, where each is represented by a pair of feature vector <inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and a corresponding clinical outcome <inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>: <inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf10">
<mml:math id="m10">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf11">
<mml:math id="m11">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> represent the chemical feature vector and target-based feature vector, respectively. The data associated with this task are bimodal and highly imbalanced. Both modalities are associated with chemical properties of the drugs and target-based properties, respectively. Thus, we need to effectively join the two different modalities. In addition, we also need to consider a model that deals with the class-imbalance problem.</p>
<p>
<xref ref-type="fig" rid="F1">Figure&#x20;1</xref> explains the entire workflow of the proposed OPCNN classifier for the prediction of successes and failures in clinical trials. Our OPCNN consists of three residual blocks and five fully connected (FC) layers. Each residual block has three convolution layers, each of which employs 32 kernels with kernel size 3 and stride size 1, and the rectified linear unit (ReLU) activation function. The numbers in parentheses of FC(1), FC(50), and FC(100) indicate the number of nodes. FC(1) layer employs the sigmoid activation function. Both FC(50) and FC(100) layers employ the rectified linear unit (ReLU) activation function. Our method consists of two stages. First, the representative feature vectors of chemical feature vector and target-based feature vector are calculated and then the outer products between these two representative feature vectors are calculated. Second, a 2D CNN model is adopted to extract deep features from the outer products and to predict successes and failures of clinical trials.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>A workflow of the proposed OPCNN classifier for predicting successes and failures of clinical trials. Given an outer product of two representative feature vectors as an input, 2D CNN is used to learn features. The architecture of OPCNN consists of three residual blocks and five fully connected (FC) layers. Each residual block has three convolution layers. <bold>(A)</bold> OPCNN classifier <bold>(B)</bold> Residual&#x20;block.</p>
</caption>
<graphic xlink:href="fphar-12-670670-g001.tif"/>
</fig>
<p>The process of calculating the outer product is as follows. The chemical feature vector <inline-formula id="inf12">
<mml:math id="m12">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>13</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and the target-based feature vector <inline-formula id="inf13">
<mml:math id="m13">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>34</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> in different modalities are first fed into the FC(50) layer to get representative feature vectors <inline-formula id="inf14">
<mml:math id="m14">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>50</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>and <inline-formula id="inf15">
<mml:math id="m15">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>50</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and improve their performance. Given <inline-formula id="inf16">
<mml:math id="m16">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>50</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf17">
<mml:math id="m17">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>50</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, the outer product on the augmented unimodal is calculated as follows:<disp-formula id="e1">
<mml:math id="m18">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi>f</mml:mi>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mtable columnalign="left">
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mn>1</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
<mml:mo>]</mml:mo>
</mml:mrow>
<mml:mo>&#x2297;</mml:mo>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mtable columnalign="left">
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mn>1</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
<mml:mo>]</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mtable columnalign="left">
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2297;</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr columnalign="left">
<mml:mtd columnalign="left">
<mml:mn>1</mml:mn>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mtd>
</mml:mtr>
</mml:mtable>
<mml:mo>]</mml:mo>
</mml:mrow>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>Here, <inline-formula id="inf18">
<mml:math id="m19">
<mml:mrow>
<mml:mo>&#x2297;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> indicates the outer product between vectors. Thus, this outer product produces two sets of information: the bimodal interactions in the form of two-dimensional tensor and the raw unimodal representations of the modalities. The tensor calculated by such outer product is directly fed into the first residual block. The final representation is used for the classification&#x20;task.</p>
</sec>
<sec id="s2-2-2">
<title>Other Deep Multimodal Neural Networks</title>
<p>Classification with multimodal data often occurs in many machine learning applications (<xref ref-type="bibr" rid="B1">Baltru&#x161;aitis et&#x20;al., 2019</xref>; <xref ref-type="bibr" rid="B7">Gao et&#x20;al., 2020</xref>). Multimodal learning is an effective approach to combine information from multiple modalities to perform a prediction task. The modalities may be independent or correlated. Fusing multiple modalities is a key issue in any multimodal task. In general, the fusion of multiple modalities can be achieved at three levels: at the level of features or at a lower layer, at the intermediate level, and at the level of decisions. Fusion at the feature level or at a lower layer is called early fusion. On the other hand, fusion at the intermediate layer is called intermediate fusion, whereas fusion at the level of decisions is called late fusion. Because early and late fusions can generally suppress either intra-modality or inter-modality interactions, recent studies have focused on intermediate methods that allow fusion to occur on multiple layers of a deep&#x20;model.</p>
<p>
<xref ref-type="fig" rid="F2">Figure&#x20;2</xref> illustrates a graphical representation for deep multimodal neural network (DMNN) models associated with the early, intermediate, and late fusions used in the study. As seen from <xref ref-type="fig" rid="F2">Figure&#x20;2</xref>, each DMNN model consists of several FC layers. The number in parentheses indicates the number of nodes. As in <xref ref-type="fig" rid="F1">Figure&#x20;1</xref>, the FC(1) layer employs the sigmoid activation function. Both FC(50) and FC(100) layers employ the ReLU activation function. In the case of early fusion, each modality is first fed into an FC(50) layer before fusion in order to improve performance and to apply several fusion techniques. However, the standard early fusion allows multiple modalities to be directly concatenated to produce a single multimodal vector. In the case of intermediate and late fusions, each modality is fed into an independent deep neural network (DNN) and then fused to be the inputs of higher layers. The final representation is used for the classification&#x20;task.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Graphical representation for the early, intermediate, and late fusions. <bold>(A)</bold> Early fusion <bold>(B)</bold> Intermediate fusion <bold>(C)</bold> Late fusion.</p>
</caption>
<graphic xlink:href="fphar-12-670670-g002.tif"/>
</fig>
<p>Based on the literature, five fusion operations are often used to fuse multiple modalities (<xref ref-type="bibr" rid="B6">Feng et&#x20;al., 2021</xref>): (1) addition, (2) product, (3) concatenation, (4) ensemble, and (5) mixture of experts. Addition and product operations are performed in terms of elements at the fusion layer. Here, we will consider two more multimodal fusion techniques based on tensor fusion layer (TFL) (<xref ref-type="bibr" rid="B25">Zadeh et&#x20;al., 2017</xref>) and multimodal circulant fusion (MCF) (<xref ref-type="bibr" rid="B24">Wu and Han, 2018</xref>) for early and intermediate fusions. When using TFL and MCF for the intermediate fusion, we actually use the DMNN model with FC(100)-FC(50) instead of FC(100)-FC(100)-FC(50) for each modality to improve its performance.</p>
<p>In general, the early fusion approach performs better than individual unimodal classifiers. The ensemble approach called late fusion is to weight several individual classifiers and combine them to get a classifier that surpasses individual classifiers. In general, ensemble methods provide better results when there are significant differences among the models. Therefore, many ensemble methods try to enhance diversity among the models to be combined. Based on our preliminary studies, the unimodal classifiers using only chemical features perform better than unimodal classifiers using only target-based features. We actually have tried three different ensemble models using support vector machine (SVM) (<xref ref-type="bibr" rid="B21">Vapnik, 1995</xref>) and one-dimensional CNN and our DMNN for the late fusion in <xref ref-type="fig" rid="F2">Figure&#x20;2</xref>. Note that our DMNN model uses only the concatenation technique for late fusion. Since our DMNN ensemble model has shown the best performance, we will only report those results&#x20;later.</p>
</sec>
<sec id="s2-2-3">
<title>Tensor Fusion Layer and Multimodal Circulant Fusion</title>
<p>We now briefly illustrate TFL and MCF strategies. Element-wise addition and product are used to join features from multiple modalities. Concatenation technique focuses more on learning intra-modality than learning inter-modality. However, both TFL and MCF capture both intra-modality and inter-modality dynamics. TFL also employs the same outer product on the augmented unimodal as in our OPCNN.</p>
<p>We first illustrate the idea of TFL strategy to fuse multimodal data at the tensor level. For our studies, we need to build a TFL that disentangles unimodal and bimodal dynamics. Given representative feature vectors <inline-formula id="inf19">
<mml:math id="m20">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>50</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf20">
<mml:math id="m21">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>50</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> associated with the chemical feature vector <inline-formula id="inf21">
<mml:math id="m22">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>13</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and the target-based feature vector <inline-formula id="inf22">
<mml:math id="m23">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>34</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> in different modalities, TFL calculates the outer product on the augmented unimodal using the <xref ref-type="disp-formula" rid="e1">Eq. 1</xref>. However, as seen from <xref ref-type="fig" rid="F2">Figure&#x20;2</xref>, <inline-formula id="inf23">
<mml:math id="m24">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>50</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf24">
<mml:math id="m25">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>50</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are obtained slightly differently for the early fusion and intermediate fusion. Thus, TFL also produces two sets of information: the bimodal interactions in the form of two-dimensional tensor and the raw unimodal representations of the modalities. The tensor calculated by TFL is fed into a FC layer after being flattened. It is noted that TFL introduces no learnable parameters. Although TFL yields the high dimensional output tensor, chances of overfitting are low (<xref ref-type="bibr" rid="B25">Zadeh et&#x20;al., 2017</xref>).</p>
<p>We now briefly illustrate the idea of MCF strategy which consists of four steps. Given representative feature vectors <inline-formula id="inf25">
<mml:math id="m26">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>50</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf26">
<mml:math id="m27">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>50</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, we first project <inline-formula id="inf27">
<mml:math id="m28">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf28">
<mml:math id="m29">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> to a lower dimensional space using projection matrices <inline-formula id="inf29">
<mml:math id="m30">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">W</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>50</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf30">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">W</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>50</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>.<disp-formula id="e2">
<mml:math id="m32">
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">W</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mi>d</mml:mi>
</mml:msup>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">W</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mi>d</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where <inline-formula id="inf31">
<mml:math id="m33">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mn>50</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. As in TFL, <inline-formula id="inf32">
<mml:math id="m34">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>50</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf32b">
<mml:math id="m34b">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>50</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are obtained slightly differently for early fusion and intermediate fusion. Second, we construct circulant matrices <inline-formula id="inf33">
<mml:math id="m35">
<mml:mrow>
<mml:mi mathvariant="bold-italic">A</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf34">
<mml:math id="m36">
<mml:mrow>
<mml:mi mathvariant="bold-italic">B</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> using the projection vector <inline-formula id="inf35">
<mml:math id="m37">
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mi>d</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf36">
<mml:math id="m38">
<mml:mrow>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mi>d</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>.<disp-formula id="e3">
<mml:math id="m39">
<mml:mrow>
<mml:mi mathvariant="bold-italic">A</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">B</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>where <inline-formula id="inf37">
<mml:math id="m40">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes converting <inline-formula id="inf38">
<mml:math id="m41">
<mml:mrow>
<mml:mi mathvariant="bold-italic">b</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to a circulant matrix. Third, we calculate the interaction vectors <bold>f</bold> and <bold>g</bold> in one of two ways, based on multiplication between a circulant matrix and a projection vector, so that the elements of the matrix and the vector fully interact. The two ways are illustrated in <xref ref-type="disp-formula" rid="e4">Eqs. 4</xref>, <xref ref-type="disp-formula" rid="e5">5</xref>.<disp-formula id="e4">
<mml:math id="m42">
<mml:mrow>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold-italic">Ac</mml:mi>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">g</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold-italic">Bv</mml:mi>
<mml:mi mathvariant="bold-italic">&#xa0;</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
<disp-formula id="e5">
<mml:math id="m43">
<mml:mrow>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>d</mml:mi>
</mml:mfrac>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>d</mml:mi>
</mml:munderover>
<mml:msub>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:mi mathvariant="bold-italic">c</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">&#xa0;</mml:mi>
<mml:mi mathvariant="bold-italic">g</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>d</mml:mi>
</mml:mfrac>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>d</mml:mi>
</mml:munderover>
<mml:msub>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:mi mathvariant="bold-italic">v</mml:mi>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>Here, <inline-formula id="inf39">
<mml:math id="m44">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">a</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf40">
<mml:math id="m45">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">b</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are column vectors of circulant matrices <inline-formula id="inf41">
<mml:math id="m46">
<mml:mrow>
<mml:mi mathvariant="bold-italic">A</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf42">
<mml:math id="m47">
<mml:mrow>
<mml:mi mathvariant="bold-italic">B</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, respectively. <inline-formula id="inf43">
<mml:math id="m48">
<mml:mrow>
<mml:mo>&#x2299;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> denotes the operation of element-wise product. It is noted that we introduce no new parameters in the multiplication operation. Finally, we calculate target vector <inline-formula id="inf44">
<mml:math id="m49">
<mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mi>k</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> using <inline-formula id="inf45">
<mml:math id="m50">
<mml:mrow>
<mml:mi mathvariant="bold-italic">f</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf46">
<mml:math id="m51">
<mml:mrow>
<mml:mi mathvariant="bold-italic">g</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and a projection matrix <inline-formula id="inf47">
<mml:math id="m52">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">W</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>.<disp-formula id="e6">
<mml:math id="m53">
<mml:mrow>
<mml:mi mathvariant="bold-italic">m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">W</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mo>&#x2295;</mml:mo>
<mml:mi mathvariant="bold-italic">g</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mi>k</mml:mi>
</mml:msup>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>Here, <inline-formula id="inf48">
<mml:math id="m54">
<mml:mrow>
<mml:mo>&#x2295;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> denotes the operation of element-wise addition.</p>
</sec>
<sec id="s2-2-4">
<title>Imbalanced Data Learning</title>
<p>Since the ratio of passed drugs to failed drugs in clinical trials is highly imbalanced, the class-imbalance problem occurs. There are generally three types of methods to deal with imbalanced data learning (<xref ref-type="bibr" rid="B23">Wang et&#x20;al., 2019</xref>). We briefly illustrate the methods actually used in the study. 1) Sampling method: an intuitive way to cope with the imbalanced distribution of the data is to balance class distributions via resampling, which could oversample the minority class and undersample the majority class. One advanced sampling method called synthetic minority oversampling technique (SMOTE) creates artificial examples through interpolating neighboring data points (<xref ref-type="bibr" rid="B4">Chawla et&#x20;al., 2002</xref>). Several variants of this technique have been proposed. However, oversampling can lead to overfitting due to repeatedly visiting the existing minority samples. On the other hand, undersampling can discard potentially useful information in majority samples. 2) Cost-sensitive learning method: instead of balancing class distributions via sampling methods, this method aims at coping with the abovementioned issues by directly imposing a heavier cost on misclassifying the minority class. However, what types of cost to use in different problem settings is still an open problem. In this study, we use the cost-sensitive learning method using the class weights (CWs) <inline-formula id="inf49">
<mml:math id="m55">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>/</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mo>&#x2b;</mml:mo>
</mml:msub>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf50">
<mml:math id="m56">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>/</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
</mml:msub>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> for the positive and negative classes, respectively. Recall that the majority class is the positive class and the minority class is the negative class in the study. Here, <inline-formula id="inf51">
<mml:math id="m57">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the size of training dataset and <inline-formula id="inf52">
<mml:math id="m58">
<mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mo>&#x2b;</mml:mo>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf53">
<mml:math id="m59">
<mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represent the sizes of the positive and negative classes, respectively. 3) Hybrid method: this is an approach that combines aforementioned two methods. In the study, we use the combination of SMOTE and CW techniques.</p>
</sec>
</sec>
<sec id="s2-3">
<title>Classification Evaluation Metrics</title>
<p>To evaluate binary classifications, we can employ various statistical metrics, according to the goal of the experiment we are performing. Accuracy and F1-score have been among the most quintessential metrics for binary classification problems. Accuracy is a valid evaluation metric for classification problems that are well balanced, with no skew or class imbalance. In general, accuracy can dangerously show overoptimistic inflated results, especially on imbalanced datasets. F1-score is the harmonic mean of precision and recall, and thus F1-score maintains a balance between the precision and recall of the classifier. F1-score is a measure of accuracy, which takes both false positives and false negatives into account. F1-score is usually more useful than accuracy, especially for imbalanced classification. Precision and recall are two extremely important model evaluation metrics. While precision measures the probability of correct detection of positive values, recall measures the ability to distinguish between the classes. Area under the curve (AUC) of the receiver operating characteristic (ROC) and the area under the precision&#x2013;recall curve (AUPRC) are ranking order metrics. AUPRC is often used as an evaluation metric for imbalanced classes. AUPRC is preferred over AUC. When comparing performance of classifiers that need to deal with imbalanced data, F1-score, precision&#x2013;recall, and AUPRC are often used out of convenience (<xref ref-type="bibr" rid="B3">Brabec et&#x20;al., 2020</xref>).</p>
<p>The use of inadequate performance metrics, such as accuracy, leads to poor generalization results because the classifiers tend to predict the largest class. Matthews correlation coefficient (MCC) is widely used in biomedicine as a performance metric. The MCC is a more reliable statistical measure that produces a high score only if the prediction obtained good results in all of the four confusion matrix categories (true positives, false negatives, true negatives, and false positives), proportional to both the size of positive elements and the size of negative elements in the dataset (<xref ref-type="bibr" rid="B5">Chicco and Jurman, 2020</xref>; <xref ref-type="bibr" rid="B11">Ietswaart et&#x20;al., 2020</xref>). MCC is easier to interpret as a correlation coefficient since it takes a value in the interval <inline-formula id="inf54">
<mml:math id="m60">
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mn>1</mml:mn>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, with 1 showing a perfect classifier, &#x2013;1 showing a perverse classifier, and 0 showing that the prediction is uncorrelated with the ground truth. MCC is a very good metric for the imbalanced classification and can be safely used for even classes that are very different in sizes. It is also shown that MCC produces a more informative and truthful score in evaluating binary classifications than accuracy and F1-score (<xref ref-type="bibr" rid="B5">Chicco and Jurman, 2020</xref>). We prefer to use MCC to assess classification performance in this&#x20;study.</p>
<p>The performance of the prediction models of successes and failures of clinical trials is evaluated using the following statistical metrics: TN (true negative), FN (false negative), TP (true positive), FP (false positive), PR (precision), RE (recall), ACC (accuracy), F1-score, MCC, AUC, and AUPRC, which are defined in the following equations:<disp-formula id="e7">
<mml:math id="m61">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>R</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
<disp-formula id="e8">
<mml:math id="m62">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
<disp-formula id="e9">
<mml:math id="m63">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
<disp-formula id="e10">
<mml:math id="m64">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>R</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>R</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>R</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>R</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>
<disp-formula id="e11">
<mml:math id="m65">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msqrt>
</mml:mfrac>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>
</p>
</sec>
</sec>
<sec id="s3">
<title>Experiments and Results</title>
<p>As mentioned before, we use the same dataset as in Gayvert et&#x20;al. (<xref ref-type="bibr" rid="B8">Gayvert et&#x20;al., 2016</xref>), which consists of 757 passed drugs for positive class and 71 failed drugs for negative class. We notice that the dataset is imbalanced. The imbalance ratio of majority to minority compounds is 10.662. The dataset used may not have enough samples for the use of deep learning. We use 10-fold cross-validation techniques to evaluate classification models. The folds are stratified based on drugs. That is to say, all experiments of a single drug are either completely in the training set or completely in the test set. Thus, a model is expected to predict the clinical outcomes of previously unseen drugs at test time. We conduct these 10-fold cross-validation experiments, randomly splitting ten folds. To obtain reliable performance results, we repeat the cross-validation 20&#x20;times for each model on the dataset, and report the mean and standard deviation for each metric.</p>
<p>We select OPCNN as a good model for this particular data. Early experiments with different models did not yield meaningful results. To take into account the class imbalance, we use cost-sensitive learning and hybrid methods. We use binary cross entropy (BCE) as the loss function. We investigate the effect of employing weighted BCE and SMOTE to address the imbalance in our training dataset. Adam optimizer is used for training the neural networks. While the learning rate for Adam optimizer is tuned separately for each model and dataset pair, the same set of hyperparameters is used across the folds. We select hyperparameters such as the number of layers and the number of nodes for OPCNN and DMNN, which provide the best MCC value based on a 10-fold cross-validation.</p>
<p>Deep learning models are likely to overfit the training data since the data used do not have sufficient samples. Therefore, we consider two conventional machine learning models such as SVM and random forest for comparison since these models alleviate overfitting by ensemble and regularization techniques, respectively. 47 input features are first concatenated to be used as inputs of these two models. For the case of SVM, the polynomial kernel of degree 3 and penalty constant <inline-formula id="inf55">
<mml:math id="m66">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>10</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> are selected. It is because this combination provides the best MCC value based on a 10-fold cross-validation. We have tried with several polynomial degrees and <inline-formula id="inf56">
<mml:math id="m67">
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> values to determine the best combination. We have also tried with several kernel parameter values of Gaussian kernel and <inline-formula id="inf57">
<mml:math id="m68">
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> values. For the case of random forest, the number of trees is selected as 100, which provides the best MCC value based on 10-fold cross validation. We have decided it by increasing the number of trees from 10 to 150 in increments of 10. When looking for the best split, the number of input features to be considered is determined as <inline-formula id="inf58">
<mml:math id="m69">
<mml:mrow>
<mml:msqrt>
<mml:mn>47</mml:mn>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula>, where the number of input features is&#x20;47.</p>
<p>To statistically evaluate the significant improvement of our OPCNN, we utilize the two-sided <inline-formula id="inf59">
<mml:math id="m70">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-test. We basically compare the&#x20;model with the best performance result to other models. For&#x20;all evaluation metrics, the value for the best-performing&#x20;model&#x20;is&#x20;highlighted in bold font. Therefore, the&#x20;null&#x20;hypotheses&#x20;associated with <xref ref-type="table" rid="T1">Table&#x20;1</xref> are given as follows:&#x20;<inline-formula id="inf60">
<mml:math id="m71">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mi>A</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mtext>best&#xa0;model</mml:mtext>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mtext>other&#xa0;model</mml:mtext>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf61">
<mml:math id="m72">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mtext>best&#xa0;model</mml:mtext>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mtext>other&#xa0;model</mml:mtext>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> <inline-formula id="inf62">
<mml:math id="m73">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mtext>best&#xa0;model</mml:mtext>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mtext>other&#xa0;model</mml:mtext>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf63">
<mml:math id="m74">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mtext>best&#xa0;model</mml:mtext>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mtext>other&#xa0;model</mml:mtext>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> <inline-formula id="inf64">
<mml:math id="m75">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mtext>best&#xa0;model</mml:mtext>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mtext>other&#xa0;model</mml:mtext>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf65">
<mml:math id="m76">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mi>A</mml:mi>
<mml:mi>U</mml:mi>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mtext>best&#xa0;model</mml:mtext>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mi>U</mml:mi>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mtext>other&#xa0;model</mml:mtext>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf66">
<mml:math id="m77">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mi>A</mml:mi>
<mml:mi>U</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>R</mml:mi>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mtext>best&#xa0;model</mml:mtext>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mi>U</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>R</mml:mi>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mtext>other&#xa0;model</mml:mtext>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. As seen from <xref ref-type="table" rid="T1">Table&#x20;1</xref>, the best model is OPCNN base model for the other five metrics except precision and recall. The relevant <inline-formula id="inf67">
<mml:math id="m78">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-values less than 0.05 are given one asterisk, <inline-formula id="inf68">
<mml:math id="m79">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-values less than 0.01 are given two asterisks, and <inline-formula id="inf69">
<mml:math id="m80">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-values less than 0.001 are given three asterisks.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Classification results for various prediction models via a 10-fold cross-validation.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Multimodal<break/>learning</th>
<th align="center">Model</th>
<th align="center">ACC</th>
<th align="center">F1-score</th>
<th align="center">MCC</th>
<th align="center">Precision</th>
<th align="center">Recall</th>
<th align="center">AUC</th>
<th align="center">AUPRC</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="left">SVM<break/>Early fusion<break/>Concatenation</td>
<td align="left">Base</td>
<td align="center">0.8308<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0053)</td>
<td align="center">0.9055<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0031)</td>
<td align="center">0.1796<sup>&#x2a;&#x2a;&#x2a;</sup> (0.014)</td>
<td align="center">0.9253<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0017)</td>
<td align="center">0.8864<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0054)</td>
<td align="center">0.5622<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0099)</td>
<td align="center">0.9578<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0099)</td>
</tr>
<tr>
<td align="left">CW</td>
<td align="center">0.8152<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0075)</td>
<td align="center">0.8956<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0046)</td>
<td align="center">0.1837<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0149)</td>
<td align="center">0.9263<sup>&#x2a;&#x2a;&#x2a;</sup> (0.002)</td>
<td align="center">0.8669<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0079)</td>
<td align="center">0.5658<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0114)</td>
<td align="center">0.9574<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0011)</td>
</tr>
<tr>
<td align="left">SMOTE &#x2b; CW</td>
<td align="center">0.7748<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0077)</td>
<td align="center">0.8687<sup>&#x2a;&#x2a;&#x2a;</sup> (0.005)</td>
<td align="center">0.1975<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0157)</td>
<td align="center">0.9297<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0028)</td>
<td align="center">0.8153<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0085)</td>
<td align="center">0.5791<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0145)</td>
<td align="center">0.9569<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0015)</td>
</tr>
<tr>
<td rowspan="3" align="left">Random<break/>forest<break/>Early fusion<break/>Concatenation</td>
<td align="left">Base</td>
<td align="center">0.9149<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0018)</td>
<td align="center">0.9551<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0009)</td>
<td align="center">0.2018<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0303)</td>
<td align="center">0.9206<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0013)</td>
<td align="center">0.9924<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0016)</td>
<td align="center">0.7019<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0058)</td>
<td align="center">0.9532<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0015)</td>
</tr>
<tr>
<td align="left">CW</td>
<td align="center">0.9156<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0022)</td>
<td align="center">0.9556<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0011)</td>
<td align="center">0.1865<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0367)</td>
<td align="center">0.9193<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0012)</td>
<td align="center">
<bold>0.9950</bold> (0.0018)</td>
<td align="center">0.7125<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0055)</td>
<td align="center">0.9565<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0021)</td>
</tr>
<tr>
<td align="left">SMOTE &#x2b; CW</td>
<td align="center">0.8949<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0026)</td>
<td align="center">0.9435<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0014)</td>
<td align="center">0.2484<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0156)</td>
<td align="center">0.9285<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0012)</td>
<td align="center">0.9689<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0023)</td>
<td align="center">0.7045<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0052)</td>
<td align="center">0.9577<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0015)</td>
</tr>
<tr>
<td rowspan="3" align="left">OPCNN</td>
<td align="left">Base</td>
<td align="center">
<bold>0.9758</bold> (0.0067)</td>
<td align="center">
<bold>0.9868</bold> (0.0037)</td>
<td align="center">
<bold>0.8451</bold> (0.0424)</td>
<td align="center">0.9844<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0050)</td>
<td align="center">0.9893<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0058)</td>
<td align="center">
<bold>0.9824</bold> (0.0102)</td>
<td align="center">
<bold>0.9979</bold> (0.0015)</td>
</tr>
<tr>
<td align="left">CW</td>
<td align="center">0.9539<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0249)</td>
<td align="center">0.9743<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0144)</td>
<td align="center">0.7620<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0854)</td>
<td align="center">0.9866<sup>&#x2a;&#x2a;</sup> (0.0041)</td>
<td align="center">0.9628<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0282)</td>
<td align="center">0.9653<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0247)</td>
<td align="center">0.9952<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0045)</td>
</tr>
<tr>
<td align="left">SMOTE &#x2b; CW</td>
<td align="center">0.9329<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0338)</td>
<td align="center">0.9619<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0201)</td>
<td align="center">0.7012<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0909)</td>
<td align="center">0.9889 (0.0048)</td>
<td align="center">0.9373<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0379)</td>
<td align="center">0.9583<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0177)</td>
<td align="center">0.9583<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0177)</td>
</tr>
<tr>
<td rowspan="3" align="left">DMNN<break/>Early fusion<break/>Addition</td>
<td align="left">Base</td>
<td align="center">0.9653<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0038)</td>
<td align="center">0.9811<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0021)</td>
<td align="center">0.7727<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0250)</td>
<td align="center">0.9760<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0037)</td>
<td align="center">0.9863<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0041)</td>
<td align="center">0.9717<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0061)</td>
<td align="center">0.9968<sup>&#x2a;&#x2a;</sup> (0.0010)</td>
</tr>
<tr>
<td align="left">CW</td>
<td align="center">0.9492<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0075)</td>
<td align="center">0.9719<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0042)</td>
<td align="center">0.7238<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0368)</td>
<td align="center">0.9843<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0039)</td>
<td align="center">0.9598<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0065)</td>
<td align="center">0.9660<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0080)</td>
<td align="center">0.9961<sup>&#x2a;&#x2a;</sup> (0.0011)</td>
</tr>
<tr>
<td align="left">SMOTE &#x2b; CW</td>
<td align="center">0.9309<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0073)</td>
<td align="center">0.9612<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0043)</td>
<td align="center">0.6740<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0228)</td>
<td align="center">0.9871<sup>&#x2a;&#x2a;</sup> (0.0032)</td>
<td align="center">0.9367<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0089)</td>
<td align="center">0.9551<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0070)</td>
<td align="center">0.9944<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0011)</td>
</tr>
<tr>
<td rowspan="3" align="left">DMNN<break/>Early fusion<break/>Product</td>
<td align="left">Base</td>
<td align="center">0.9669<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0026)</td>
<td align="center">0.9819<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0014)</td>
<td align="center">0.7880<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0175)</td>
<td align="center">0.9798<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0030)</td>
<td align="center">0.9840<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0031)</td>
<td align="center">0.9748<sup>&#x2a;&#x2a;</sup> (0.0055)</td>
<td align="center">0.9972 (0.0012)</td>
</tr>
<tr>
<td align="left">CW</td>
<td align="center">0.9449<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0085)</td>
<td align="center">0.9694<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0048)</td>
<td align="center">0.7111<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0358)</td>
<td align="center">0.9849<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0028)</td>
<td align="center">0.9544<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0080)</td>
<td align="center">0.9678<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0073)</td>
<td align="center">0.9964<sup>&#x2a;&#x2a;</sup> (0.0011)</td>
</tr>
<tr>
<td align="left">SMOTE &#x2b; CW</td>
<td align="center">0.9170<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0090)</td>
<td align="center">0.9529<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0054)</td>
<td align="center">0.6465<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0265)</td>
<td align="center">0.9898 (0.0024)</td>
<td align="center">0.9187<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0095)</td>
<td align="center">0.9560<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0108)</td>
<td align="center">0.9936<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0061)</td>
</tr>
<tr>
<td rowspan="3" align="left">DMNN<break/>Early fusion<break/>Concatenation</td>
<td align="left">Base</td>
<td align="center">0.9652<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0038)</td>
<td align="center">0.9810<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0021)</td>
<td align="center">0.7715<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0261)</td>
<td align="center">0.9761<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0035)</td>
<td align="center">0.9861<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0031)</td>
<td align="center">0.9751<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0063)</td>
<td align="center">0.9973 (0.0008)</td>
</tr>
<tr>
<td align="left">CW</td>
<td align="center">0.9473<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0057)</td>
<td align="center">0.9708<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0032)</td>
<td align="center">0.7125<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0293)</td>
<td align="center">0.9831<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0031)</td>
<td align="center">0.9589<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0043)</td>
<td align="center">0.9662<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0071)</td>
<td align="center">0.9963<sup>&#x2a;&#x2a;</sup> (0.0010)</td>
</tr>
<tr>
<td align="left">SMOTE &#x2b; CW</td>
<td align="center">0.9345<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0069)</td>
<td align="center">0.9634<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0039)</td>
<td align="center">0.6786<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0301)</td>
<td align="center">0.9855<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0036)</td>
<td align="center">0.9422<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0064)</td>
<td align="center">0.9569<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0097)</td>
<td align="center">0.9943<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0019)</td>
</tr>
<tr>
<td rowspan="3" align="left">DMNN<break/>Early fusion<break/>TFL</td>
<td align="left">Base</td>
<td align="center">0.9652<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0053)</td>
<td align="center">0.9811<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0029)</td>
<td align="center">0.7700<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0365)</td>
<td align="center">0.9753<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0039)</td>
<td align="center">0.9869<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0031)</td>
<td align="center">0.9748<sup>&#x2a;&#x2a;</sup> (0.0064)</td>
<td align="center">0.9971<sup>&#x2a;</sup> (0.0009)</td>
</tr>
<tr>
<td align="left">CW</td>
<td align="center">0.9512<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0072)</td>
<td align="center">0.9731<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0041)</td>
<td align="center">0.7252<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0324)</td>
<td align="center">0.9822<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0028)</td>
<td align="center">0.9641<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0070)</td>
<td align="center">0.9663<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0072)</td>
<td align="center">0.9963<sup>&#x2a;&#x2a;</sup> (0.0009)</td>
</tr>
<tr>
<td align="left">SMOTE &#x2b; CW</td>
<td align="center">0.9172<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0113)</td>
<td align="center">0.9531<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0067)</td>
<td align="center">0.6387<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0295)</td>
<td align="center">0.9874<sup>&#x2a;</sup> (0.0032)</td>
<td align="center">0.9212<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0128)</td>
<td align="center">0.9535<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0085)</td>
<td align="center">0.9943<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0015)</td>
</tr>
<tr>
<td rowspan="3" align="left">DMNN<break/>Early fusion<break/>MCF</td>
<td align="left">Base</td>
<td align="center">0.9582<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0048)</td>
<td align="center">0.9773<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0026)</td>
<td align="center">0.7219<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0309)</td>
<td align="center">0.9703<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0028)</td>
<td align="center">0.9845<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0041)</td>
<td align="center">0.9635<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0091)</td>
<td align="center">0.9959<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0016)</td>
</tr>
<tr>
<td align="left">CW</td>
<td align="center">0.9325<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0133)</td>
<td align="center">0.9625<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0076)</td>
<td align="center">0.6429<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0512)</td>
<td align="center">0.9768<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0040)</td>
<td align="center">0.9487<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0131)</td>
<td align="center">0.9454<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0131)</td>
<td align="center">0.9939<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0017)</td>
</tr>
<tr>
<td align="left">SMOTE &#x2b; CW</td>
<td align="center">0.8958<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0122)</td>
<td align="center">0.9407<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0073)</td>
<td align="center">0.5616<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0345)</td>
<td align="center">0.9798<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0037)</td>
<td align="center">0.9046<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0126)</td>
<td align="center">0.9331<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0105)</td>
<td align="center">0.9919<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0021)</td>
</tr>
<tr>
<td rowspan="3" align="left">DMNN<break/>Intermediate fusion<break/>Addition</td>
<td align="left">Base</td>
<td align="center">0.9582<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0041)</td>
<td align="center">0.9773<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0023)</td>
<td align="center">0.7212<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0261)</td>
<td align="center">0.9701<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0033)</td>
<td align="center">0.9845<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0041)</td>
<td align="center">0.9630<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0065)</td>
<td align="center">0.9959<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0008)</td>
</tr>
<tr>
<td align="left">CW</td>
<td align="center">0.9315<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0075)</td>
<td align="center">0.9620<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0043)</td>
<td align="center">0.6335<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0298)</td>
<td align="center">0.9757<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0036)</td>
<td align="center">0.9487<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0083)</td>
<td align="center">0.9429<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0090)</td>
<td align="center">0.9934<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0014)</td>
</tr>
<tr>
<td align="left">SMOTE &#x2b; CW</td>
<td align="center">0.9297<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0081)</td>
<td align="center">0.9607<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0047)</td>
<td align="center">0.6522<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0291)</td>
<td align="center">0.9820<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0029)</td>
<td align="center">0.9404<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0083)</td>
<td align="center">0.9462<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0065)</td>
<td align="center">0.9934<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0010)</td>
</tr>
<tr>
<td rowspan="3" align="left">DMNN<break/>Intermediate fusion<break/>Product</td>
<td align="left">Base</td>
<td align="center">0.9484<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0047)</td>
<td align="center">0.9716<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0026)</td>
<td align="center">0.6983<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0265)</td>
<td align="center">0.9774<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0033)</td>
<td align="center">0.9659<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0042)</td>
<td align="center">0.9638<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0079)</td>
<td align="center">0.9960<sup>&#x2a;&#x2a;</sup> (0.0015)</td>
</tr>
<tr>
<td align="left">CW</td>
<td align="center">0.9311<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0084)</td>
<td align="center">0.9614<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0048)</td>
<td align="center">0.6715<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0338)</td>
<td align="center">0.9863<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0035)</td>
<td align="center">0.9377<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0078)</td>
<td align="center">0.9636<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0093)</td>
<td align="center">0.9959<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0014)</td>
</tr>
<tr>
<td align="left">SMOTE &#x2b; CW</td>
<td align="center">0.9203<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0076)</td>
<td align="center">0.9549<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0045)</td>
<td align="center">0.6518<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0216)</td>
<td align="center">0.9890 (0.0026)</td>
<td align="center">0.9231<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0087)</td>
<td align="center">0.9632<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0069)</td>
<td align="center">0.9958<sup>&#x2a;&#x2a;</sup> (0.0012)</td>
</tr>
<tr>
<td rowspan="3" align="left">DMNN<break/>Intermediate fusion<break/>Concatenation</td>
<td align="left">Base</td>
<td align="center">0.9574<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0059)</td>
<td align="center">0.9769<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0032)</td>
<td align="center">0.7173<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0390)</td>
<td align="center">0.9701<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0041)</td>
<td align="center">0.9838<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0045)</td>
<td align="center">0.9621<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0082)</td>
<td align="center">0.9958<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0011)</td>
</tr>
<tr>
<td align="left">CW</td>
<td align="center">0.9362<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0122)</td>
<td align="center">0.9646<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0070)</td>
<td align="center">0.6542<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0463)</td>
<td align="center">0.9767<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0036)</td>
<td align="center">0.9529<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0126)</td>
<td align="center">0.9522<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0097)</td>
<td align="center">0.9947<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0013)</td>
</tr>
<tr>
<td align="left">SMOTE &#x2b; CW</td>
<td align="center">0.9265<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0098)</td>
<td align="center">0.9588<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0057)</td>
<td align="center">0.6400<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0347)</td>
<td align="center">0.9811<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0033)</td>
<td align="center">0.9376<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0101)</td>
<td align="center">0.9461<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0083)</td>
<td align="center">0.9934<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0014)</td>
</tr>
<tr>
<td rowspan="3" align="left">DMNN<break/>Intermediate fusion<break/>TFL</td>
<td align="left">Base</td>
<td align="center">0.9652<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0053)</td>
<td align="center">0.9810<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0029)</td>
<td align="center">0.7774<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0324)</td>
<td align="center">0.9787<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0035)</td>
<td align="center">0.9834<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0044)</td>
<td align="center">0.9678<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0096)</td>
<td align="center">0.9964<sup>&#x2a;&#x2a;</sup> (0.0013)</td>
</tr>
<tr>
<td align="left">CW</td>
<td align="center">0.9457<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0054)</td>
<td align="center">0.9699<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0031)</td>
<td align="center">0.7068<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0242)</td>
<td align="center">0.9831<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0029)</td>
<td align="center">0.9571<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0058)</td>
<td align="center">0.9632<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0091)</td>
<td align="center">0.9958<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0013)</td>
</tr>
<tr>
<td align="left">SMOTE &#x2b; CW</td>
<td align="center">0.9286<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0112)</td>
<td align="center">0.9598<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0065)</td>
<td align="center">0.6740<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0334)</td>
<td align="center">0.9887 (0.0025)</td>
<td align="center">0.9325<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0122)</td>
<td align="center">0.9597<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0098)</td>
<td align="center">0.9950<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0016)</td>
</tr>
<tr>
<td rowspan="3" align="left">DMNN<break/>Intermediate fusion<break/>MCF</td>
<td align="left">Base</td>
<td align="center">0.9464<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0042)</td>
<td align="center">0.9706<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0023)</td>
<td align="center">0.6770<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0255)</td>
<td align="center">0.9736<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0034)</td>
<td align="center">0.9677<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0038)</td>
<td align="center">0.9550<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0067)</td>
<td align="center">0.9948<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0011)</td>
</tr>
<tr>
<td align="left">CW</td>
<td align="center">0.9225<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0073)</td>
<td align="center">0.9564<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0043)</td>
<td align="center">0.6379<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0225)</td>
<td align="center">0.9836<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0025)</td>
<td align="center">0.9307<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0082)</td>
<td align="center">0.9541<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0111)</td>
<td align="center">0.9947<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0018)</td>
</tr>
<tr>
<td align="left">SMOTE &#x2b; CW</td>
<td align="center">0.9057<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0136)</td>
<td align="center">0.9464<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0081)</td>
<td align="center">0.6060<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0408)</td>
<td align="center">0.9857<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0035)</td>
<td align="center">0.9101<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0129)</td>
<td align="center">0.9505<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0109)</td>
<td align="center">0.9943<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0016)</td>
</tr>
<tr>
<td rowspan="3" align="left">DMNN<break/>Late fusion<break/>Concatenation</td>
<td align="left">Base</td>
<td align="center">0.9432<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0050)</td>
<td align="center">0.9691<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0028)</td>
<td align="center">0.6276<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0310)</td>
<td align="center">0.9633<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0036)</td>
<td align="center">0.9750<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0047)</td>
<td align="center">0.9414<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0084)</td>
<td align="center">0.9934<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0012)</td>
</tr>
<tr>
<td align="left">CW</td>
<td align="center">0.8990<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0107)</td>
<td align="center">0.9429<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0062)</td>
<td align="center">0.5476<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0405)</td>
<td align="center">0.9750<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0049)</td>
<td align="center">0.9130<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0090)</td>
<td align="center">0.9228<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0153)</td>
<td align="center">0.9912<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0020)</td>
</tr>
<tr>
<td align="left">SMOTE &#x2b; CW</td>
<td align="center">0.9005<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0054)</td>
<td align="center">0.9434<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0033)</td>
<td align="center">0.5835<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0181)</td>
<td align="center">0.9832<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0036)</td>
<td align="center">0.9067<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0070)</td>
<td align="center">0.9381<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0057)</td>
<td align="center">0.9931<sup>&#x2a;&#x2a;&#x2a;</sup> (0.0009)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
<xref ref-type="table" rid="T1">Table&#x20;1</xref> shows the comparison of various prediction models via a 10-fold cross-validation, each of which is trained based on the imbalanced training dataset with or without balancing the class frequencies. We calculate means and standard deviations of the ACC, F1-score, MCC, precision, recall, AUC, and AUPRC. Boldfaced values indicate the best performance results. Standard errors are given in parentheses. As seen from <xref ref-type="table" rid="T1">Table&#x20;1</xref>, OPCNN and DMNN models overall show better results than SVM and RF for all evaluation metrics except recall. The OPCNN base model shows the highest ACC, F1-score, MCC, AUC, and AUPRC averages, which are 0.9758, 0.9868, 0.8451, 0.9824, and 0.9979, respectively. In particular, OPCNN base model significantly outperforms the other models for both F1-score and MCC that are good metrics for the imbalanced classification. Although OPCNN base model does not show the highest precision and recall averages, it still shows evenly high precision and recall averages. The DMNN base model using product operation at the early fusion step shows the second highest ACC, F1-score, and MCC averages, which are 0.9669, 0.9819, and 0.7880, respectively. If classification successes and errors must be considered together, then the MCC arises as the best choice (<xref ref-type="bibr" rid="B17">Luque et&#x20;al., 2019</xref>). Therefore, we prefer to use MCC to assess classification performance in this study. Compared to other models, the OPCNN base model shows a significantly higher MCC average. To conclude, <xref ref-type="table" rid="T1">Table&#x20;1</xref> shows that OPCNN base model is the best model for predicting successes and failures of clinical trials.</p>
<p>Plotting ROC and precision&#x2013;recall curves is a popular way for discriminatory accuracy visualization of the binary classification models. <xref ref-type="fig" rid="F3">Figure&#x20;3</xref> shows the graph of ROC curves and precision&#x2013;recall curves for three best-performing models in terms of AUC and AUPRC, respectively. Since we replicate the cross-validation 20&#x20;times for each model, we show curves for only one replication. <xref ref-type="fig" rid="F3">Figure&#x20;3</xref> shows that the OPCNN base model is a better classifier. However, <xref ref-type="table" rid="T1">Table&#x20;1</xref> illustrates that AUC averages of these three models differ significantly but AUPRC averages of these three models do not differ significantly.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>ROC and precision&#x2013;recall curves for 10-fold cross-validation. <bold>(A)</bold> ROC curves <bold>(B)</bold> Precision&#x2013;recall curves.</p>
</caption>
<graphic xlink:href="fphar-12-670670-g003.tif"/>
</fig>
</sec>
<sec sec-type="conclusion" id="s4">
<title>Conclusion</title>
<p>In this study, to develop the prediction model of the outcomes of clinical trials of drug candidates, we proposed OPCNN model that employs the augmented outer product to join effectively chemical features of drugs and target-based features. The proposed OPCNN model was evaluated via 10-fold cross-validation techniques on dataset used in Gayvert et&#x20;al. (<xref ref-type="bibr" rid="B8">Gayvert et&#x20;al., 2016</xref>), which consists of 757 approved drugs for positive class and 71 failed drugs for negative class. We observed that the OPCNN base model shows the highest averages of ACC, F1-score, MCC, AUC, and AUPRC. In particular, it is noteworthy that the OPCNN base model showed the highest averages of F1-score, MCC, and AUPRC, which are more reliable metrics for the imbalanced classification. The two-sided <inline-formula id="inf70">
<mml:math id="m81">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-test showed that F1-score and MCC averages of OPCNN base model are significantly higher than those of the other models. The OPCNN base model also showed evenly high precision and recall averages, even though this model did not show the highest precision and recall averages. The graph of ROC curves and precision&#x2013;recall curves also illustrates that the OPCNN base model is a better classifier.</p>
<p>Although we did not report the experimental results, we also conducted experiments on ensemble models based on RFs, extra trees, and weighted least squares SVM. In addition, we performed experiments on a DMNN using a one-dimensional CNN for each individual modality. The aforementioned OPCNN and DMNN models performed much better than the ensemble models for all five evaluation metrics. The purpose of this study is to develop an efficient predictive model based on the dataset used in Gayvert et&#x20;al. (<xref ref-type="bibr" rid="B8">Gayvert et&#x20;al., 2016</xref>). The key idea underlying OPCNN is to integrate two modalities using the augmented outer product and to apply CNN to the resulting matrix. We think this idea can be effectively applied to other tasks based on bimodal data and can be extended to multimodal data. The OPCNN model can be further improved by adjusting the architecture of CNN according to the data structure.</p>
<p>A critical issue is that the dataset does not have enough samples for the use of deep learning and particularly has only 71 samples for failure data. Therefore, OPCNN and DMNN could overfit the data since these complex models are likely to detect subtle patterns in the data. Obviously, these patterns will not generalize to new instances. Therefore, we need to apply our OPCNN to a larger dataset and check its efficacy. Furthermore, additional evidence is needed before we can conclude that our OPCNN is an effective approach for predicting successes and failures of clinical trials and can be quite helpful in the drug development process.</p>
</sec>
</body>
<back>
<sec id="s5">
<title>Data Availability Statement</title>
<p>The dataset and source code for this paper can be downloaded from the Github repository at <ext-link ext-link-type="uri" xlink:href="https://github.com/sawoo9410/Clinical-Trials-with-OPCNN">https://github.com/sawoo9410/Clinical-Trials-with-OPCNN</ext-link>.</p>
</sec>
<sec id="s6">
<title>Author Contributions</title>
<p>SS and YK performed the experiment. HH, WS, ZH, IS, JS, and CH proposed the problem to tackle. IS, JS, and CH wrote the main manuscript text. All authors reviewed the manuscript.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>This research was supported by the Bio and Medical Technology Development Program of the National Research Foundation (NRF) funded by the Korean government (MSIT) (No. 2019M3E5D4066897). This work was supported by the National Research Foundation of Korea (NRF) grant funded by the Korean government (MSIT) (No. 2020R1F1A1A01061099). This work was supported by the Korea Institute of Energy Technology Evaluation and Planning (KETEP) and the Ministry of Trade, Industry and Energy (MOTIE) of the Republic of Korea (No. 20212020800120).</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of Interest</title>
<p>ZH and IS were employed by the company Arontier.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Baltru&#x161;aitis</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ahuja</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Morency</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Multimodal Machine Learning: A Survey and Taxonomy</article-title>. <source>IEEE Trans. Pattern Anal. Machine Intelligence</source> <volume>41</volume>, <fpage>423</fpage>&#x2013;<lpage>443</lpage>. </citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bickerton</surname>
<given-names>G. R.</given-names>
</name>
<name>
<surname>Paolini</surname>
<given-names>G. V.</given-names>
</name>
<name>
<surname>Besnard</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Muresan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hopkins</surname>
<given-names>A. L.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Quantifying the Chemical beauty of Drugs</article-title>. <source>Nat. Chem.</source> <volume>4</volume>, <fpage>90</fpage>&#x2013;<lpage>98</lpage>. <pub-id pub-id-type="doi">10.1038/nchem.1243</pub-id> </citation>
</ref>
<ref id="B3">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Brabec</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kom&#xe1;rek</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Franc</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Machlica</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>On Model Evaluation under Non-constant Class Imbalance</article-title>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/pdf/2001.05571">https://arxiv.org/pdf/2001.05571</ext-link>
</comment>. </citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chawla</surname>
<given-names>N. V.</given-names>
</name>
<name>
<surname>Bowyer</surname>
<given-names>K. W.</given-names>
</name>
<name>
<surname>Hall</surname>
<given-names>L. O.</given-names>
</name>
<name>
<surname>Kegelmeyer</surname>
<given-names>W. P.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>SMOTE: Synthetic Minority Over-sampling Technique</article-title>. <source>jair</source> <volume>16</volume>, <fpage>321</fpage>&#x2013;<lpage>357</lpage>. <pub-id pub-id-type="doi">10.1613/jair.953</pub-id> </citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chicco</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Jurman</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>The Advantages of the Matthews Correlation Coefficient (MCC) over F1 Score and Accuracy in Binary Classification Evaluation</article-title>. <source>BMC Genomics</source> <volume>21</volume> (<issue>1</issue>). <pub-id pub-id-type="doi">10.1186/s12864-019-6413-7</pub-id> </citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Feng</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Haase-Schuetz</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Rosenbaum</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hertlein</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Duffhauss</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Glaeser</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Deep Multi-Modal Object Detection and Semantic Segmentation for Autonomous Driving: Datasets, Methods, and Challenges</article-title>. <source>IEEE Trans. Intell. Transportation Syst.</source> <comment>in press</comment>. </citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A Survey on Deep Learning for Multimodal Data Fusion</article-title>. <source>Neural Comput.</source> <volume>32</volume>, <fpage>829</fpage>&#x2013;<lpage>864</lpage>. <pub-id pub-id-type="doi">10.1162/neco_a_01273</pub-id> </citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gayvert</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>Madhukar</surname>
<given-names>N. S.</given-names>
</name>
<name>
<surname>Elemento</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>A Data-Driven Approach to Predicting Successes and Failures of Clinical Trials</article-title>. <source>Cel. Chem. Biol.</source> <volume>23</volume>, <fpage>1294</fpage>&#x2013;<lpage>1301</lpage>. <pub-id pub-id-type="doi">10.1016/j.chembiol.2016.07.023</pub-id> </citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghose</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Viswanadhan</surname>
<given-names>V. N.</given-names>
</name>
<name>
<surname>Wendoloski</surname>
<given-names>J.&#x20;J.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>A Knowledge-Based Approach in Designing Combinatorial or Medicinal Chemistry Libraries for Drug Discovery. 1. A Qualitative and Quantitative Characterization of Known Drug Databases</article-title>. <source>J.&#x20;Comb. Chem.</source> <volume>1</volume>, <fpage>55</fpage>&#x2013;<lpage>68</lpage>. <pub-id pub-id-type="doi">10.1021/cc9800071</pub-id> </citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hay</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>D. W.</given-names>
</name>
<name>
<surname>Craighead</surname>
<given-names>J.&#x20;L.</given-names>
</name>
<name>
<surname>Economides</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Rosenthal</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Clinical Development success Rates for Investigational Drugs</article-title>. <source>Nat. Biotechnol.</source> <volume>32</volume>, <fpage>40</fpage>&#x2013;<lpage>51</lpage>. <pub-id pub-id-type="doi">10.1038/nbt.2786</pub-id> </citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ietswaart</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Arat</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>A. X.</given-names>
</name>
<name>
<surname>Farahmand</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>DuMouchel</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Machine Learning Guided Association of Adverse Drug Reactions with <italic>In Vitro</italic> Target-Based Pharmacology</article-title>. <source>EBioMedicine</source> <volume>57</volume>, <fpage>102837</fpage>. <pub-id pub-id-type="doi">10.1016/j.ebiom.2020.102837</pub-id> </citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ledford</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Translational Research: 4 Ways to Fix the Clinical Trial</article-title>. <source>Nature</source> <volume>477</volume>, <fpage>526</fpage>&#x2013;<lpage>528</lpage>. <pub-id pub-id-type="doi">10.1038/477526a</pub-id> </citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lipinski</surname>
<given-names>C. A.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Lead- and Drug-like Compounds: the Rule-Of-Five Revolution</article-title>. <source>Drug Discov. Today Tech.</source> <volume>1</volume>, <fpage>337</fpage>&#x2013;<lpage>341</lpage>. <pub-id pub-id-type="doi">10.1016/j.ddtec.2004.11.007</pub-id> </citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lipinski</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>Lombardo</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Dominy</surname>
<given-names>B. W.</given-names>
</name>
<name>
<surname>Feeney</surname>
<given-names>P. J.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>Experimental and Computational Approaches to Estimate Solubility and Permeability in Drug Discovery and Development Settings</article-title>. <source>Adv. Drug Deliv. Rev.</source> <volume>23</volume>, <fpage>3</fpage>&#x2013;<lpage>25</lpage>. <pub-id pub-id-type="doi">10.1016/s0169-409x(96)00423-1</pub-id> </citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Walter</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wright</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Bartosik</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Dolciami</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Elbasir</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Prediction and Mechanistic Analysis of Drug-Induced Liver Injury (DILI) Based on Chemical Structure</article-title>. <source>Biol. Direct</source> <volume>16</volume>, <fpage>6</fpage>. <pub-id pub-id-type="doi">10.1186/s13062-020-00285-0</pub-id> </citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lo</surname>
<given-names>A. W.</given-names>
</name>
<name>
<surname>Siah</surname>
<given-names>K. W.</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>C. H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Machine Learning with Statistical Imputation for Predicting Drug Approval</article-title>. <source>Harv. Data Sci. Rev.</source> <volume>1</volume>. <pub-id pub-id-type="doi">10.1162/99608f92.5c5f0525</pub-id> </citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luque</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Carrasco</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mart&#xed;n</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>de las Heras</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>The Impact of Class Imbalance in Classification Performance Metrics Based on the Binary Confusion Matrix</article-title>. <source>Pattern Recognition</source> <volume>91</volume>, <fpage>216</fpage>&#x2013;<lpage>231</lpage>. <pub-id pub-id-type="doi">10.1016/j.patcog.2019.02.023</pub-id> </citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lysenko</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sharma</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Boroevich</surname>
<given-names>K. A.</given-names>
</name>
<name>
<surname>Tsunoda</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>An Integrative Machine Learning Approach for Prediction of Toxicity-Related Drug Safety</article-title>. <source>Life Sci. Alliance</source> <volume>1</volume> (<issue>6</issue>), <fpage>e201800098</fpage>. <pub-id pub-id-type="doi">10.26508/lsa.201800098</pub-id> </citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Munos</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Niederreiter</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Riccaboni</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Improving the Prediction of Clinical success Using Machine Learning</article-title>. <source>medRxiv preprint</source>. <pub-id pub-id-type="doi">10.1101/2021.02.01.21250864</pub-id> </citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Scannell</surname>
<given-names>J.&#x20;W.</given-names>
</name>
<name>
<surname>Blanckley</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Boldon</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Warrington</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Diagnosing the Decline in Pharmaceutical R&#x26;D Efficiency</article-title>. <source>Nat. Rev. Drug Discov.</source> <volume>11</volume>, <fpage>191</fpage>&#x2013;<lpage>200</lpage>. <pub-id pub-id-type="doi">10.1038/nrd3681</pub-id> </citation>
</ref>
<ref id="B21">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Vapnik</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>1995</year>). <source>The Nature of Statistical Learning Theory</source>. <publisher-name>Springer</publisher-name>. <pub-id pub-id-type="doi">10.1007/978-1-4757-2440-0</pub-id> </citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Veber</surname>
<given-names>D. F.</given-names>
</name>
<name>
<surname>Johnson</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>H.-Y.</given-names>
</name>
<name>
<surname>Smith</surname>
<given-names>B. R.</given-names>
</name>
<name>
<surname>Ward</surname>
<given-names>K. W.</given-names>
</name>
<name>
<surname>Kopple</surname>
<given-names>K. D.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Molecular Properties that Influence the Oral Bioavailability of Drug Candidates</article-title>. <source>J.&#x20;Med. Chem.</source> <volume>45</volume>, <fpage>2615</fpage>&#x2013;<lpage>2623</lpage>. <pub-id pub-id-type="doi">10.1021/jm020017n</pub-id> </citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Gan</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Dynamic Curriculum Learning for Imbalanced Data Classification</article-title>. <source>CoRR</source> <volume>abs/1901.06783</volume>. </citation>
</ref>
<ref id="B24">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Multi-modal Circulant Fusion for Video-To-Language and Backward</article-title>. <conf-name>Proceedings of the 27th International Joint Conference on Artificial Intelligence</conf-name>, <conf-loc>Stockholm, Sweden</conf-loc>, <conf-date>July 13&#x2013;19, 2018</conf-date>, <fpage>1029</fpage>&#x2013;<lpage>1035</lpage>. </citation>
</ref>
<ref id="B25">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zadeh</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Poria</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Cambria</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Morency</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Tensor Fusion Network for Multimodal Sentiment Analysis</article-title>. <conf-name>Proceedings of the Conference on Empirical Methods in Natural Language Processing</conf-name>, <conf-loc>Copenhagen, Denmark</conf-loc>, <conf-date>September 7&#x2013;11, 2017</conf-date>, <fpage>1103</fpage>&#x2013;<lpage>1114</lpage>. </citation>
</ref>
</ref-list>
</back>
</article>