<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2022.744755</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Artificial Intelligence</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Non-linearity of Metabolic Pathways Critically Influences the Choice of Machine Learning Model</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Lo-Thong-Viramoutou</surname> <given-names>Oph&#x000E9;lie</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Charton</surname> <given-names>Philippe</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1507993/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Cadet</surname> <given-names>Xavier F.</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1394847/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Grondin-Perez</surname> <given-names>Brigitte</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Saavedra</surname> <given-names>Emma</given-names></name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/263188/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Damour</surname> <given-names>C&#x000E9;dric</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1582822/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Cadet</surname> <given-names>Fr&#x000E9;d&#x000E9;ric</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1092788/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>University of Paris, BIGR&#x02014;Biologie Int&#x000E9;gr&#x000E9;e du Globule Rouge, Inserm, UMR_S1134</institution>, <addr-line>Paris</addr-line>, <country>France</country></aff>
<aff id="aff2"><sup>2</sup><institution>Laboratory of Excellence GR-Ex</institution>, <addr-line>Paris</addr-line>, <country>France</country></aff>
<aff id="aff3"><sup>3</sup><institution>Laboratory DSIMB, UMR_S1134, BIGR, Inserm, Faculty of Sciences and Technology, University of La Reunion</institution>, <addr-line>Saint-Denis</addr-line>, <country>France</country></aff>
<aff id="aff4"><sup>4</sup><institution>PEACCEL, Artificial Intelligence Department</institution>, <addr-line>Paris</addr-line>, <country>France</country></aff>
<aff id="aff5"><sup>5</sup><institution>EnergyLab, EA 4079, Faculty of Sciences and Technology, University of La Reunion</institution>, <addr-line>Saint-Denis</addr-line>, <country>France</country></aff>
<aff id="aff6"><sup>6</sup><institution>Departamento de Bioqu&#x000ED;mica, Instituto Nacional de Cardiolog&#x000ED;a Ignacio Ch&#x000E1;vez</institution>, <addr-line>Mexico City</addr-line>, <country>Mexico</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Rudiyanto Gunawan, University at Buffalo, United States</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Takeyuki Tamura, Kyoto University, Japan; Sudharshan Ravi, University of Zurich, Switzerland</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Fr&#x000E9;d&#x000E9;ric Cadet <email>frederic.cadet.run&#x00040;gmail.com</email></corresp>
<fn fn-type="other" id="fn001"><p>This article was submitted to Machine Learning and Artificial Intelligence, a section of the journal Frontiers in Artificial Intelligence</p></fn></author-notes>
<pub-date pub-type="epub">
<day>10</day>
<month>06</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>5</volume>
<elocation-id>744755</elocation-id>
<history>
<date date-type="received">
<day>20</day>
<month>07</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>29</day>
<month>04</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2022 Lo-Thong-Viramoutou, Charton, Cadet, Grondin-Perez, Saavedra, Damour and Cadet.</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Lo-Thong-Viramoutou, Charton, Cadet, Grondin-Perez, Saavedra, Damour and Cadet</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<p>The use of machine learning (ML) in life sciences has gained wide interest over the past years, as it speeds up the development of high performing models. Important modeling tools in biology have proven their worth for pathway design, such as mechanistic models and metabolic networks, as they allow better understanding of mechanisms involved in the functioning of organisms. However, little has been done on the use of ML to model metabolic pathways, and the degree of non-linearity associated with them is not clear. Here, we report the construction of different metabolic pathways with several linear and non-linear ML models. Different types of data are used; they lead to the prediction of important biological data, such as pathway flux and final product concentration. A comparison reveals that the data features impact model performance and highlight the effectiveness of non-linear models (e.g., QRF: RMSE = 0.021 nmol&#x000B7;min<sup>&#x02212;1</sup> and R<sup>2</sup> = 1 vs. Bayesian GLM: RMSE = 1.379 nmol&#x000B7;min<sup>&#x02212;1</sup> and R<sup>2</sup> = 0.823). It turns out that the greater the degree of non-linearity of the pathway, the better suited a non-linear model will be. Therefore, a decision-making support for pathway modeling is established. These findings generally support the hypothesis that non-linear aspects predominate within the metabolic pathways. This must be taken into account when devising possible applications of these pathways for the identification of biomarkers of diseases (e.g., infections, cancer, neurodegenerative diseases) or the optimization of industrial production processes.</p>
</abstract>
<kwd-group>
<kwd>artificial intelligence</kwd>
<kwd>machine learning</kwd>
<kwd>non-linear modeling</kwd>
<kwd>drug target identification</kwd>
<kwd><italic>Trypanosoma cruzi</italic> detoxification pathway</kwd>
<kwd><italic>Entamoeba histolytica</italic> glycolysis pathway</kwd>
<kwd>penicillin production</kwd>
</kwd-group>
<contract-num rid="cn001">UE_Tiers216275</contract-num>
<contract-sponsor id="cn001">Conseil R&#x000E9;gional de La R&#x000E9;union<named-content content-type="fundref-id">10.13039/501100010081</named-content></contract-sponsor>
<counts>
<fig-count count="11"/>
<table-count count="4"/>
<equation-count count="3"/>
<ref-count count="93"/>
<page-count count="21"/>
<word-count count="13379"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>Introduction</title>
<p>Machine learning (ML) holds an increasingly prominent place in the field of biology. Indeed, it can lead to better results and has a large range of applications including: drug design using machine learning algorithms such as the support vector machine (SVM) algorithm to perform structure-activity relationship analysis (Hartwell et al., <xref ref-type="bibr" rid="B30">1999</xref>; Burbidge et al., <xref ref-type="bibr" rid="B9">2001</xref>; R&#x000E9;da et al., <xref ref-type="bibr" rid="B67">2020</xref>); directed protein evolution and enzyme function prediction (Li et al., <xref ref-type="bibr" rid="B48">2018</xref>; Wu et al., <xref ref-type="bibr" rid="B84">2019</xref>); reconstruction of biological routes (Kotera et al., <xref ref-type="bibr" rid="B42">2013</xref>; Baranwal et al., <xref ref-type="bibr" rid="B8">2020</xref>) or modeling and optimization of metabolic pathways (Zhang et al., <xref ref-type="bibr" rid="B92">2019</xref>; Kim et al., <xref ref-type="bibr" rid="B41">2020</xref>). With regard to the latter topic, several methods have been developed to analyze complex biological systems (<xref ref-type="fig" rid="F1">Figure 1</xref>):</p>
<list list-type="bullet">
<list-item><p>The <bold>knowledge-based model</bold> including kinetic models (Chance, <xref ref-type="bibr" rid="B12">1943</xref>; Sel&#x00027;Kov, <xref ref-type="bibr" rid="B73">1968</xref>; Curto et al., <xref ref-type="bibr" rid="B18">1997</xref>, <xref ref-type="bibr" rid="B19">1998</xref>; Hatzimanikatis et al., <xref ref-type="bibr" rid="B32">1998</xref>; Visser and Heijnen, <xref ref-type="bibr" rid="B79">2003</xref>; Liebermeister et al., <xref ref-type="bibr" rid="B49">2010</xref>) and metabolic flux analysis methods (Fell and Small, <xref ref-type="bibr" rid="B23">1986</xref>; Stephanopoulos, <xref ref-type="bibr" rid="B77">1999</xref>);</p></list-item>
<list-item><p>The <bold>data-based model</bold> including ML algorithms and ensemble learning (Zelezniak et al., <xref ref-type="bibr" rid="B91">2018</xref>; Ajjolli Nagaraja et al., <xref ref-type="bibr" rid="B1">2019</xref>; Oyetunde et al., <xref ref-type="bibr" rid="B60">2019</xref>);</p></list-item>
<list-item><p>The <bold>hybrid model</bold> including combinations of models or modified preceding methods (Cascante et al., <xref ref-type="bibr" rid="B11">2002</xref>; Morgan and Rhodes, <xref ref-type="bibr" rid="B56">2002</xref>).</p></list-item>
</list>
<p>Although these analyses are conducted on metabolic pathways, few of them are used to predict their fluxes. Among these few works on metabolic fluxes, it is interesting to highlight those of Ajjolli Nagaraja et al. (<xref ref-type="bibr" rid="B1">2019</xref>). For the present work, the method of greatest interest is the data-based model and more precisely, ML. In fact, ML abounds in various methods and is a promising and growing approach that could greatly help to improve existing models, integrate multi-omics data and save researchers&#x00027; time. Also, a distinction can be made between ML methods: some are linear (ridge and lasso regression, multivariate adaptive regression spline&#x02026;) and others are non-linear (artificial neural network, k-nearest neighbors, decision tree&#x02026;). In addition, the non-linearity of a metabolic pathway is considered inherent to the pathway, depending on the non-linearity of chemical reaction kinetics and that related to regulatory processes (Song and Ramkrishna, <xref ref-type="bibr" rid="B76">2013</xref>; Yasemi and Jolicoeur, <xref ref-type="bibr" rid="B88">2021</xref>). Reviews on the fundamentals of Metabolic Control Analysis (Heinrich and Rapoport, <xref ref-type="bibr" rid="B36">1974</xref>; Kacser et al., <xref ref-type="bibr" rid="B39">1995</xref>) suggest mathematically that the pathway fluxes are non-linear. Moreover, experiments were done on glycolytic fluxes in cells, where intact cells were incubated at different glucose concentrations (Mar&#x000ED;n-Hern&#x000E1;ndez et al., <xref ref-type="bibr" rid="B52">2020</xref>). The results showed a clearly hyperbolic behavior of the experimental data. Other experimental data, used notably in this study, indicate that the pattern is non-linear (Moreno-S&#x000E1;nchez et al., <xref ref-type="bibr" rid="B55">2008</xref>; Gonz&#x000E1;lez-Ch&#x000E1;vez et al., <xref ref-type="bibr" rid="B28">2015</xref>). 
These experimental data demonstrate that the pathway fluxes are non-linear. However, it has not yet been investigated whether linear or non-linear methods are more efficient in predicting pathway fluxes, and how to choose the appropriate one.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Classification of metabolic pathway modeling methods according to their complexity and the year of first application in this field. The ellipse size is proportional to the occurrence of the method for pathway modeling in the literature. Three main groups are defined: knowledge-based model (Michaelis and Menten, <xref ref-type="bibr" rid="B54">1913</xref>; Chance, <xref ref-type="bibr" rid="B12">1943</xref>; Shapiro and Shapley, <xref ref-type="bibr" rid="B74">1965</xref>; Garfinkel et al., <xref ref-type="bibr" rid="B25">1970</xref>; Savageau, <xref ref-type="bibr" rid="B69">1970</xref>, <xref ref-type="bibr" rid="B70">1988</xref>; Fell and Small, <xref ref-type="bibr" rid="B23">1986</xref>; Hatzimanikatis and Bailey, <xref ref-type="bibr" rid="B31">1997</xref>; Curto et al., <xref ref-type="bibr" rid="B19">1998</xref>; Heijnen, <xref ref-type="bibr" rid="B34">2005</xref>; Liebermeister et al., <xref ref-type="bibr" rid="B49">2010</xref>), data-based model (Wu et al., <xref ref-type="bibr" rid="B83">2016</xref>; Cuperlovic-Culf, <xref ref-type="bibr" rid="B17">2018</xref>; Ajjolli Nagaraja et al., <xref ref-type="bibr" rid="B1">2019</xref>; Zampieri et al., <xref ref-type="bibr" rid="B90">2019</xref>; Zhang et al., <xref ref-type="bibr" rid="B92">2019</xref>; Kim et al., <xref ref-type="bibr" rid="B41">2020</xref>) and hybrid model (Wiechert et al., <xref ref-type="bibr" rid="B81">1997</xref>; Drysch et al., <xref ref-type="bibr" rid="B21">2003</xref>; Antoniewicz et al., <xref ref-type="bibr" rid="B3">2007</xref>; N&#x000F6;h et al., <xref ref-type="bibr" rid="B59">2007</xref>; Leighty and Antoniewicz, <xref ref-type="bibr" rid="B45">2011</xref>; Antoniewicz, <xref ref-type="bibr" rid="B4">2015</xref>; Pan et al., <xref ref-type="bibr" rid="B61">2017</xref>; Yousoff et al., <xref ref-type="bibr" rid="B89">2017</xref>; Heckmann, <xref ref-type="bibr" rid="B33">2018</xref>; Oyetunde et al., <xref ref-type="bibr" rid="B60">2019</xref>; Zampieri et al., 
<xref ref-type="bibr" rid="B90">2019</xref>; Lo-Thong et al., <xref ref-type="bibr" rid="B50">2020</xref>; Rana et al., <xref ref-type="bibr" rid="B66">2020</xref>). Linear methods are represented in gray and non-linear ones are in blue. Methods in bold and white are those evaluated in this study.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-05-744755-g0001.tif"/>
</fig>
<p>Therefore, this study aims to elucidate the most appropriate methods to model three distinct metabolic pathways by designing and comparing five linear and eight non-linear machine learning-based methods (<xref ref-type="fig" rid="F2">Figure 2</xref>):</p>
<list list-type="bullet">
<list-item><p>The lower part of <italic>Entamoeba histolytica</italic> glycolysis (<xref ref-type="fig" rid="F3">Figure 3A</xref>), one of the major metabolic pathways of the parasite (Moreno-S&#x000E1;nchez et al., <xref ref-type="bibr" rid="B55">2008</xref>; Muller et al., <xref ref-type="bibr" rid="B57">2012</xref>; Pineda et al., <xref ref-type="bibr" rid="B62">2015</xref>), through the use of a recently developed model (Lo-Thong et al., <xref ref-type="bibr" rid="B50">2020</xref>);</p></list-item>
<list-item><p>The peroxide detoxification pathway of <italic>Trypanosoma cruzi</italic> (<xref ref-type="fig" rid="F3">Figure 3B</xref>) (Gonz&#x000E1;lez-Ch&#x000E1;vez et al., <xref ref-type="bibr" rid="B28">2015</xref>, <xref ref-type="bibr" rid="B29">2019</xref>);</p></list-item>
<list-item><p>The industrial-scale penicillin fermentation process of <italic>Penicillium chrysogenum</italic> (<xref ref-type="fig" rid="F3">Figure 3C</xref>) (Goldrick et al., <xref ref-type="bibr" rid="B27">2015</xref>).</p></list-item>
</list>
<p>Although these machine-learning approaches have been used to model metabolic pathways, few studies have focused on their usefulness in predicting flux (Wu et al., <xref ref-type="bibr" rid="B83">2016</xref>; Ajjolli Nagaraja et al., <xref ref-type="bibr" rid="B1">2019</xref>).</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Study workflow. Data from three different metabolic pathways are collected and used to build data-based models. Datasets that contain a small amount of data (<italic>n</italic>) go through a process of data augmentation, before being separated into two sets: training set and test set. Then, in order to predict the final flux or final product concentration, multiple ML models are built with the training set, while the test set is used to assess the final models. The resulting predictions are compared in a last step to evaluate model reliability.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-05-744755-g0002.tif"/>
</fig>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Overview of the three metabolic pathways modeled with machine learning methods. <bold>(A)</bold> Lower part of <italic>E. histolytica</italic> glycolysis pathway with pyruvate (Pyr) formation from 3-phosphoglycerate (3PG). The L-lactate (Lac) formation (dashed lines) is not part of the natural pathway; however, lactate dehydrogenase (LDH) has been added in order to experimentally follow the final flux and establish a quasi-steady-state to Lac (Moreno-S&#x000E1;nchez et al., <xref ref-type="bibr" rid="B55">2008</xref>). Metabolite inhibitions are represented in red. PGAM, 3-phosphoglycerate mutase; 2PG, 2-phosphoglycerate; ENO, enolase; PEP, phosphoenolpyruvate; PPDK, pyruvate phosphate dikinase. <bold>(B)</bold> Tryparedoxin-dependent hydroperoxide detoxification pathway in <italic>Trypanosoma cruzi</italic> (Gonz&#x000E1;lez-Ch&#x000E1;vez et al., <xref ref-type="bibr" rid="B28">2015</xref>). Reduction of cumene hydroperoxide (CumOOH) is assessed here. TryR, trypanothione reductase; T(SH)<sub>2</sub>, trypanothione; TS<sub>2</sub>, trypanothione disulfide; TXN<sub>ox/red</sub>, oxidized/reduced tryparedoxin; TXNPx<sub>ox/red</sub>, oxidized/reduced tryparedoxin peroxidase. <bold>(C)</bold> Simplified representation of the industrial-scale penicillin fermentation process of <italic>Penicillium chrysogenum</italic>. The bioreactor parameters represented here are those that will be of interest in this study. See a more detailed scheme in the work of Goldrick et al. (<xref ref-type="bibr" rid="B27">2015</xref>). Experimental details for <bold>(A,B)</bold> are provided in section Materials and Methods.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-05-744755-g0003.tif"/>
</fig>
<p>Creating an efficient ML model depends on the availability of a large amount of experimental data (L&#x00027;Heureux et al., <xref ref-type="bibr" rid="B47">2017</xref>; Schmidt et al., <xref ref-type="bibr" rid="B72">2019</xref>). The measurement of fluxes is cumbersome to carry out experimentally and hinders the possibility of having massive data. Because of the scarcity of these large experimental datasets in the literature, the methodology employed here consists of applying data augmentation to the first two pathways by using hybrid models (<xref ref-type="fig" rid="F2">Figure 2</xref>). These hybrid models, called gray-box models, often predict better results than pure knowledge-based models or data-based models (Wei et al., <xref ref-type="bibr" rid="B80">2018</xref>; Lo-Thong et al., <xref ref-type="bibr" rid="B50">2020</xref>; Pintelas et al., <xref ref-type="bibr" rid="B63">2020</xref>); in this study, the gray-box models consist of metabolic networks that include an adjustment term in one or more kinetic equations.</p>
<p>In this study, models are based both on experimental datasets and predicted data coming from the previous gray-box model. Here, we show that random forest models are the most effective, with a high predictive capacity starting from predicted and experimental enzyme activities or experimental parameters collected from a bioreactor. Also, two other models stand out as good ways to predict the flux or the final product concentration: XGBoost Linear and Cubist models. This shows the importance of using a non-linear model to design metabolic pathways. Based on these findings, we propose a means of decision support for researchers who wish to use machine learning techniques as a starting or a complementary method for modeling and for improving existing biological pathway models. By greatly increasing the quality of the outputs (flux prediction), machine learning opens the way to better drug target identification within a pathway, efficient disease modeling at molecular level and more efficient optimization for industrial production of metabolites.</p>
</sec>
<sec sec-type="materials and methods" id="s2">
<title>Materials and Methods</title>
<sec>
<title>Experimental Procedures</title>
<p>The lower part of glycolysis is reconstituted <italic>in-vitro</italic> in a reaction assay medium described in a recent work (Moreno-S&#x000E1;nchez et al., <xref ref-type="bibr" rid="B55">2008</xref>), containing different recombinant enzymes (PGAM, ENO and PPDK). The reaction was started by adding 3PG (4 mM). An additional reaction is added, the formation of lactate with lactate dehydrogenase (<xref ref-type="fig" rid="F3">Figure 3A</xref>), in order to follow the flux of the overall pathway by following the rate of NADH oxidation; for more details, see the work of Moreno-S&#x000E1;nchez et al. (<xref ref-type="bibr" rid="B55">2008</xref>). Concerning the peroxide detoxification pathway (<xref ref-type="fig" rid="F3">Figure 3B</xref>), each enzyme was individually titrated, while keeping the other parameters in the <italic>in-vitro</italic> system constant. The pathway flux was determined in parallel by observing NADPH oxidation; see Gonz&#x000E1;lez-Ch&#x000E1;vez et al. (<xref ref-type="bibr" rid="B28">2015</xref>) for more information. Finally, the experimental procedures that were followed to obtain penicillin production data are described in the studies of Goldrick et al. (<xref ref-type="bibr" rid="B27">2015</xref>).</p>
</sec>
<sec>
<title>Lower Part of Glycolysis Datasets</title>
<p>Two datasets are constructed here by applying data augmentation, using a gray-box model detailed in one of the following sections. For the first one, an exploration around the experimental data flux (43 &#x000B1; 10 nmol&#x000B7;min<sup>&#x02212;1</sup>) from Moreno-S&#x000E1;nchez et al. (<xref ref-type="bibr" rid="B55">2008</xref>) at pH 6 is conducted. In fact, a sample of 2,000 normally distributed enzymatic balances was generated with the <italic>sample</italic> function on RStudio and resulted in a predicted flux between 0 and 53 nmol&#x000B7;min<sup>&#x02212;1</sup> with the gray-box model. The term balance refers to a set of activities of the enzymes involved in the cascade of reactions. The second dataset is made up of experimental and predicted (gray-box model) data of PGAM, ENO and PPDK activities and pathway flux (J). The experimental data are obtained from the plots of the Moreno-S&#x000E1;nchez study (Moreno-S&#x000E1;nchez et al., <xref ref-type="bibr" rid="B55">2008</xref>) (only the dots), while the predicted data are obtained with the gray-box model developed in a recent work (Lo-Thong et al., <xref ref-type="bibr" rid="B50">2020</xref>), by varying each enzyme activity from 0 to 1,000 mU with a step of 25 mU. These datasets are shown in <xref ref-type="supplementary-material" rid="SM3">Supplementary Tables 7</xref>, <xref ref-type="supplementary-material" rid="SM4">8</xref>, respectively.</p>
</sec>
<sec>
<title>Peroxide Detoxification Datasets</title>
<p>The second studied pathway consisted first of 58 experimental enzymatic balances and their corresponding flux. After applying data augmentation by using a gray-box model of this pathway, a bigger dataset of 1,671 data points was obtained. As with the previous dataset, a combination of normally distributed data is generated with the <italic>sample</italic> function on RStudio, resulting in a predicted flux ranging from 0 to 11.46 nmol&#x000B7;min<sup>&#x02212;1</sup>. The new dataset is a mix of the previous experimental data and new predicted data of enzyme activities (TryR, TXN and TXNPx) and final flux, and is shown in <xref ref-type="supplementary-material" rid="SM5">Supplementary Table 9</xref>.</p>
</sec>
<sec>
<title>The Gray-Box Models</title>
<p>The two following pathways are modeled with an open-source software called COPASI (Version 4.24) (Hoops et al., <xref ref-type="bibr" rid="B37">2006</xref>): the lower part of glycolysis and the peroxide detoxification pathway. This software is used for metabolic network design, analysis and optimization. The first gray-box model, representing the lower part of glycolysis, is taken from the work of Lo-Thong et al. (<xref ref-type="bibr" rid="B50">2020</xref>). It is based on the use of enzyme properties, including kinetic parameters and kinetic equations. To enhance the flux predictions, they suggested adding an adjustment term to the PPDK kinetic equation. The whole process concerning the composition of this term is explained in the previous work (see Methodology part of Lo-Thong et al., <xref ref-type="bibr" rid="B50">2020</xref> and <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 1</xref>).</p>
<p>The second gray-box model represents the peroxide detoxification pathway and is built specifically for this study. It contains kinetic parameters and equations of three enzymes: TryR, TXN and TXNPx (<xref ref-type="table" rid="T1">Table 1</xref>). Also, we proposed to add two adjustment terms in TryR and TXNPx equations to improve flux predictions (<xref ref-type="table" rid="T1">Table 1</xref>). These are determined in the same way as the terms used for the glycolysis pathway. In fact, a first model was provided by Gonz&#x000E1;lez-Ch&#x000E1;vez et al. (<xref ref-type="bibr" rid="B29">2019</xref>) and could predict the final flux quite well when TXN activity was varied. However, it underestimated the flux when TryR activity was varied and overestimated it when TXNPx activity was varied. Therefore, we suggest adding a first adjustment term <bold>&#x003B1;</bold>(<italic><bold>V</bold></italic><sub><italic><bold>f</bold></italic></sub> <bold>&#x02212;</bold> <italic><bold>V</bold></italic><sub><italic><bold>f</bold></italic><bold>0</bold></sub>) in order to increase TryR rate and a second adjustment term <bold>&#x003B2;</bold>(<italic><bold>V</bold></italic><sub><italic><bold>f</bold></italic></sub> <bold>&#x02212;</bold> <italic><bold>V</bold></italic><sub><italic><bold>f</bold></italic><bold>0</bold></sub><bold>)</bold> to decrease TXNPx rate. In these adjustment terms, &#x003B1; and &#x003B2; are defined numbers selected as the best for flux prediction from a tested range, <italic>V</italic><sub><italic>f</italic></sub> is TryR (or TXNPx) maximum rate in the forward direction in the model and <italic>V</italic><sub><italic>f</italic>0</sub> TryR (or TXNPx) maximum rate in the forward direction used in the <italic>in vitro</italic> reconstitution. 
Also, as <italic>V</italic><sub><italic>f</italic></sub> of TryR (or TXNPx) is equal to <italic>V</italic><sub><italic>f</italic>0</sub> when TXN&#x00027;s/TXNPx&#x00027;s (or TryR&#x00027;s/TXN&#x00027;s) activity is varied, we multiplied &#x003B1; (or &#x003B2;) by <italic>V</italic><sub><italic>f</italic></sub> &#x02212; <italic>V</italic><sub><italic>f</italic>0</sub>, so that the adjustment term would be zero when <italic>V</italic><sub><italic>f</italic></sub> &#x0003D; <italic>V</italic><sub><italic>f</italic>0</sub> and the flux predictions are not modified in these cases mentioned above.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Kinetic equations used in the gray-box model of the peroxide detoxification pathway (Gonz&#x000E1;lez-Ch&#x000E1;vez et al., <xref ref-type="bibr" rid="B28">2015</xref>).</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th valign="top" align="left"><bold>Enzyme</bold></th>
<th valign="top" align="center"><bold>Kinetic equations</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" style="background-color:#f2f3f3">TryR<xref ref-type="table-fn" rid="TN1"><sup>a</sup></xref></td>
<td valign="top" align="center" style="background-color:#f2f3f3"><inline-formula><mml:math id="M1"><mml:mi>v</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mfrac><mml:mrow><mml:mi>A</mml:mi><mml:mi>B</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>A</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>B</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mfrac><mml:mrow><mml:mi>P</mml:mi><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>Q</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003B1;</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mn>0</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo 
stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>A</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mi>B</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>B</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>Q</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mi>A</mml:mi><mml:mi>B</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>A</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>B</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mi>A</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>A</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mi>B</mml:mi><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>B</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi>
<mml:mi>Q</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mi>P</mml:mi><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>Q</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mi>A</mml:mi><mml:mi>B</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>A</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>B</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mi>B</mml:mi><mml:mi>P</mml:mi><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>B</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>Q</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow></mml:mfrac></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left">TXN<xref ref-type="table-fn" rid="TN2"><sup>b</sup></xref></td>
<td valign="top" align="center"><inline-formula><mml:math id="M2"><mml:mi>v</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mi>A</mml:mi><mml:mi>B</mml:mi><mml:mo>-</mml:mo><mml:mfrac><mml:mrow><mml:mi>P</mml:mi><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi><mml:mi>q</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>A</mml:mi><mml:mi>B</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>B</mml:mi></mml:mrow></mml:msub><mml:mi>A</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>A</mml:mi></mml:mrow></mml:msub><mml:mi>B</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mi>Q</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>Q</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi><mml:mi>q</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>Q</mml:mi></mml:mrow></mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mo 
stretchy="true">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>A</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mi>Q</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mi>P</mml:mi></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left" style="background-color:#f2f3f3">TXNPx<xref ref-type="table-fn" rid="TN3"><sup>c</sup></xref></td>
<td valign="top" align="center" style="background-color:#f2f3f3"><inline-formula><mml:math id="M3"><mml:mi>v</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>C</mml:mi><mml:mi>u</mml:mi><mml:mi>m</mml:mi><mml:mi>O</mml:mi><mml:mi>O</mml:mi><mml:mi>H</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>T</mml:mi><mml:mi>X</mml:mi><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mi>&#x003B2;</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mn>0</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo 
stretchy="true">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>T</mml:mi><mml:mi>X</mml:mi><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>C</mml:mi><mml:mi>u</mml:mi><mml:mi>m</mml:mi><mml:mi>O</mml:mi><mml:mi>O</mml:mi><mml:mi>H</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>K</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>C</mml:mi><mml:mi>u</mml:mi><mml:mi>m</mml:mi><mml:mi>O</mml:mi><mml:mi>O</mml:mi><mml:mi>H</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>T</mml:mi><mml:mi>X</mml:mi><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>C</mml:mi><mml:mi>u</mml:mi><mml:mi>m</mml:mi><mml:mi>O</mml:mi><mml:mi>O</mml:mi><mml:mi>H</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>T</mml:mi><mml:mi>X</mml:mi><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:math></inline-formula></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TN1"><label>a</label><p><italic>A, B and K<sub>mA</sub>, K<sub>mB</sub> are respectively the concentrations and K<sub>m</sub> of the substrates NADPH and TS<sub>2</sub>; P, Q and K<sub>mP</sub>, K<sub>mQ</sub> are the concentrations and K<sub>m</sub> of the products NADP<sup>&#x0002B;</sup> and T(SH)<sub>2</sub>; &#x003B1;(V<sub>f</sub> &#x02212; V<sub>f0</sub>) is the adjustment term with &#x003B1;, a defined number, V<sub>f0</sub>, TryR maximum rate in the forward direction used in the in vitro reconstitution and V<sub>f</sub> is TryR maximum rate in the forward direction in the model</italic>.</p></fn>
<fn id="TN2"><label>b</label><p><italic>A, B and K<sub>mA</sub>, K<sub>mB</sub> are respectively the concentrations and K<sub>m</sub> of the substrates T(SH)<sub>2</sub> and TXN<sub>ox</sub>; P, Q and K<sub>mP</sub>, K<sub>mQ</sub> are the concentrations and K<sub>m</sub> of the products TS<sub>2</sub> and TXN<sub>red</sub></italic>.</p></fn>
<fn id="TN3"><label>c</label><p><italic><inline-formula><mml:math id="M4"><mml:mi>&#x003B2;</mml:mi><mml:mrow><mml:mo stretchy="true">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mn>0</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="true">)</mml:mo></mml:mrow></mml:math></inline-formula> is the adjustment term with &#x003B2;, a defined number, V<sub>f0</sub>, TXNPx maximum rate in the forward direction used in the in vitro reconstitution and V<sub>f</sub> is TXNPx maximum rate in the forward direction in the model</italic>.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Also, residual values are determined to evaluate how accurate the gray-box model is, and calculated as follows (1):</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M5"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mstyle mathvariant='bold-italic'><mml:mi>e</mml:mi></mml:mstyle><mml:mtext>&#x000A0;</mml:mtext><mml:mstyle mathvariant="bold"><mml:mo>=</mml:mo></mml:mstyle><mml:mtext>&#x000A0;</mml:mtext><mml:mstyle mathvariant='bold-italic'><mml:mi>y</mml:mi></mml:mstyle><mml:mtext>&#x000A0;</mml:mtext><mml:mstyle mathvariant="bold"><mml:mo>-</mml:mo></mml:mstyle><mml:mtext>&#x000A0;</mml:mtext><mml:mover accent="true"><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>y</mml:mi></mml:mstyle></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where <italic>e</italic> is the residual, <italic>y</italic> is the observed value and &#x00177; the corresponding predicted value.</p>
</sec>
<sec>
<title>Data Augmentation</title>
<p>For the datasets with &#x0003C;100 data points, a process called data augmentation is performed. It consists of using models that accurately predict the experimental data to generate a new, larger dataset. Two different gray-box models are used in this study: one for the lower part of the glycolysis pathway, retrieved from a recent study (Lo-Thong et al., <xref ref-type="bibr" rid="B50">2020</xref>), and one for the peroxide detoxification pathway (built for the present work). The gray-box models built on COPASI are set up to predict the variation of the final product concentration over the first hour for a given set of enzyme activities; then the COPASI outputs are processed to obtain the final flux of the studied metabolic pathway. Also, the overall process, from the one-hour simulation for each enzymatic balance to the determination of the final flux, is then automated and applied to a range of enzymatic balances detailed in the previous subparts (Lower Part of Glycolysis Datasets and Peroxide Detoxification Datasets).</p>
</sec>
<sec>
<title>Dataset Analysis and Non-linearity Assessment</title>
<p>A brief analysis of the datasets is performed, including an examination of data distribution and the calculation of linear correlations between the input and output variables.</p>
<p>The determination of the linear correlation between the input and output variables allows the assessment of the non-linearity for each studied metabolic pathway. As a rule of thumb, we consider that the non-linearity is high when one or more inputs have a linear correlation coefficient lower than 0.6. The lower the linear correlation, the greater the degree of non-linearity of the pathway.</p>
</sec>
<sec>
<title>Machine Learning Models Building and Selection</title>
<p>To model the metabolic pathway, different machine learning models are developed on RStudio (Version 1.2.5001), with the help of Classification And Regression Training (caret, Version 6.0-86) (Kuhn, <xref ref-type="bibr" rid="B44">2020</xref>).</p>
<p>The datasets are split into 80/20 for the training and test sets, and a k-fold cross-validation (with k = 10 for <xref ref-type="supplementary-material" rid="SM3">Dataset 1</xref>, <xref ref-type="supplementary-material" rid="SM4">2</xref> and k = 3 for <xref ref-type="supplementary-material" rid="SM5">Dataset 3</xref>) is performed on the models with the training set.</p>
<p>After this, the best models are selected based on:</p>
<p>The root-mean-square error (RMSE):</p>
<disp-formula id="E2"><label>(2)</label><mml:math id="M6"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mstyle mathvariant='bold-italic'><mml:mi>R</mml:mi><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>E</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext></mml:mstyle><mml:msqrt><mml:mrow><mml:mfrac><mml:mrow><mml:mstyle mathvariant="bold"><mml:mn>1</mml:mn></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>n</mml:mi></mml:mstyle></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>i</mml:mi></mml:mstyle><mml:mtext>&#x000A0;</mml:mtext><mml:mstyle mathvariant="bold"><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>n</mml:mi></mml:mstyle></mml:mrow></mml:munderover></mml:mstyle><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>Y</mml:mi></mml:mstyle></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>i</mml:mi></mml:mstyle></mml:mrow></mml:msub><mml:mstyle mathvariant="bold"><mml:mo>-</mml:mo></mml:mstyle><mml:msub><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>Y</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>i</mml:mi></mml:mstyle></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mstyle mathvariant="bold"><mml:mn>2</mml:mn></mml:mstyle></mml:mrow></mml:msup></mml:mrow></mml:msqrt></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>with <italic>Y</italic><sub><italic>i</italic></sub> and <italic>&#x00176;</italic><sub><italic>i</italic></sub> being respectively the observed and predicted values, n being the total number of values and i = 1, 2&#x02026;<italic>n</italic>;</p>
<p>the coefficient of determination (R<sup>2</sup>):</p>
<disp-formula id="E3"><label>(3)</label><mml:math id="M7"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:msup><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>R</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle mathvariant="bold"><mml:mn>2</mml:mn></mml:mstyle></mml:mrow></mml:msup><mml:mstyle mathvariant="bold"><mml:mo>=</mml:mo><mml:mtext>&#x000A0;</mml:mtext><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:mtext>&#x000A0;</mml:mtext></mml:mstyle><mml:mfrac><mml:mrow><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>i</mml:mi></mml:mstyle><mml:mstyle mathvariant="bold"><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>n</mml:mi></mml:mstyle></mml:mrow></mml:msubsup></mml:mstyle><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>Y</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>i</mml:mi></mml:mstyle></mml:mrow></mml:msub><mml:mstyle mathvariant="bold"><mml:mo>-</mml:mo></mml:mstyle><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>Y</mml:mi></mml:mstyle></mml:mrow><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>i</mml:mi></mml:mstyle></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mstyle mathvariant="bold"><mml:mn>2</mml:mn></mml:mstyle></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>i</mml:mi></mml:mstyle><mml:mstyle mathvariant="bold"><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle 
mathvariant='bold-italic'><mml:mi>n</mml:mi></mml:mstyle></mml:mrow></mml:msubsup></mml:mstyle><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>Y</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>i</mml:mi></mml:mstyle></mml:mrow></mml:msub><mml:mstyle mathvariant="bold"><mml:mo>-</mml:mo></mml:mstyle><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mstyle mathvariant='bold-italic'><mml:mi>Y</mml:mi></mml:mstyle></mml:mrow><mml:mo>&#x000AF;</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mstyle mathvariant="bold"><mml:mn>2</mml:mn></mml:mstyle></mml:mrow></mml:msup></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>with <italic>Y</italic><sub><italic>i</italic></sub> and <italic>&#x00176;</italic><sub><italic>i</italic></sub> being respectively the observed and predicted values, <italic>&#x00232;</italic> the mean of the observed values, <italic>n</italic> the total number of values and <italic>i</italic> = 1, 2, &#x02026;, <italic>n</italic>.</p>
<p>Also, the metabolic pathways were modeled on a computing cluster with the following characteristics: 2x Intel Xeon E5-2630v4 Broadwell-EP &#x00040; 2.20 GHz (10 cores), 8x 16 GB of RAM (2,400 MHz, DDR4, ECC).</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<p>As previously mentioned, ML models could have different applications in biology, including the identification of biomarkers, i.e., a valuable, quantitative component (metabolites, proteins, enzymes&#x02026;), within a metabolic pathway for health purposes (diseases diagnosis, treatment) or the optimization of a valuable production pathway. Therefore, we have targeted three different datasets based on these two applications. The first one concerns the lower part of glycolysis in <italic>Entamoeba histolytica</italic> (<xref ref-type="fig" rid="F3">Figure 3A</xref>) and contains a set of enzyme activities for which the final flux has been measured (Moreno-S&#x000E1;nchez et al., <xref ref-type="bibr" rid="B55">2008</xref>). The second pathway is the tryparedoxin-dependent hydroperoxide detoxification pathway in <italic>Trypanosoma cruzi</italic> (<xref ref-type="fig" rid="F3">Figure 3B</xref>), which provides the same type of data as in the previous dataset (Gonz&#x000E1;lez-Ch&#x000E1;vez et al., <xref ref-type="bibr" rid="B28">2015</xref>). It is important to consider how essential these two previous pathways are, as they play a significant role in the survival of these parasites. Given the small size of the experimental dataset, we use two gray-box models: one developed recently (Lo-Thong et al., <xref ref-type="bibr" rid="B50">2020</xref>) and the other developed in this study, to generate a larger dataset for these two pathways (<xref ref-type="supplementary-material" rid="SM3">Datasets 1</xref> and <xref ref-type="supplementary-material" rid="SM4">2</xref>) before building the ML models (<xref ref-type="fig" rid="F2">Figure 2</xref>).</p>
<p>The last metabolic pathway modeled here is the penicillin fermentation process in <italic>Penicillium chrysogenum</italic> (<xref ref-type="fig" rid="F3">Figure 3C</xref>). This dataset did not need to be enlarged (<xref ref-type="supplementary-material" rid="SM5">Dataset 3</xref>), and we used it to build different ML models (<xref ref-type="fig" rid="F2">Figure 2</xref>).</p>
<sec>
<title>Example 1: The Lower Part of <italic>Entamoeba histolytica</italic> Glycolysis</title>
<sec>
<title>The Gray-Box Model Allows the Building of Huge Datasets</title>
<p>Since the amount of experimental data is limited, the first step here is to build a robust model to generate more data.</p>
<p>As explained in the Methods section, the gray-box model developed in a previous work contains all kinetic parameters and kinetic equations of PGAM, ENO and PPDK (Lo-Thong et al., <xref ref-type="bibr" rid="B50">2020</xref>). In order to improve the flux prediction, the first two enzymes employ the Michaelis-Menten reversible rate equation, whereas the third employs a modified termolecular reaction reversible rate equation including an adjustment term in the denominator (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 1</xref>). The resulting fluxes show good reliability of the model to predict the final experimental flux (R<sup>2</sup> &#x02248; 0.95 and RMSE = 1.993 nmol&#x000B7;min<sup>&#x02212;1</sup>), even when enzyme activities are varied (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figures 1A-C</xref>).</p>
<p>The calculation of residuals shows a defined pattern that is the same for PGAM and ENO. It reveals a general trend of the model to underestimate the flux for low enzyme activity values, and overestimate it for high enzyme activity values (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figures 1D,E</xref>). Concerning PPDK, the gray-box model tends instead to underestimate the final flux when the enzyme activity is varied, with an exception for the last point (at 232.13 mU), which is overestimated (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 1F</xref>). The model predicts the pathway flux quite accurately and presents low residuals, between &#x02212;3.4 and 4.7 nmol&#x000B7;min<sup>&#x02212;1</sup>.</p>
<p>The next step of this work consists of using the <italic>in-silico</italic> model for generating larger datasets, a process we call data augmentation. The first new dataset contains 2,000 enzyme balances evolving around the experimental ones (see <xref ref-type="supplementary-material" rid="SM2">Supplementary Table 6</xref>, <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 2</xref>). The term balance refers to a set of concentrations of the enzymes involved in the cascade of reactions. The predicted final fluxes vary between 0 and 60.84 nmol&#x000B7;min<sup>&#x02212;1</sup>; the distribution of the other data from the first dataset is shown in <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 3</xref>. In fact, the most frequently represented predicted fluxes fall within the range of the experimental data of the reconstituted pathway (Moreno-S&#x000E1;nchez et al., <xref ref-type="bibr" rid="B55">2008</xref>) and of the <italic>in vivo</italic> pathway fluxes in live parasites (Pineda et al., <xref ref-type="bibr" rid="B62">2015</xref>). In order to compare the models, a second dataset (<xref ref-type="supplementary-material" rid="SM3">Dataset 1</xref>) is generated and includes 68,950 data points for which all enzyme activities are varied between 0 and 1,000 mU (see <xref ref-type="supplementary-material" rid="SM1">Supplementary Tables 2</xref>, <xref ref-type="supplementary-material" rid="SM3">7</xref>). The final fluxes are then predicted and fluctuate between 0 and 215.45 nmol&#x000B7;min<sup>&#x02212;1</sup>; additional information is provided in <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 2</xref>, <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 4</xref>.</p>
<p>We then plotted the final flux as a function of the enzyme activity for the largest dataset (<xref ref-type="supplementary-material" rid="SM3">Supplementary Table 7</xref>) and obtained the same type of curve as we did previously (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figures 2</xref>, <xref ref-type="supplementary-material" rid="SM1">5</xref>). Indeed, variations of PGAM activity have a great impact on the final flux, while those of ENO and PPDK have a lesser impact on the pathway flux (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 5</xref>). It should also be noticed that the experimental fluxes are in the lower part of the predicted flux values. The insets show a gap between the experimental flux values and the dataset flux values; this difference is due to the intervals between two values, used in the two cases, with the interval being smaller for the experimental dots (7&#x02013;85 mU) than for the predicted data (25 mU). Following this initial analysis of the data, we assessed the correlation between the various variables. The table of correlation shows that the enzymes and the final flux are correlated to varying degrees, with the highest correlation coefficient for PGAM, followed by ENO, and the lowest coefficient for PPDK (<xref ref-type="table" rid="T2">Table 2</xref>). These linear correlation coefficients provide insight into the degree of non-linearity of this metabolic pathway. Here, only PPDK has a linear correlation coefficient lower than 0.6, indicating that the lower part of glycolysis has a large degree of non-linearity. 
Also, even if the mean value of the correlations is above 0.5 (<xref ref-type="table" rid="T2">Table 2</xref>), we observe a weak linear correlation for many ranges of enzyme activity (<xref ref-type="fig" rid="F4">Figure 4</xref>) when one of the enzymes is varied over the three, for example for PPDK when PGAM varies between 0 and 625 mU and ENO between 0 and 1,000 mU (<xref ref-type="fig" rid="F4">Figure 4C</xref>). These results indicate significant non-linearity in the metabolic pathway, particularly for PPDK and ENO. In addition, these results lead to the same conclusions as those from flux control coefficient calculations (Lo-Thong et al., <xref ref-type="bibr" rid="B50">2020</xref>): the enzyme exerting the greatest flux control is PGAM, followed by ENO, and PPDK has the weakest control of the pathway flux.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Table of mean linear correlations between the enzyme activities and the predicted final flux (<italic>J</italic><sub><italic>pred</italic></sub>) for <xref ref-type="supplementary-material" rid="SM3">Dataset 1</xref>.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th/>
<th valign="top" align="center"><italic><bold>J<sub>pred</sub></bold></italic></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">PGAM</td>
<td valign="top" align="center" style="background-color:#f2f3f3">0.90</td>
</tr>
<tr>
<td valign="top" align="left">ENO</td>
<td valign="top" align="center">0.85</td>
</tr>
<tr>
<td valign="top" align="left">PPDK</td>
<td valign="top" align="center" style="background-color:#f2f3f3">0.53</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F4" position="float">
<label>Figure 4</label>
<caption><p>Evolution of the linear correlation coefficient for each enzyme of <xref ref-type="supplementary-material" rid="SM3">Dataset 1</xref> (68,950 data). <bold>(A&#x02013;C)</bold> Variation of the correlation coefficient between the enzyme activity and the predicted final flux for PGAM <bold>(A)</bold>, ENO <bold>(B)</bold> and PPDK <bold>(C)</bold>.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-05-744755-g0004.tif"/>
</fig>
<p>Now that good-quality augmented datasets have been generated, they are used to test different ML approaches in the following section.</p>
</sec>
<sec>
<title>Non-linear Machine Learning Methods for Metabolic Pathway Modeling Outperform Linear Methods</title>
<p>Based on the preceding data, we also investigate whether we can build a good predictive model by using linear and non-linear ML methods. In the study cited previously, Artificial Neural Networks (ANN) were used to predict the flux (Lo-Thong et al., <xref ref-type="bibr" rid="B50">2020</xref>). Here, only one ANN model is developed and proves to be one of the best models obtained (<xref ref-type="table" rid="T3">Table 3</xref> and <xref ref-type="fig" rid="F5">Figure 5E</xref>). Among the designed models and for the first dataset (<xref ref-type="supplementary-material" rid="SM2">Supplementary Table 6</xref>), the random forest models stand out, with better flux prediction for the training set with the model built with the Rborist package: cvRMSE = 0.883 nmol&#x000B7;min<sup>&#x02212;1</sup> and cvR<sup>2</sup> = 0.995, than the QRF model: cvRMSE = 0.931 nmol&#x000B7;min<sup>&#x02212;1</sup> and cvR<sup>2</sup> = 0.994 (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 3</xref>, <xref ref-type="supplementary-material" rid="SM1">Supplementary Figures 6B,D</xref>). As for the test set, the QRF model outperforms the Rborist model, with RMSE = 0.076 nmol&#x000B7;min<sup>&#x02212;1</sup> and R<sup>2</sup> = 1. Another good model, also non-linear, is the XGBoost Linear method, with cvRMSE = 0.833 nmol&#x000B7;min<sup>&#x02212;1</sup> and cvR<sup>2</sup> = 0.995 (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 3</xref>, <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 6A</xref>). Moreover, the results obtained with Bayesian GLM, Lasso, Ridge, Spike-and-slab and the PLS model indicate that a linear model is not really adequate to describe this metabolic pathway. 
In fact, the PLS model gives the highest value for cvRMSE and the lowest value for cvR<sup>2</sup> (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 3</xref>); also, we can see that the flux predictions are not very good (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 6M</xref>). For the second dataset (<xref ref-type="supplementary-material" rid="SM3">Supplementary Table 7</xref>), we obtained almost the same results: first with the Cubist model (cvRMSE = 0.215 nmol&#x000B7;min<sup>&#x02212;1</sup> and cvR<sup>2</sup> =1), then the two random forest models (<xref ref-type="table" rid="T3">Table 3</xref>). This time, better results are obtained with the QRF model: cvRMSE = 0.572 nmol&#x000B7;min<sup>&#x02212;1</sup> and cvR<sup>2</sup> = 1, than with the Rborist model: cvRMSE = 0.647 nmol&#x000B7;min<sup>&#x02212;1</sup> and cvR<sup>2</sup> = 1 for the training set (<xref ref-type="table" rid="T3">Table 3</xref> and <xref ref-type="fig" rid="F5">Figures 5A&#x02013;C</xref>). The XGBoost Linear method also gives good flux predictions, with cvRMSE = 0.489 nmol&#x000B7;min<sup>&#x02212;1</sup> and cvR<sup>2</sup> = 1 (<xref ref-type="table" rid="T3">Table 3</xref> and <xref ref-type="fig" rid="F5">Figure 5D</xref>). If the SVM Radial method gives almost good results (<xref ref-type="table" rid="T3">Table 3</xref> and <xref ref-type="fig" rid="F5">Figure 5F</xref>), it is no longer the case for the last two non-linear models (SVM Poly and bagEarth GCV) which present worse results in predicting flux, with much higher RMSE (<xref ref-type="table" rid="T3">Table 3</xref> and <xref ref-type="fig" rid="F5">Figures 5G,H</xref>).</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Summary table of statistical measurements for each predictive model.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th/>
<th valign="top" align="center" colspan="4" style="border-bottom: thin solid #000000;"><bold>Dataset 1</bold></th>
<th valign="top" align="center" colspan="4" style="border-bottom: thin solid #000000;"><bold>Dataset 2</bold></th>
<th valign="top" align="center" colspan="4" style="border-bottom: thin solid #000000;"><bold>Dataset 3</bold></th>
</tr>
<tr>
<th/>
<th valign="top" align="center" colspan="2" style="border-bottom: thin solid #000000;"><bold>Training set</bold></th>
<th valign="top" align="center" colspan="2" style="border-bottom: thin solid #000000;"><bold>Test set</bold></th>
<th valign="top" align="center" colspan="2" style="border-bottom: thin solid #000000;"><bold>Training set</bold></th>
<th valign="top" align="center" colspan="2" style="border-bottom: thin solid #000000;"><bold>Test set</bold></th>
<th valign="top" align="center" colspan="2" style="border-bottom: thin solid #000000;"><bold>Training set</bold></th>
<th valign="top" align="center" colspan="2" style="border-bottom: thin solid #000000;"><bold>Test set</bold></th>
</tr>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold>cvRMSE</bold></th>
<th valign="top" align="center"><bold>cvR<sup><bold>2</bold></sup></bold></th>
<th valign="top" align="center"><bold>RMSE</bold></th>
<th valign="top" align="center"><bold>R<sup><bold>2</bold></sup></bold></th>
<th valign="top" align="center"><bold>cvRMSE</bold></th>
<th valign="top" align="center"><bold>cvR<sup><bold>2</bold></sup></bold></th>
<th valign="top" align="center"><bold>RMSE</bold></th>
<th valign="top" align="center"><bold>R<sup><bold>2</bold></sup></bold></th>
<th valign="top" align="center"><bold>cvRMSE</bold></th>
<th valign="top" align="center"><bold>cvR<sup><bold>2</bold></sup></bold></th>
<th valign="top" align="center"><bold>RMSE</bold></th>
<th valign="top" align="center"><bold>R<sup><bold>2</bold></sup></bold></th>
</tr>
</thead>
<tbody>
<tr style="background-color:#a0c7dc">
<td valign="top" align="left"><bold>QRF (RF)</bold></td>
<td valign="top" align="center">0.572</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.218</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.183</td>
<td valign="top" align="center">0.996</td>
<td valign="top" align="center">0.022</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.814</td>
<td valign="top" align="center">0.993</td>
<td valign="top" align="center">0.134</td>
<td valign="top" align="center">1</td>
</tr>
<tr style="background-color:#a0c7dc">
<td valign="top" align="left"><bold>XGBoost Linear</bold></td>
<td valign="top" align="center">0.489</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.425</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.152</td>
<td valign="top" align="center">0.997</td>
<td valign="top" align="center">0.024</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1.344</td>
<td valign="top" align="center">0.982</td>
<td valign="top" align="center">1.097</td>
<td valign="top" align="center">0.988</td>
</tr>
<tr style="background-color:#a0c7dc">
<td valign="top" align="left"><bold>Cubist</bold></td>
<td valign="top" align="center">0.215</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.154</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.128</td>
<td valign="top" align="center">0.998</td>
<td valign="top" align="center">0.057</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1.22</td>
<td valign="top" align="center">0.985</td>
<td valign="top" align="center">1.224</td>
<td valign="top" align="center">0.985</td>
</tr>
<tr style="background-color:#a0c7dc">
<td valign="top" align="left"><bold>Rborist (RF)</bold></td>
<td valign="top" align="center">0.647</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.406</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.186</td>
<td valign="top" align="center">0.996</td>
<td valign="top" align="center">0.068</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">0.877</td>
<td valign="top" align="center">0.992</td>
<td valign="top" align="center">0.319</td>
<td valign="top" align="center">0.999</td>
</tr>
<tr style="background-color:#a0c7dc">
<td valign="top" align="left"><bold>ANN</bold></td>
<td valign="top" align="center">2.787</td>
<td valign="top" align="center">0.997</td>
<td valign="top" align="center">2.7</td>
<td valign="top" align="center">0.998</td>
<td valign="top" align="center">0.133</td>
<td valign="top" align="center">0.998</td>
<td valign="top" align="center">0.098</td>
<td valign="top" align="center">0.999</td>
<td valign="top" align="center">1.924</td>
<td valign="top" align="center">0.962</td>
<td valign="top" align="center">1.9</td>
<td valign="top" align="center">0.964</td>
</tr>
<tr style="background-color:#a0c7dc">
<td valign="top" align="left">SVM Radial</td>
<td valign="top" align="center">3.373</td>
<td valign="top" align="center">0.996</td>
<td valign="top" align="center">3.36</td>
<td valign="top" align="center">0.996</td>
<td valign="top" align="center">0.349</td>
<td valign="top" align="center">0.989</td>
<td valign="top" align="center">0.233</td>
<td valign="top" align="center">0.996</td>
<td valign="top" align="center">1.897</td>
<td valign="top" align="center">0.964</td>
<td valign="top" align="center">1.902</td>
<td valign="top" align="center">0.964</td>
</tr>
<tr style="background-color:#a0c7dc">
<td valign="top" align="left">SVM Poly</td>
<td valign="top" align="center">9.486</td>
<td valign="top" align="center">0.971</td>
<td valign="top" align="center">9.467</td>
<td valign="top" align="center">0.97</td>
<td valign="top" align="center">0.473</td>
<td valign="top" align="center">0.979</td>
<td valign="top" align="center">0.409</td>
<td valign="top" align="center">0.985</td>
<td valign="top" align="center">2.102</td>
<td valign="top" align="center">0.955</td>
<td valign="top" align="center">2.111</td>
<td valign="top" align="center">0.955</td>
</tr>
<tr style="background-color:#a0c7dc">
<td valign="top" align="left">bagEarth GCV (bagging MARS)</td>
<td valign="top" align="center">20.893</td>
<td valign="top" align="center">0.858</td>
<td valign="top" align="center">22.2</td>
<td valign="top" align="center">0.844</td>
<td valign="top" align="center">0.956</td>
<td valign="top" align="center">0.916</td>
<td valign="top" align="center">0.964</td>
<td valign="top" align="center">0.914</td>
<td valign="top" align="center">2.384</td>
<td valign="top" align="center">0.942</td>
<td valign="top" align="center">2.418</td>
<td valign="top" align="center">0.941</td>
</tr>
<tr style="background-color:#f2f3f3">
<td valign="top" align="left">Bayesian GLM</td>
<td valign="top" align="center">30.246</td>
<td valign="top" align="center">0.701</td>
<td valign="top" align="center">29.31</td>
<td valign="top" align="center">0.716</td>
<td valign="top" align="center">1.44</td>
<td valign="top" align="center">0.805</td>
<td valign="top" align="center">1.379</td>
<td valign="top" align="center">0.823</td>
<td valign="top" align="center">3.522</td>
<td valign="top" align="center">0.874</td>
<td valign="top" align="center">3.579</td>
<td valign="top" align="center">0.87</td>
</tr>
<tr style="background-color:#f2f3f3">
<td valign="top" align="left">Spike-and-slab</td>
<td valign="top" align="center">30.246</td>
<td valign="top" align="center">0.701</td>
<td valign="top" align="center">29.31</td>
<td valign="top" align="center">0.716</td>
<td valign="top" align="center">1.44</td>
<td valign="top" align="center">0.805</td>
<td valign="top" align="center">1.379</td>
<td valign="top" align="center">0.823</td>
<td valign="top" align="center">3.522</td>
<td valign="top" align="center">0.874</td>
<td valign="top" align="center">3.579</td>
<td valign="top" align="center">0.87</td>
</tr>
<tr style="background-color:#f2f3f3">
<td valign="top" align="left">Ridge</td>
<td valign="top" align="center">30.246</td>
<td valign="top" align="center">0.701</td>
<td valign="top" align="center">29.311</td>
<td valign="top" align="center">0.716</td>
<td valign="top" align="center">1.44</td>
<td valign="top" align="center">0.805</td>
<td valign="top" align="center">1.381</td>
<td valign="top" align="center">0.823</td>
<td valign="top" align="center">3.522</td>
<td valign="top" align="center">0.874</td>
<td valign="top" align="center">3.579</td>
<td valign="top" align="center">0.87</td>
</tr>
<tr style="background-color:#f2f3f3">
<td valign="top" align="left">Lasso</td>
<td valign="top" align="center">30.567</td>
<td valign="top" align="center">0.701</td>
<td valign="top" align="center">29.518</td>
<td valign="top" align="center">0.714</td>
<td valign="top" align="center">1.462</td>
<td valign="top" align="center">0.803</td>
<td valign="top" align="center">1.407</td>
<td valign="top" align="center">0.821</td>
<td valign="top" align="center">3.526</td>
<td valign="top" align="center">0.874</td>
<td valign="top" align="center">3.582</td>
<td valign="top" align="center">0.87</td>
</tr>
<tr style="background-color:#f2f3f3">
<td valign="top" align="left">PLS</td>
<td valign="top" align="center">30.246</td>
<td valign="top" align="center">0.701</td>
<td valign="top" align="center">29.309</td>
<td valign="top" align="center">0.716</td>
<td valign="top" align="center">1.581</td>
<td valign="top" align="center">0.765</td>
<td valign="top" align="center">1.55</td>
<td valign="top" align="center">0.777</td>
<td valign="top" align="center">4.046</td>
<td valign="top" align="center">0.834</td>
<td valign="top" align="center">4.12</td>
<td valign="top" align="center">0.828</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><italic>RF, Random Forest. RMSE values are in nmol&#x000B7;min<sup>&#x02212;1</sup> for Datasets 1 and 2 and in g&#x000B7;L<sup>&#x02212;1</sup> for Dataset 3. Colors refer to: linear models (gray) and non-linear models (blue). Models in bold are the top five models for all datasets. <xref ref-type="supplementary-material" rid="SM3">Dataset 1</xref> corresponds to the lower part of Entamoeba histolytica glycolysis; <xref ref-type="supplementary-material" rid="SM4">Dataset 2</xref> to the peroxide detoxification pathway of Trypanosoma cruzi and <xref ref-type="supplementary-material" rid="SM5">Dataset 3</xref> to the industrial-scale penicillin fermentation process of Penicillium chrysogenum</italic>.</p>
</table-wrap-foot>
</table-wrap>
<fig id="F5" position="float">
<label>Figure 5</label>
<caption><p>Predictions of a mix of experimental and gray-box predicted flux by different predictive models. <bold>(A&#x02013;M)</bold> Flux from <xref ref-type="supplementary-material" rid="SM3">Dataset 1</xref> (<xref ref-type="supplementary-material" rid="SM3">Supplementary Table 7</xref>) predicted by the Cubist <bold>(A)</bold>, QRF <bold>(B)</bold>, Rborist <bold>(C)</bold>, XGBoost Linear <bold>(D)</bold>, ANN <bold>(E)</bold>, SVM Radial <bold>(F)</bold>, SVM Poly <bold>(G)</bold>, bagEarth GCV <bold>(H)</bold>, PLS <bold>(I)</bold>, Bayesian GLM <bold>(J)</bold>, Spike-and-slab <bold>(K)</bold>, Ridge <bold>(L)</bold> and Lasso <bold>(M)</bold> models. Gray circles: training set and blue triangles: test set. See <xref ref-type="table" rid="T3">Table 3</xref> for the statistical measurements of each model.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-05-744755-g0005.tif"/>
</fig>
<p>For the same reasons stated above, all linear models show poor results in predicting flux starting from enzyme activities, and are therefore not adequate to model the lower part of glycolysis here (<xref ref-type="fig" rid="F5">Figures 5I&#x02013;M</xref>). Overall and for <xref ref-type="supplementary-material" rid="SM3">Dataset 1</xref>, the Cubist model has the best generalization capability, with a lower RMSE = 0.154 nmol&#x000B7;min<sup>&#x02212;1</sup> and a higher R<sup>2</sup> = 1 for the test set (<xref ref-type="table" rid="T3">Table 3</xref>). These results show that the non-linear models, such as random forests, Cubist and XGBoost Linear, are able to indicate the final flux of the pathway by using the predicted data.</p>
</sec>
</sec>
<sec>
<title>Example 2: The Peroxide Detoxification Pathway of <italic>Trypanosoma cruzi</italic></title>
<sec>
<title>An <italic>ad hoc</italic> Gray-Box Model Allows Data Augmentation of Enzyme Activities and Flux</title>
<p>We look at modeling the second metabolic pathway, which can also be used for drug design purposes. In the gray-box model developed here around this second dataset, the first and third enzymes employ a modified kinetic equation including two different adjustment terms: &#x003B1; = 23 and &#x003B2; = 8 (<xref ref-type="table" rid="T1">Table 1</xref>). The determination of these parameters is detailed in the Methods section. We obtained a relatively good model of flux prediction (R<sup>2</sup> &#x02248; 0.67 and RMSE = 4.668 nmol&#x000B7;min<sup>&#x02212;1</sup>) when enzyme activities are varied (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 7</xref>). However, the model still overestimates the flux when TryR activity is varied and when TXNPx activity is higher than 698.35 mU. The new dataset contains 1,671 enzyme balances evolving around the experimental ones (<xref ref-type="supplementary-material" rid="SM4">Dataset 2</xref>, see <xref ref-type="supplementary-material" rid="SM4">Supplementary Table 8</xref>). The predicted final fluxes vary between 0 and 11.46 nmol&#x000B7;min<sup>&#x02212;1</sup>; the dataset&#x00027;s distribution is shown in <xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 8</xref>, <xref ref-type="supplementary-material" rid="SM1">Supplementary Table 4</xref>. It is important to note that we could not go below 16.1 mU and 57.6 mU for TryR and TXNPx activity, respectively. The reason is that the gray-box model is not able to predict the flux below these values. Also, an analysis of the correlation between the different variables shows that TXN has the highest correlation coefficient, followed by TXNPx and lastly TryR (<xref ref-type="fig" rid="F6">Figure 6A</xref>). Here, these linear correlation coefficients point out the predominantly non-linear character of this metabolic pathway, when TryR or TXNPx activity is varied. 
The non-linear aspect of the peroxide detoxification pathway is certainly not negligible, since the coefficient average, when all enzyme activities are varied, is lower than 0.6. These results support those obtained by Gonz&#x000E1;lez-Ch&#x000E1;vez et al. (<xref ref-type="bibr" rid="B28">2015</xref>, <xref ref-type="bibr" rid="B29">2019</xref>) which demonstrate that TXN and TXNPx exert the greatest control on the pathway&#x00027;s flux, while TryR exerts very little control on the flux.</p>
<fig id="F6" position="float">
<label>Figure 6</label>
<caption><p>Comparison of final flux predictions by different predictive models. <bold>(A)</bold> Linear correlation between enzyme activities (inputs) and the flux (output) of <xref ref-type="supplementary-material" rid="SM4">Dataset 2</xref> (<xref ref-type="supplementary-material" rid="SM4">Supplementary Table 8</xref>) when all enzyme activities are varied. Correlation coefficients are also calculated when only one enzyme activity is varied: 0.76 (TryR), 0.998 (TXN) and 0.97 (TXNPx). A perfect circle means that there is no linear correlation between the variables, while a straight line means that there is a perfect linear correlation between the variables. <bold>(B&#x02013;G)</bold> Flux variation as a function of the enzymatic activity of TryR <bold>(B, E)</bold>, TXN <bold>(C, F)</bold> and TXNPx <bold>(D, G)</bold>. Colored circles refer to predicted data from: QRF (dark blue), XGBoost Linear (light blue), Cubist (red), Rborist (yellow), ANN (orange), bagEarth GCV (light gray), SVM Poly (light green), SVM Radial (dark green), Bayesian GLM (purple), Spike-and-slab (brown), Ridge (black), Lasso (dark gray) and PLS (pink). The curve fitted to the experimental data is shown in black. See <xref ref-type="table" rid="T3">Table 3</xref> for the statistical measurements of each model.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-05-744755-g0006.tif"/>
</fig>
<p>The augmented dataset is now used to test different ML approaches, as described in the following section.</p>
</sec>
<sec>
<title>Non-linear Machine Learning Methods Are Efficient for Flux Prediction</title>
<p>We built different ML models and evaluated their performance. Of the thirteen models built, only five predict the flux well for both training and test sets: the random forests (QRF and Rborist), XGBoost Linear, Cubist and ANN (<xref ref-type="fig" rid="F6">Figures 6B&#x02013;D</xref>, <xref ref-type="fig" rid="F7">7A&#x02013;E</xref>). These models have a cvRMSE range of 0.128-0.186 nmol&#x000B7;min<sup>&#x02212;1</sup> and cvR<sup>2</sup> of 0.996-0.998 for the training set, and RMSE range of 0.022-0.098 nmol&#x000B7;min<sup>&#x02212;1</sup> and R<sup>2</sup> of 0.999-1 for the test set (<xref ref-type="table" rid="T3">Table 3</xref>). The following three models (SVM Radial, SVM Poly and bagEarth GCV) predict the flux of peroxide detoxification moderately well (<xref ref-type="fig" rid="F6">Figures 6B&#x02013;D</xref>, <xref ref-type="fig" rid="F7">7F&#x02013;H</xref>), with cvRMSE between 0.349 and 0.956 nmol&#x000B7;min<sup>&#x02212;1</sup>, and cvR<sup>2</sup> between 0.916 and 0.989 (<xref ref-type="table" rid="T3">Table 3</xref>). With the test set, their performance is slightly lower, with RMSE between 0.233 and 0.964 nmol&#x000B7;min<sup>&#x02212;1</sup> and R<sup>2</sup> between 0.914 and 0.996 (<xref ref-type="table" rid="T3">Table 3</xref>).</p>
<fig id="F7" position="float">
<label>Figure 7</label>
<caption><p>Predictions of gray-box predicted flux by different predictive models for <xref ref-type="supplementary-material" rid="SM4">Dataset 2</xref>. <bold>(A&#x02013;M)</bold> Flux from the second dataset (<xref ref-type="supplementary-material" rid="SM4">Supplementary Table 8</xref>) predicted by the QRF <bold>(A)</bold>, XGBoost Linear <bold>(B)</bold>, Cubist <bold>(C)</bold>, Rborist <bold>(D)</bold>, ANN <bold>(E)</bold>, SVM Radial <bold>(F)</bold>, SVM Poly <bold>(G)</bold>, bagEarth GCV <bold>(H)</bold>, Bayesian GLM <bold>(I)</bold>, Spike-and-slab <bold>(J)</bold>, Ridge <bold>(K)</bold>, Lasso <bold>(L)</bold> and PLS <bold>(M)</bold> models. Gray circles: training set, and blue triangles: test set. See <xref ref-type="table" rid="T3">Table 3</xref> for the statistical measurements of each model.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-05-744755-g0007.tif"/>
</fig>
<p>In contrast, the last five models can hardly predict the flux from enzymatic activities for both training and test sets, particularly for flux below 7.5 nmol&#x000B7;min<sup>&#x02212;1</sup> which is within the physiological and experimentally determined value (<xref ref-type="fig" rid="F6">Figures 6E&#x02013;G</xref>, <xref ref-type="fig" rid="F7">7I&#x02013;M</xref>). These models present higher RMSE and lower R<sup>2</sup> values for the training set (cvRMSE range of 1.44-1.581 nmol&#x000B7;min<sup>&#x02212;1</sup> and cvR<sup>2</sup> range of 0.765-0.805) and test set (RMSE between 1.379 and 1.55 nmol&#x000B7;min<sup>&#x02212;1</sup> and R<sup>2</sup> range of 0.777-0.823), confirming their poorer performance not only in terms of learning but also in terms of generalization, in making robust predictions on new data (<xref ref-type="table" rid="T3">Table 3</xref>). We also observe that models Bayesian GLM, Spike-and-slab and Ridge give comparable results (<xref ref-type="table" rid="T3">Table 3</xref> and <xref ref-type="fig" rid="F7">Figures 7I&#x02013;K</xref>).</p>
<p>These results, together with those in example 1, allow us to confirm that non-linear models are more appropriate to predict the flux of a metabolic pathway than linear ones. Moreover, it should be noted that our gray-box models, built with COPASI, are non-linear models and that the data of <xref ref-type="supplementary-material" rid="SM3">Datasets 1</xref> and <xref ref-type="supplementary-material" rid="SM4">2</xref> are mostly obtained with these non-linear kinetic models. To ensure that the preceding results are not influenced by the kinetic model used to generate the data, we use a new raw dataset from experimental records of a bioreactor.</p>
</sec>
</sec>
<sec>
<title>Example 3: The Industrial-Scale Penicillin Fermentation Process of <italic>Penicillium chrysogenum</italic></title>
<p>In addition, another type of metabolic pathway we can examine is the production pathway; its modeling would allow the development of an optimized overall process. In fact, another study revealed that ML methods can accelerate the optimization of chemical synthesis (Hein, <xref ref-type="bibr" rid="B35">2021</xref>). As stated before, we do not need to enlarge this dataset, which is composed of records of the various parameters of an industrial-scale penicillin fermentation process. Using this dataset, which consists only of experimental data, will establish whether or not the ML models are reliable for metabolic pathway prediction. It is important to consider that the inputs of our models are no longer the enzymatic activities, but different variables such as: batch time, oil flow, aeration rate, vessel volume and weight, carbon evolution rate and CO<sub>2</sub> percentage in off-gas. A slight variation of CO<sub>2</sub> in off-gas is recorded (<xref ref-type="supplementary-material" rid="SM5">Supplementary Table 9</xref>); this can be explained by the implementation of a system, by the operators, allowing corrective measures to be taken when the CO<sub>2</sub> level is too high, thus avoiding the detrimental effect of an accumulation of CO<sub>2</sub> on the growth of <italic>Penicillium chrysogenum</italic> and the production of penicillin. As the percentage of CO<sub>2</sub> in off-gas is maintained at a certain level, it is not surprising that the carbon evolution rate does not vary much either and presents a low standard deviation (<xref ref-type="supplementary-material" rid="SM1">Supplementary Table 5</xref>). Also, the output we are interested in is not the pathway flux, but the final concentration of penicillin (<xref ref-type="fig" rid="F3">Figure 3C</xref>). 
As regards the correlation coefficient between the variables, we note that it is generally high between the parameters and the final penicillin concentration (<xref ref-type="table" rid="T4">Table 4</xref>); this correlation can be positive (e.g., time) or negative (e.g., oil flow). These correlation coefficients reveal the linear nature of the fermentation process studied in <xref ref-type="supplementary-material" rid="SM5">Dataset 3</xref>.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Correlation table between the parameters of the bioreactor and the observed penicillin concentration for <xref ref-type="supplementary-material" rid="SM5">Dataset 3</xref>.</p></caption>
<table frame="hsides" rules="groups">
<thead><tr>
<th/>
<th valign="top" align="center"><italic><bold>Observed penicillin concentration</bold></italic></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Time</td>
<td valign="top" align="center" style="background-color:#f2f3f3">0.92</td>
</tr>
<tr>
<td valign="top" align="left">Oil flow</td>
<td valign="top" align="center">&#x02212;0.81</td>
</tr>
<tr>
<td valign="top" align="left">Aeration rate</td>
<td valign="top" align="center" style="background-color:#f2f3f3">0.78</td>
</tr>
<tr>
<td valign="top" align="left">Vessel weight</td>
<td valign="top" align="center" style="background-color:#f2f3f3">0.79</td>
</tr>
<tr>
<td valign="top" align="left">Carbon evolution rate</td>
<td valign="top" align="center" style="background-color:#f2f3f3">0.78</td>
</tr>
<tr>
<td valign="top" align="left">Vessel volume</td>
<td valign="top" align="center" style="background-color:#f2f3f3">0.76</td>
</tr>
<tr>
<td valign="top" align="left">CO<sub>2</sub> in off-gas</td>
<td valign="top" align="center" style="background-color:#f2f3f3">0.68</td>
</tr>
</tbody>
</table>
</table-wrap>
<sec>
<title>Non-linear Machine Learning Methods Predict the Fermentation Process Better Than Linear Methods</title>
<p>The results of penicillin concentration predictions reveal that Random Forest models effectively predict experimental concentrations, with cvRMSE = 0.814/0.877 g&#x000B7;L<sup>&#x02212;1</sup> and cvR<sup>2</sup> = 0.993/0.992 (QRF/Rborist) for the training set and RMSE = 0.134/0.319 g&#x000B7;L<sup>&#x02212;1</sup> and R<sup>2</sup> = 1/0.999 (QRF/Rborist) for the test set (<xref ref-type="table" rid="T3">Table 3</xref> and <xref ref-type="fig" rid="F8">Figures 8A,B</xref>).</p>
<fig id="F8" position="float">
<label>Figure 8</label>
<caption><p>Predictions of observed penicillin concentration by different predictive models. <bold>(A&#x02013;M)</bold> Penicillin concentration from the third dataset (<xref ref-type="supplementary-material" rid="SM5">Supplementary Table 9</xref>) predicted by the QRF <bold>(A)</bold>, Rborist <bold>(B)</bold>, XGBoost Linear <bold>(C)</bold>, Cubist <bold>(D)</bold>, ANN <bold>(E)</bold>, SVM Radial <bold>(F)</bold>, SVM Poly <bold>(G)</bold>, bagEarth GCV <bold>(H)</bold>, Bayesian GLM <bold>(I)</bold>, Ridge <bold>(J)</bold>, Spike-and-slab <bold>(K)</bold>, Lasso <bold>(L)</bold> and PLS <bold>(M)</bold> models. Gray circles: training set, and blue triangles: test set. See <xref ref-type="table" rid="T3">Table 3</xref> for the statistical measurements of each model.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-05-744755-g0008.tif"/>
</fig>
<p>We can then separate the rest of the models into two groups, based on their performance on the test set. The first one, which predicts the penicillin concentration fairly well, has RMSE between 1.097 and 2.418 g&#x000B7;L<sup>&#x02212;1</sup>, and R<sup>2</sup> between 0.941 and 0.988 (<xref ref-type="table" rid="T3">Table 3</xref> and <xref ref-type="fig" rid="F8">Figures 8C&#x02013;H</xref>). By contrast, we found that the predictions of the second group are considerably worse, with many more outliers (<xref ref-type="fig" rid="F8">Figures 8I&#x02013;M</xref>), and with RMSE higher than 3.5 g&#x000B7;L<sup>&#x02212;1</sup> and R<sup>2</sup> lower than 0.9 for the test set (<xref ref-type="table" rid="T3">Table 3</xref>). As noted in the previous dataset, we also found many models that give the same results, namely: Bayesian GLM, Spike-and-slab, Ridge and Lasso (<xref ref-type="table" rid="T3">Table 3</xref> and <xref ref-type="fig" rid="F8">Figures 8I&#x02013;L</xref>). Here also, Lasso and PLS were the worst in terms of predictions. Interestingly, compared to the preceding results, <xref ref-type="supplementary-material" rid="SM5">Dataset 3</xref> gives the best results for linear models (lowest RMSE and highest R<sup>2</sup> values for the training and test sets); this could be explained by the largely linear dependence of the penicillin concentration on the parameters used. These results support the previous ones and confirm that non-linear models surpass linear models for the prediction of penicillin concentration through the fermentation process.</p>
</sec>
</sec>
<sec>
<title>Performance Comparison of All Models</title>
<p>After showing that non-linear ML methods are more suitable for modeling metabolic pathways, we performed a comparison of the performance of all models. At first glance, the plots further confirm the preceding results and display higher RMSE values and lower R<sup>2</sup> values for the linear models compared to non-linear models (<xref ref-type="fig" rid="F9">Figure 9</xref>). In addition, regardless of the number and/or type of data, we observe that Spike-and-slab, Ridge, Lasso and Bayesian GLM models give almost the same results (<xref ref-type="fig" rid="F9">Figure 9</xref> and <xref ref-type="table" rid="T3">Table 3</xref>). Also, it appears that some non-linear models work less well with large datasets; this is the case for ANN, bagEarth GCV, SVM Poly and SVM Radial (<xref ref-type="fig" rid="F9">Figure 9</xref>). Moreover, it appears that random forest models (QRF and Rborist) are the best suited for metabolic pathway modeling, as they give the best results in terms of RMSE and R<sup>2</sup> whatever dataset was used. Furthermore, we can evaluate the impact of the degree of non-linearity of the pathway on the predictions. Indeed, the pathway that has a highly non-linear structure (<xref ref-type="supplementary-material" rid="SM3">Dataset 1</xref>) gives worse results for linear models than the pathway that presents a less non-linear structure (<xref ref-type="supplementary-material" rid="SM5">Dataset 3</xref>), which also gives good results with non-linear models (<xref ref-type="fig" rid="F9">Figure 9A</xref> and <xref ref-type="table" rid="T3">Table 3</xref>). For example, <xref ref-type="supplementary-material" rid="SM3">Dataset 1</xref> performs less well with the Ridge model, with RMSE = 29.311 nmol&#x000B7;min<sup>&#x02212;1</sup> and R<sup>2</sup> = 0.716, than <xref ref-type="supplementary-material" rid="SM5">Dataset 3</xref>, which performs well with the same model, with RMSE = 3.579 g&#x000B7;L<sup>&#x02212;1</sup> and R<sup>2</sup> = 0.87.</p>
<fig id="F9" position="float">
<label>Figure 9</label>
<caption><p>Comparison of the RMSE and R<sup>2</sup> of the three datasets. <bold>(A,B)</bold> Variation of RMSE <bold>(A)</bold> and R<sup>2</sup> <bold>(B)</bold> values for the different models and for each dataset.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-05-744755-g0009.tif"/>
</fig>
<p>Besides, with a view to applying these methods at an industrial level, we perform a comparison of model prediction error and processing time among the different datasets (<xref ref-type="fig" rid="F10">Figure 10</xref>). The results confirm the previous findings, where random forest models have the best performance for metabolic pathway flux prediction. We noted that the Rborist model presents a better RMSE &#x02013; time of processing ratio than the QRF model. However, even if QRF models have a processing time higher than 1 h, we obtain an RMSE gain of about 96% when comparing it with the PLS model, which could be of considerable significance at the industrial level. In view of the considerable gain of using this method compared to a linear one, non-linear methods could be more beneficial at the industrial level, where a gain of 1% is colossal. Spike-and-slab, Ridge, Lasso and Bayesian GLM models result in comparable performance in terms of RMSE and time of processing. At the very least, these results show a better RMSE &#x02013; time of processing ratio for non-linear methods than for linear ones. We did not include the ANN models in these results because, unlike the other methods, they were not run with parallelization.</p>
<fig id="F10" position="float">
<label>Figure 10</label>
<caption><p>Comparison of model processing time against RMSE of each dataset: <xref ref-type="supplementary-material" rid="SM3">Dataset 1</xref> (gray), <xref ref-type="supplementary-material" rid="SM4">Dataset 2</xref> (light blue) and <xref ref-type="supplementary-material" rid="SM5">Dataset 3</xref> (dark blue) for metabolic flux prediction.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-05-744755-g0010.tif"/>
</fig>
<p>Furthermore, we assess the impact of the amount of training data on ML model performance to have a desired level of performance (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figure 9</xref>). We observe that the results are roughly the same for the datasets when they are predicted with linear models (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figures 9B,D,E,H,I</xref>), thus the amount of data required to obtain a strong linear model can exceed 80,000 data points, as long as the studied pathway does not have a high degree of non-linearity. When it comes to non-linear models, we find that using a dataset smaller than 40,000 data points is sufficient to obtain a good ML model (<xref ref-type="supplementary-material" rid="SM1">Supplementary Figures 9A,C,F,G,J&#x02013;L</xref>). Using a dataset larger than 40,000 data points leads to non-linear models that are efficient only in case of random forests (QRF and Rborist), Cubist and XGBoost Linear methods, for which RMSE is low. We could also consider making an ablation of our datasets to examine the impact of amount of training set data on the ML model performance.</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<sec>
<title>Comparison and Applicability of Knowledge-Based and Data-Driven Approaches</title>
<p>The first objective of this study is to determine what sort of data-driven model could better simulate the biological pathways studied. By using different datasets, we build several models with the enzyme balances or parameters collected from a bioreactor and reveal that Random Forests (QRF and Rborist), Cubist and XGBoost Linear are three good methods to predict the final flux or concentration of a final product. This work is part of a larger study about the applicability of either a knowledge-based or a data-driven approach. Indeed, in other fields such as fault detection and diagnosis, a comparison of these two methods demonstrates that they both have comparable performance and can be used (Alzghoul et al., <xref ref-type="bibr" rid="B2">2014</xref>; Yang and Rizzoni, <xref ref-type="bibr" rid="B87">2016</xref>). In biological system modeling, as is the case here, we demonstrated that in instances where little knowledge is available and difficult to obtain on a large scale basis (e.g., kinetic parameters k<sub>cat</sub> and K<sub>m</sub> of an enzyme, pathway fluxes), or when complex feedback regulation mechanisms take place, a data-driven method can be a good alternative for modeling a metabolic pathway, as many authors have shown before (Ramachandran et al., <xref ref-type="bibr" rid="B65">2011</xref>; Hou et al., <xref ref-type="bibr" rid="B38">2016</xref>). By comparison, the knowledge-based method can be laborious and long, due to data mining from the literature or wet laboratory experiments, whereas there is ease and speed of building models with the data-driven method (Kadarmideen, <xref ref-type="bibr" rid="B40">2016</xref>).</p>
<p>Another criterion that we considered was the degree of non-linearity of the pathway. As mentioned above, it is generally accepted that metabolic systems have an inherent non-linear behavior (Koza et al., <xref ref-type="bibr" rid="B43">2001</xref>; Song and Ramkrishna, <xref ref-type="bibr" rid="B76">2013</xref>; Yasemi and Jolicoeur, <xref ref-type="bibr" rid="B88">2021</xref>). However, there is no formal demonstration of the non-linear structure of metabolic pathways. According to Song and Ramkrishna, this non-linear behavior would be due to: (i) the non-linearity of the chemical reactions forming the pathway and (ii) the regulatory processes that add non-linearity to the system (Song and Ramkrishna, <xref ref-type="bibr" rid="B76">2013</xref>). Also, it is expected that pathway fluxes are non-linear, because they are controlled by enzymes and the activities of metabolic enzymes are saturable by their ligands. Besides, when the fluxes are measured in intact cells, they give a non-linear behavior and flux variation appears as hyperbolic or even sigmoidal. If the measured fluxes appear linear, it might be because the saturation point is not reached. Furthermore, according to the Metabolic Control Analysis, the fluxes are hyperbolic or non-linear because there always exist one or two flux-controlling steps which ultimately determine the pathway flux (Fell, <xref ref-type="bibr" rid="B22">1992</xref>). The determination of linear correlation coefficients of the different variables of the datasets gives us insights into the degree of non-linearity of the studied metabolic pathways and provides a method to evaluate the non-linearity of metabolic pathways. 
We found that all metabolic pathways studied here have a notable non-linear structure, with <xref ref-type="supplementary-material" rid="SM3">Dataset 1</xref> having the highest degree of non-linearity, then <xref ref-type="supplementary-material" rid="SM4">Dataset 2</xref> and lastly <xref ref-type="supplementary-material" rid="SM5">Dataset 3</xref>. These results generally support the main hypothesis that metabolic pathways are predominantly non-linear. The determination of the degree of non-linearity is therefore important for selecting and applying an ML technique when modeling a metabolic pathway.</p>
<p>Moreover, the suitability of using either method relies on the quantity and quality of the knowledge or the data. Here, to illustrate this point, we simulate two datasets: the first one consisting of an exploration of the experimental data (2,000 data) and the second one composed of enzyme activities from 0 to 1,000 mU (68,950 data). The largest one gives better predictions for the three best models (Random forests, Cubist and XGBoost Linear) than the other dataset, and shows us the importance of having a large dataset before using machine learning methods. In fact, the size of the training set has been shown to be a major driving factor of prediction accuracy (Somarathna et al., <xref ref-type="bibr" rid="B75">2017</xref>). However, we used two datasets made up of a mix of experimental and predicted data to build the models, and even if predicted from a good quality model, they remain mostly predicted data and are not comparable to a fully experimental dataset, which is also difficult to obtain. Thus, it would be worth considering methods using only experimental data, when sufficient data are available to build the models. Interestingly, a data-driven approach is often used to discover biological pathways or unravel pathways that are not well understood. Thus, combined with the knowledge-based approach, this can quickly make clear the complexity of biological systems modeling. Another possibility would be to test ML models on experimental data from <italic>E. coli</italic> or yeast, which can present a larger degree of non-linearity and are easily found in the literature. This issue will be addressed in our next study.</p>
<p>Surprisingly, model performance was weaker for the largest dataset from the bioreactor records than for the smaller datasets. The reason for this result may lie in the choice of input variables. Several studies have highlighted the need for variable selection in order to have better predictions (Camacho et al., <xref ref-type="bibr" rid="B10">2018</xref>; Awan et al., <xref ref-type="bibr" rid="B6">2019</xref>; Genuer et al.). Indeed, variable selection allows the use of the most informative variables to predict the output variable(s) and reduce the time of computing. Unlike the knowledge-based model, a diversity of variables for data-based models does not always mean better performance. This is one of the limitations of our study, since only one combination of input variables was tested during the work. It would be interesting for a future study to compare, for the same dataset, models using different sets of input variables, and to analyze their impact on model effectiveness.</p>
</sec>
<sec>
<title>Interpretability of Machine-Learning Approaches</title>
<p>Another major issue facing users of machine learning approaches is the interpretability of these models. Even if, at this time, we do not have a common general definition of this term, many researchers, such as Schmidt et al. (<xref ref-type="bibr" rid="B72">2019</xref>), define a model&#x00027;s interpretability based on two aspects: (a) intrinsic interpretability (or transparency): the ability to understand the inner mechanism of the model in the context of the study (e.g., identification of variables most involved in the predictions), and (b) <italic>post hoc</italic> interpretability: the ability to extract new information from the model or provide new insights into the relationships discovered during the process (e.g., the effect of a variable on another one) (Murdoch et al., <xref ref-type="bibr" rid="B58">2019</xref>; Schmidt et al., <xref ref-type="bibr" rid="B72">2019</xref>; Pintelas et al., <xref ref-type="bibr" rid="B63">2020</xref>). Although some ML methods, such as decision trees or linear regression models, are easily interpretable, this is not the case for most of the models developed here (e.g., XGBoost Linear, bagging MARS, ANN). Nevertheless, using the variables that are directly related to the variable to be predicted, as we do here, allows us to gain some understanding of how the model works and the types of relationships that are revealed, enabling us to rely on the models. Furthermore, while we identified Random forests as one of the best methods for predicting final flux or product concentration, Pintelas et al. (<xref ref-type="bibr" rid="B63">2020</xref>) classify it as a model that is hard to interpret. Therefore, it would be interesting to compute variable importance or to apply different techniques to explain the model in order to increase its interpretability (Zhou et al., <xref ref-type="bibr" rid="B93">2019</xref>; Azodi et al., <xref ref-type="bibr" rid="B7">2020</xref>). 
Besides, knowing that models based on decision trees are among the simplest to interpret, we support the idea of Schmidt et al. that RF models are more accessible than others from an interpretability point of view (Schmidt et al., <xref ref-type="bibr" rid="B72">2019</xref>). An alternate solution would be to develop simpler models, but this would certainly reduce their overall performance.</p>
<p>Moreover, one of the key factors in the interpretability of the models is linked to the equations used. In fact, compared to knowledge-based models that use well-defined equations with a biological significance, ML models are governed by other equations, which sometimes are &#x0201C;outside our understanding&#x0201D; as Schmidt et al. (<xref ref-type="bibr" rid="B72">2019</xref>) observed in their study of the applications of ML in solid-state materials science. This raises a real problem of confidence in the prediction results obtained with such methods. As these authors point out, the fact that these models were not based on physical principles in their studies, or on biological principles in ours, could result in wrong predictions in completely unexpected cases, while providing great results overall. And in the present case where the models are used in the context of biomarker identification or optimization of an industrial bioreactor, we cannot risk obtaining such results from our models in these specific situations. Far from hindering us in the use of ML models, awareness of these problems allows us to formulate several recommendations for future research. These include the combination of interpretable models, e.g., knowledge-based kinetic models with ML models, e.g., random forests models; the prediction of a new set of experimental data with unexpected values. In this latter instance, this would require experimentally testing a range of &#x0201C;extreme&#x0201D; data that would be found in the parasites studied, or recording the bioreactor data even during failures of the penicillin production.</p>
</sec>
<sec>
<title>Strengths and Weaknesses of the Modeling Methods</title>
<p>After analyzing the interpretability of the different modeling methods, it is worthwhile to note some advantages and disadvantages of their use in flux and concentration prediction. One of the best methods in our case is the random forest (QRF and Rborist). Many studies report the use of random forest in the biological field for the prediction of: protein interaction (Qi et al., <xref ref-type="bibr" rid="B64">2006</xref>), drug response based on protein markers (Ma et al., <xref ref-type="bibr" rid="B51">2006</xref>) and <italic>in vitro</italic> drug sensitivity (Riddick et al., <xref ref-type="bibr" rid="B68">2011</xref>). Also, Villa-Vialaneix et al. used SVM and random forest to predict the flux of N<sub>2</sub>O emissions, and found that random forest achieves the best performances among the built models (Villa-Vialaneix et al., <xref ref-type="bibr" rid="B78">2010</xref>). They highlighted that these models offered the advantage of having a low computational cost, compared to the SVM method. However, in our case, we notice that random forest is the most accurate predictability model compared to SVM methods, with the highest computation time for almost all datasets. Moreover, among the random forest packages developed on R, Rborist is quite a recent implementation, designed for multicore hardware, which minimizes data movement within memory to increase the performance and decrease the processing time (Wright and Ziegler, <xref ref-type="bibr" rid="B82">2017</xref>). Surprisingly, here, Rborist package is the one that has the longest time of computation and is more efficient on big datasets compared to other methods. It would be of interest to create variant models combining the random forest method and other methods, as in previous studies (Chen et al., <xref ref-type="bibr" rid="B13">2018</xref>; Zampieri et al., <xref ref-type="bibr" rid="B90">2019</xref>). 
An existing variant of random forests is the quantile regression forest (QRF) method, which has the capability of establishing prediction intervals that cover uncertainties, useful in the prediction of possible new data (Meinshausen, <xref ref-type="bibr" rid="B53">2006</xref>). Francke et al. demonstrated in their work that this method had the advantage of calculating uncertainties associated with the predicted sediment yields, through the calculation of confidence intervals (Francke et al., <xref ref-type="bibr" rid="B24">2008</xref>). But they also stated that the model predictions will always be within the range of observations, which prevents implausible values but inhibits prediction outside the range of values learned from the training set. We saw here that, overall, QRF models have a good generalization capability; additional prediction of new experimental data, with data separated by a larger stepsize (&#x0003E;25), would be beneficial to confirm or invalidate this capability. This could be useful for the study of metabolic pathways in extremotolerant organisms.</p>
<p>This leads us to note one of the advantages not only of the QRF method but also of other ensemble learning methods, such as XGBoost Linear: prediction from high-dimensional data. Indeed, these models are among the best we have, with any starting dataset we have, from the simplest to the most complex with several types of variables. Remarkably, compared to other models, XGBoost Linear is better ranked for small datasets. This is confirmed by the work of Yang et al. (<xref ref-type="bibr" rid="B86">2010</xref>), who propose that ensemble methods have the advantage of reducing the potential for overfitting in small sample size problems. Another strength of XGBoost Linear compared to its peers is the combination of high accuracy and a short time of processing. However, despite the great accuracy of these models, they are often more complex and less interpretable, and present a higher computational intensity.</p>
<p>Moreover, Cubist, a model based on modified regression tree theory, has the advantage of analyzing big data with high speed (Xu et al., <xref ref-type="bibr" rid="B85">2018</xref>). This was confirmed by our results, which show that Cubist is one of our best models (e.g., for <xref ref-type="supplementary-material" rid="SM3">Dataset 1</xref>, Cubist: 2.49 min and QRF: 1.76 hr). However, we noted that the performance was better for the small datasets than for the bigger one. Another advantage that Das et al. noticed is the fact that the Cubist model is easy to interpret and is a suitable method for beginners (Zhou et al., <xref ref-type="bibr" rid="B93">2019</xref>; Das et al., <xref ref-type="bibr" rid="B20">2020</xref>).</p>
<p>The PLS method turned out not to be appropriate here to model these pathways and predict the final flux starting from enzyme activities, or the final product concentration starting from parameters of a bioreactor. This may be due to the inherent limitation of the PLS method to capture the non-linearities of the metabolic pathways. However, it performs better when we have a smaller dataset, as it has also been noted in a previous study on gluconeogenic flux prediction (Antoniewicz et al., <xref ref-type="bibr" rid="B5">2006</xref>). But these results contradict those obtained with the PLS model for the prediction of limonene and isopentenol synthesis. In fact, in this work, results showed that the model performed well when the dataset was larger (lower RMSE, better predictions) (Costello and Martin, <xref ref-type="bibr" rid="B16">2018</xref>). Also, one big advantage of the PLS technique remains that it has the shortest calculation time for modeling.</p>
<p>It is relevant to observe that the model implementation will differ depending on varying levels of data. In fact, a ML model will be more difficult to implement, if the available data is limited. In this case, a significant additional time is required. Among the various studied models, the difficulty to implement the model could also be based on the higher number of parameters to adjust during the training time.</p>
<p>Our findings generally support the idea that non-linear models are more suitable than linear ones for modeling metabolic pathways. Furthermore, it would be interesting to apply these ML models on genome-scale metabolic networks for which the literature abounds in data. Recently, hybrid models coupling a genome-scale model and ML model have been found to be effective for different purposes such as the prediction of individual amino acid concentration in culture medium (Schinn et al., <xref ref-type="bibr" rid="B71">2021</xref>) and identification of prognostic metabolic biomarkers in cancer studies (Lewis and Kemp, <xref ref-type="bibr" rid="B46">2021</xref>). One of the benefits that ML models could bring is the integration of multi-omics data such as genomic, transcriptomic, metabolomic and proteomic data. This topic will be addressed in an upcoming study.</p>
<p>As far as we know, genome-scale models have a predominant place in the field of metabolic networks for the identification of key-molecules in the metabolism. This study allows us to consider the machine learning methods as performant models to predict metabolic pathways. Indeed, their ability to take over large datasets makes them applicable techniques to efficiently predict larger metabolic pathways (e.g., <italic>E. coli</italic>). While flux balance analysis (FBA) based methods, as used in the genome-scale models, need information about the pathway in a given condition as they are hypothesis-driven, machine learning models could predict the metabolic pathways without needing to clearly understand the underlying biological mechanisms of the pathways. Also, constraint-based models (e.g., FBA) are not able to predict metabolite concentrations, while the machine learning methods can consider these predictions. We can thus envisage a hybrid method using both machine learning and FBA methods for metabolic pathway modeling (Zampieri et al., <xref ref-type="bibr" rid="B90">2019</xref>).</p>
</sec>
<sec>
<title>Decision-Making Support for Pathway Modeling</title>
<p>Given the many different methods that exist and continue to emerge, one can struggle with the choice of a model to build from a dataset. Faced with this decision, we can choose to build simple models or to use models being used in the same field of study and giving good results (Camacho et al., <xref ref-type="bibr" rid="B10">2018</xref>; Cifuentes et al., <xref ref-type="bibr" rid="B15">2020</xref>). In view of this, it would be useful to review and define some basic rules for building a decision-making support for future studies on modeling metabolic pathways. The first feature to consider is the quality of the biological dataset (<xref ref-type="fig" rid="F11">Figure 11</xref>). Do we have an initial dataset of good quality? Data quality can highly impact the model predictions. If the dataset is not of good quality, it would be better to build a new dataset and generate good quality experimental data. When the dataset is of good quality but small in size, it is useful to do data augmentation, as we did in this study; if this is not possible, we can use an ensemble model to build the metabolic pathway, since such models can deal with small datasets. Another useful criterion we can investigate is the number of variables. If the dataset presents a high number of variables, we can consider doing variable selection before building the model, or we have the option of building the model by using ensemble modeling that gives good accuracy with several input variables. Also, one key factor is the non-linearity of the studied metabolic pathway; do we have a non-linear or a linear process? If our pathway is linear, we can design a battery of linear models which will give a high performance. But if our study involves a pathway that is non-linear, then it is preferable to use a non-linear model. After building our model, an evaluation of its accuracy is necessary to validate it. 
In case the performance of the model is not suitable, we can plan to refine it, for example by tuning the hyperparameters (Chicco, <xref ref-type="bibr" rid="B14">2017</xref>), or simply to replace it and build a new one.</p>
<fig id="F11" position="float">
<label>Figure 11</label>
<caption><p>Decision-making support for the construction of metabolic pathway models using machine learning methods.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-05-744755-g0011.tif"/>
</fig>
<p>Non-linear machine learning methods enable us to model metabolic pathways by identifying key-molecules, which are important for the drug-design process, improving disease diagnosis (cancer, viral/parasitic/bacterial infections, neurodegenerative diseases) by highlighting the differences between healthy and pathological situations, or even optimizing industrial production processes.</p>
</sec>
</sec>
<sec sec-type="data-availability" id="s5">
<title>Data Availability Statement</title>
<p>The data that support the findings of this study are available from the corresponding author upon reasonable request. The custom codes for the data analysis used in this study are available from the corresponding author in the Github repository: <ext-link ext-link-type="uri" xlink:href="https://github.com/ophelielt/Lo-Thong_et_al._Non-linearity_of_metabolic_pathways_influences_the_choice_of_ML.git">https://github.com/ophelielt/Lo-Thong_et_al._Non-linearity_of_metabolic_pathways_influences_the_choice_of_ML.git</ext-link>.</p>
</sec>
<sec id="s6">
<title>Author Contributions</title>
<p>FC, CD, and PC designed the method. OL-T-V, CD, PC, BG-P, XFC, ES, and FC participated in the design of the study and performed the analysis. OL-T-V and XFC wrote algorithms. OL-T-V, CD, PC, XFC, ES, and FC wrote and corrected the manuscript. All authors read and approved the final version of the manuscript.</p>
</sec>
<sec sec-type="funding-information" id="s7">
<title>Funding</title>
<p>OL-T-V was supported by a PhD grant from the Region Reunion and European Union (FEDER) under European Operational Program FEDER REUNION &#x02013; 2014/2020 file number 20171389, tiers 216275. Peaccel was supported through a research program partially co-funded by the European Union (UE) and Region Reunion (FEDER). Research at ES laboratory is supported by CONACyT-Mexico grant 282663. The funding agencies had no influence on the research process. XFC was supported by the UKRI CDT in AI for Healthcare <ext-link ext-link-type="uri" xlink:href="http://ai4health.io">http://ai4health.io</ext-link> (Grant No. P/S023283/1).</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s8">
<title>Publisher&#x00027;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
</body>
<back><sec sec-type="supplementary-material" id="s9">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/frai.2022.744755/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/frai.2022.744755/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_1.XLSX" id="SM2" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_2.XLSX" id="SM3" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_3.XLSX" id="SM4" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
<supplementary-material xlink:href="Table_4.XLSX" id="SM5" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ajjolli Nagaraja</surname> <given-names>A.</given-names></name> <name><surname>Fontaine</surname> <given-names>N.</given-names></name> <name><surname>Delsaut</surname> <given-names>M.</given-names></name> <name><surname>Charton</surname> <given-names>P.</given-names></name> <name><surname>Damour</surname> <given-names>C.</given-names></name> <name><surname>Offmann</surname> <given-names>B.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Flux prediction using artificial neural network (ANN) for the upper part of glycolysis</article-title>. <source>PLoS ONE</source> <volume>14</volume>, <fpage>e0216178</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0216178</pub-id><pub-id pub-id-type="pmid">31067238</pub-id></citation></ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alzghoul</surname> <given-names>A.</given-names></name> <name><surname>Backe</surname> <given-names>B.</given-names></name> <name><surname>L&#x000F6;fstrand</surname> <given-names>M.</given-names></name> <name><surname>Bystr&#x000F6;m</surname> <given-names>A.</given-names></name> <name><surname>Liljedahl</surname> <given-names>B.</given-names></name></person-group> (<year>2014</year>). <article-title>Comparing a knowledge-based and a data-driven method in querying data streams for system fault detection: a hydraulic drive system application</article-title>. <source>Comput. Ind.</source> <volume>65</volume>, <fpage>1126</fpage>&#x02013;<lpage>1135</lpage>. <pub-id pub-id-type="doi">10.1016/j.compind.2014.06.003</pub-id></citation>
</ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Antoniewicz</surname> <given-names>M.</given-names></name> <name><surname>Kraynie</surname> <given-names>D.</given-names></name> <name><surname>Laffend</surname> <given-names>L.</given-names></name> <name><surname>Gonzalezlergier</surname> <given-names>J.</given-names></name> <name><surname>Kelleher</surname> <given-names>J.</given-names></name> <name><surname>Stephanopoulos</surname> <given-names>G.</given-names></name></person-group> (<year>2007</year>). <article-title>Metabolic flux analysis in a nonstationary system: Fed-batch fermentation of a high yielding strain of <italic>E. coli</italic> producing 1,3-propanediol</article-title>. <source>Metab. Eng.</source> <volume>9</volume>, <fpage>277</fpage>&#x02013;<lpage>292</lpage>. <pub-id pub-id-type="doi">10.1016/j.ymben.2007.01.003</pub-id><pub-id pub-id-type="pmid">17400499</pub-id></citation></ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Antoniewicz</surname> <given-names>M. R.</given-names></name></person-group> (<year>2015</year>). <article-title>Methods and advances in metabolic flux analysis: a mini-review</article-title>. <source>J. Ind. Microbiol. Biotechnol.</source> <volume>42</volume>, <fpage>317</fpage>&#x02013;<lpage>325</lpage>. <pub-id pub-id-type="doi">10.1007/s10295-015-1585-x</pub-id><pub-id pub-id-type="pmid">25613286</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Antoniewicz</surname> <given-names>M. R.</given-names></name> <name><surname>Stephanopoulos</surname> <given-names>G.</given-names></name> <name><surname>Kelleher</surname> <given-names>J. K.</given-names></name></person-group> (<year>2006</year>). <article-title>Evaluation of regression models in metabolic physiology: predicting fluxes from isotopic data without knowledge of the pathway</article-title>. <source>Metabolomics</source> <volume>2</volume>, <fpage>41</fpage>&#x02013;<lpage>52</lpage>. <pub-id pub-id-type="doi">10.1007/s11306-006-0018-2</pub-id><pub-id pub-id-type="pmid">17066125</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Awan</surname> <given-names>S. E.</given-names></name> <name><surname>Bennamoun</surname> <given-names>M.</given-names></name> <name><surname>Sohel</surname> <given-names>F.</given-names></name> <name><surname>Sanfilippo</surname> <given-names>F. M.</given-names></name> <name><surname>Chow</surname> <given-names>B. J.</given-names></name> <name><surname>Dwivedi</surname> <given-names>G.</given-names></name></person-group> (<year>2019</year>). <article-title>Feature selection and transformation by machine learning reduce variable numbers and improve prediction for heart failure readmission or death</article-title>. <source>PLoS ONE</source> <volume>14</volume>, <fpage>e0218760</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0218760</pub-id><pub-id pub-id-type="pmid">31242238</pub-id></citation></ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Azodi</surname> <given-names>C. B.</given-names></name> <name><surname>Tang</surname> <given-names>J.</given-names></name> <name><surname>Shiu</surname> <given-names>S. H.</given-names></name></person-group> (<year>2020</year>). <article-title>Opening the Black Box: Interpretable Machine Learning for Geneticists</article-title>. <source>Trends Genet.</source> <volume>36</volume>, <fpage>442</fpage>&#x02013;<lpage>455</lpage>. <pub-id pub-id-type="doi">10.1016/j.tig.2020.03.005</pub-id><pub-id pub-id-type="pmid">32396837</pub-id></citation></ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Baranwal</surname> <given-names>M.</given-names></name> <name><surname>Magner</surname> <given-names>A.</given-names></name> <name><surname>Elvati</surname> <given-names>P.</given-names></name> <name><surname>Saldinger</surname> <given-names>J.</given-names></name> <name><surname>Violi</surname> <given-names>A.</given-names></name> <name><surname>Hero</surname> <given-names>A. O.</given-names></name></person-group> (<year>2020</year>). <article-title>A deep learning architecture for metabolic pathway prediction</article-title>. <source>Bioinformatics</source> <volume>36</volume>, <fpage>2547</fpage>&#x02013;<lpage>2553</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btz954</pub-id><pub-id pub-id-type="pmid">31879763</pub-id></citation></ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Burbidge</surname> <given-names>R.</given-names></name> <name><surname>Trotter</surname> <given-names>M.</given-names></name> <name><surname>Buxton</surname> <given-names>B.</given-names></name> <name><surname>Holden</surname> <given-names>S.</given-names></name></person-group> (<year>2001</year>). <article-title>Drug design by machine learning: support vector machines for pharmaceutical data analysis</article-title>. <source>Comput. Chem.</source> <volume>26</volume>, <fpage>5</fpage>&#x02013;<lpage>14</lpage>. <pub-id pub-id-type="doi">10.1016/S0097-8485(01)00094-8</pub-id><pub-id pub-id-type="pmid">11765851</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Camacho</surname> <given-names>D. M.</given-names></name> <name><surname>Collins</surname> <given-names>K. M.</given-names></name> <name><surname>Powers</surname> <given-names>R. K.</given-names></name> <name><surname>Costello</surname> <given-names>J. C.</given-names></name> <name><surname>Collins</surname> <given-names>J. J.</given-names></name></person-group> (<year>2018</year>). <article-title>Next-generation machine learning for biological networks</article-title>. <source>Cell</source> <volume>173</volume>, <fpage>1581</fpage>&#x02013;<lpage>1592</lpage>. <pub-id pub-id-type="doi">10.1016/j.cell.2018.05.015</pub-id><pub-id pub-id-type="pmid">29887378</pub-id></citation></ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cascante</surname> <given-names>M.</given-names></name> <name><surname>Boros</surname> <given-names>L. G.</given-names></name> <name><surname>Comin-Anduix</surname> <given-names>B.</given-names></name> <name><surname>de Atauri</surname> <given-names>P.</given-names></name> <name><surname>Centelles</surname> <given-names>J. J.</given-names></name> <name><surname>Lee</surname> <given-names>P. W.-N.</given-names></name></person-group> (<year>2002</year>). <article-title>Metabolic control analysis in drug discovery and disease</article-title>. <source>Nat. Biotechnol.</source> <volume>20</volume>, <fpage>243</fpage>&#x02013;<lpage>249</lpage>. <pub-id pub-id-type="doi">10.1038/nbt0302-243</pub-id><pub-id pub-id-type="pmid">11875424</pub-id></citation></ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chance</surname> <given-names>B.</given-names></name></person-group> (<year>1943</year>). <article-title>The kinetics of the enzyme-substrate compound of peroxidase. 1943</article-title>. <source>Adv. Enzymol. Relat. Areas. Mol. Biol.</source> <volume>73</volume>, <fpage>3</fpage>&#x02013;<lpage>23</lpage>. <pub-id pub-id-type="pmid">10218104</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>Z.</given-names></name> <name><surname>He</surname> <given-names>N.</given-names></name> <name><surname>Huang</surname> <given-names>Y.</given-names></name> <name><surname>Qin</surname> <given-names>W. T.</given-names></name> <name><surname>Liu</surname> <given-names>X.</given-names></name> <name><surname>Li</surname> <given-names>L.</given-names></name></person-group> (<year>2018</year>). <article-title>Integration of a deep learning classifier with a random forest approach for predicting malonylation sites</article-title>. <source>Genomics Proteomics Bioinform.</source> <volume>16</volume>, <fpage>451</fpage>&#x02013;<lpage>459</lpage>. <pub-id pub-id-type="doi">10.1016/j.gpb.2018.08.004</pub-id><pub-id pub-id-type="pmid">30639696</pub-id></citation></ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chicco</surname> <given-names>D.</given-names></name></person-group> (<year>2017</year>). <article-title>Ten quick tips for machine learning in computational biology</article-title>. <source>BioData Min.</source> <volume>10</volume>, <fpage>35</fpage>. <pub-id pub-id-type="doi">10.1186/s13040-017-0155-3</pub-id><pub-id pub-id-type="pmid">29234465</pub-id></citation></ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cifuentes</surname> <given-names>J.</given-names></name> <name><surname>Marulanda</surname> <given-names>G.</given-names></name> <name><surname>Bello</surname> <given-names>A.</given-names></name> <name><surname>Reneses</surname> <given-names>J.</given-names></name></person-group> (<year>2020</year>). <article-title>Air temperature forecasting using machine learning techniques: a review</article-title>. <source>Energies</source> <volume>13</volume>, <fpage>4215</fpage>. <pub-id pub-id-type="doi">10.3390/en13164215</pub-id></citation>
</ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Costello</surname> <given-names>Z.</given-names></name> <name><surname>Martin</surname> <given-names>H. G.</given-names></name></person-group> (<year>2018</year>). <article-title>A machine learning approach to predict metabolic pathway dynamics from time-series multiomics data</article-title>. <source>npj Syst. Biol. Appl.</source> <volume>4</volume>, <fpage>19</fpage>. <pub-id pub-id-type="doi">10.1038/s41540-018-0054-3</pub-id><pub-id pub-id-type="pmid">29872542</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cuperlovic-Culf</surname> <given-names>M.</given-names></name></person-group> (<year>2018</year>). <article-title>Machine learning methods for analysis of metabolic data and metabolic pathway modeling</article-title>. <source>Metabolites</source> <volume>8</volume>, <fpage>4</fpage>. <pub-id pub-id-type="doi">10.3390/metabo8010004</pub-id><pub-id pub-id-type="pmid">29324649</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Curto</surname> <given-names>R.</given-names></name> <name><surname>Voit</surname> <given-names>E. O.</given-names></name> <name><surname>Sorribas</surname> <given-names>A.</given-names></name> <name><surname>Cascante</surname> <given-names>M.</given-names></name></person-group> (<year>1997</year>). <article-title>Validation and steady-state analysis of a power-law model of purine metabolism in man</article-title>. <source>Biochem. J.</source> <volume>324</volume>, <fpage>761</fpage>&#x02013;<lpage>775</lpage>. <pub-id pub-id-type="doi">10.1042/bj3240761</pub-id><pub-id pub-id-type="pmid">9210399</pub-id></citation></ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Curto</surname> <given-names>R.</given-names></name> <name><surname>Voit</surname> <given-names>E. O.</given-names></name> <name><surname>Sorribas</surname> <given-names>A.</given-names></name> <name><surname>Cascante</surname> <given-names>M.</given-names></name></person-group> (<year>1998</year>). <article-title>Mathematical models of purine metabolism in man</article-title>. <source>Math. Biosci.</source> <volume>151</volume>, <fpage>1</fpage>&#x02013;<lpage>49</lpage>. <pub-id pub-id-type="doi">10.1016/S0025-5564(98)10001-9</pub-id><pub-id pub-id-type="pmid">9664759</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Das</surname> <given-names>S.</given-names></name> <name><surname>Sun</surname> <given-names>X.</given-names></name> <name><surname>Sun</surname> <given-names>M.</given-names></name></person-group> (<year>2020</year>). <article-title>Rule-based safety prediction models for rural two-lane run-off-road crashes</article-title>. <source>Int. J. Transp. Sci. Technol.</source> <volume>10</volume>, <fpage>235</fpage>&#x02013;<lpage>244</lpage>. <pub-id pub-id-type="doi">10.1016/j.ijtst.2020.08.001</pub-id></citation>
</ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Drysch</surname> <given-names>A.</given-names></name> <name><surname>El Massaoudi</surname> <given-names>M.</given-names></name> <name><surname>Mack</surname> <given-names>C.</given-names></name> <name><surname>Takors</surname> <given-names>R.</given-names></name> <name><surname>de Graaf</surname> <given-names>A. A.</given-names></name> <name><surname>Sahm</surname> <given-names>H.</given-names></name></person-group> (<year>2003</year>). <article-title>Production process monitoring by serial mapping of microbial carbon flux distributions using a novel Sensor Reactor approach: II&#x02212;13C-labeling-based metabolic flux analysis and l-lysine production</article-title>. <source>Metab. Eng.</source> <volume>5</volume>, <fpage>96</fpage>&#x02013;<lpage>107</lpage>. <pub-id pub-id-type="doi">10.1016/S1096-7176(03)00005-3</pub-id><pub-id pub-id-type="pmid">12850132</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fell</surname> <given-names>D. A.</given-names></name></person-group> (<year>1992</year>). <article-title>Metabolic control analysis: a survey of its theoretical and experimental development</article-title>. <source>Biochem. J.</source> <volume>286</volume>, <fpage>313</fpage>&#x02013;<lpage>330</lpage>. <pub-id pub-id-type="doi">10.1042/bj2860313</pub-id><pub-id pub-id-type="pmid">1530563</pub-id></citation></ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fell</surname> <given-names>D. A.</given-names></name> <name><surname>Small</surname> <given-names>J. R.</given-names></name></person-group> (<year>1986</year>). <article-title>Fat synthesis in adipose tissue. An examination of stoichiometric constraints</article-title>. <source>Biochem. J.</source> <volume>238</volume>, <fpage>781</fpage>&#x02013;<lpage>786</lpage>. <pub-id pub-id-type="doi">10.1042/bj2380781</pub-id><pub-id pub-id-type="pmid">3800960</pub-id></citation></ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Francke</surname> <given-names>T.</given-names></name> <name><surname>L&#x000F3;pez-Taraz&#x000F3;n</surname> <given-names>J. A.</given-names></name> <name><surname>Schr&#x000F6;der</surname> <given-names>B.</given-names></name></person-group> (<year>2008</year>). <article-title>Estimation of suspended sediment concentration and yield using linear models, random forests and quantile regression forests</article-title>. <source>Hydrol. Process.</source> <volume>22</volume>, <fpage>4892</fpage>&#x02013;<lpage>4904</lpage>. <pub-id pub-id-type="doi">10.1002/hyp.7110</pub-id></citation>
</ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Garfinkel</surname> <given-names>D.</given-names></name> <name><surname>Garfinkel</surname> <given-names>L.</given-names></name> <name><surname>Pring</surname> <given-names>M.</given-names></name> <name><surname>Green</surname> <given-names>S. B.</given-names></name> <name><surname>Chance</surname> <given-names>B.</given-names></name></person-group> (<year>1970</year>). <article-title>Computer applications to biochemical kinetics</article-title>. <source>Annu. Rev. Biochem.</source> <volume>39</volume>, <fpage>473</fpage>&#x02013;<lpage>498</lpage>. <pub-id pub-id-type="doi">10.1146/annurev.bi.39.070170.002353</pub-id><pub-id pub-id-type="pmid">4921695</pub-id></citation></ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Genuer</surname> <given-names>R.</given-names></name> <name><surname>Poggi</surname> <given-names>J. -M.</given-names></name> <name><surname>Tuleau-Malot</surname> <given-names>C.</given-names></name></person-group> (<year>2010</year>). <article-title>Variable selection using random forests</article-title>. <source>Patt. Recogn. Lett.</source> <volume>31</volume>, <fpage>2225</fpage>&#x02013;<lpage>2236</lpage>. <pub-id pub-id-type="doi">10.1016/j.patrec.2010.03.014</pub-id></citation>
</ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Goldrick</surname> <given-names>S.</given-names></name> <name><surname>Stefan</surname> <given-names>A.</given-names></name> <name><surname>Lovett</surname> <given-names>D.</given-names></name> <name><surname>Montague</surname> <given-names>G.</given-names></name> <name><surname>Lennox</surname> <given-names>B.</given-names></name></person-group> (<year>2015</year>). <article-title>The development of an industrial-scale fed-batch fermentation simulation</article-title>. <source>J. Biotechnol.</source> <volume>193</volume>, <fpage>70</fpage>&#x02013;<lpage>82</lpage>. <pub-id pub-id-type="doi">10.1016/j.jbiotec.2014.10.029</pub-id><pub-id pub-id-type="pmid">25449107</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gonz&#x000E1;lez-Ch&#x000E1;vez</surname> <given-names>Z.</given-names></name> <name><surname>Olin-Sandoval</surname> <given-names>V.</given-names></name> <name><surname>Rodr&#x000ED;guez-Zavala</surname> <given-names>J. S.</given-names></name> <name><surname>Moreno-S&#x000E1;nchez</surname> <given-names>R.</given-names></name> <name><surname>Saavedra</surname> <given-names>E.</given-names></name></person-group> (<year>2015</year>). <article-title>Metabolic control analysis of the Trypanosoma cruzi peroxide detoxification pathway identifies tryparedoxin as a suitable drug target</article-title>. <source>Biochim. Biophys. Acta</source> <volume>1850</volume>, <fpage>263</fpage>&#x02013;<lpage>273</lpage>. <pub-id pub-id-type="doi">10.1016/j.bbagen.2014.10.029</pub-id><pub-id pub-id-type="pmid">25450181</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gonz&#x000E1;lez-Ch&#x000E1;vez</surname> <given-names>Z.</given-names></name> <name><surname>V&#x000E1;zquez</surname> <given-names>C.</given-names></name> <name><surname>Mejia-Tlachi</surname> <given-names>M.</given-names></name> <name><surname>M&#x000E1;rquez-Due&#x000F1;as</surname> <given-names>C.</given-names></name> <name><surname>Manning-Cela</surname> <given-names>R.</given-names></name> <name><surname>Encalada</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Gamma-glutamylcysteine synthetase and tryparedoxin 1 exert high control on the antioxidant system in Trypanosoma cruzi contributing to drug resistance and infectivity</article-title>. <source>Redox Biol.</source> <volume>26</volume>, <fpage>101231</fpage>. <pub-id pub-id-type="doi">10.1016/j.redox.2019.101231</pub-id><pub-id pub-id-type="pmid">31203195</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hartwell</surname> <given-names>L. H.</given-names></name> <name><surname>Hopfield</surname> <given-names>J. J.</given-names></name> <name><surname>Leibler</surname> <given-names>S.</given-names></name> <name><surname>Murray</surname> <given-names>A. W.</given-names></name></person-group> (<year>1999</year>). <article-title>From molecular to modular cell biology</article-title>. <source>Nature</source> <volume>402</volume>, <fpage>C47</fpage>&#x02013;<lpage>C52</lpage>. <pub-id pub-id-type="doi">10.1038/35011540</pub-id><pub-id pub-id-type="pmid">10591225</pub-id></citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hatzimanikatis</surname> <given-names>V.</given-names></name> <name><surname>Bailey</surname> <given-names>J. E.</given-names></name></person-group> (<year>1997</year>). <article-title>Effects of spatiotemporal variations on metabolic control: approximate analysis using (log)linear kinetic models</article-title>. <source>Biotechnol. Bioeng.</source> <volume>54</volume>, <fpage>91</fpage>&#x02013;<lpage>104</lpage>. <pub-id pub-id-type="doi">10.1002/(SICI)1097-0290(19970420)54:2&#x0003C;91::AID-BIT1&#x0003E;3.0.CO</pub-id><pub-id pub-id-type="pmid">18634077</pub-id></citation></ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hatzimanikatis</surname> <given-names>V.</given-names></name> <name><surname>Emmerling</surname> <given-names>M.</given-names></name> <name><surname>Sauer</surname> <given-names>U.</given-names></name> <name><surname>Bailey</surname> <given-names>J. E.</given-names></name></person-group> (<year>1998</year>). <article-title>Application of mathematical tools for metabolic design of microbial ethanol production</article-title>. <source>Biotechnol. Bioeng.</source> <volume>58</volume>, <fpage>154</fpage>&#x02013;<lpage>161</lpage>. <pub-id pub-id-type="doi">10.1002/(sici)1097-0290(19980420)58:2/3&#x0003C;154::aid-bit7&#x0003E;3.0.co</pub-id><pub-id pub-id-type="pmid">10191385</pub-id></citation></ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Heckmann</surname> <given-names>D.</given-names></name></person-group> (<year>2018</year>). <article-title>Machine learning applied to enzyme turnover numbers reveals protein structural correlates and improves metabolic models</article-title>. <source>Nat. Commun.</source> <volume>9</volume>, <fpage>5252</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-018-07652-6</pub-id><pub-id pub-id-type="pmid">30531987</pub-id></citation></ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Heijnen</surname> <given-names>J. J.</given-names></name></person-group> (<year>2005</year>). <article-title>Approximative kinetic formats used in metabolic network modeling</article-title>. <source>Biotechnol. Bioeng.</source> <volume>91</volume>, <fpage>534</fpage>&#x02013;<lpage>545</lpage>. <pub-id pub-id-type="doi">10.1002/bit.20558</pub-id><pub-id pub-id-type="pmid">16003779</pub-id></citation></ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hein</surname> <given-names>J. E.</given-names></name></person-group> (<year>2021</year>). <article-title>Machine learning made easy for optimizing chemical reactions</article-title>. <source>Nature</source> <volume>590</volume>, <fpage>40</fpage>&#x02013;<lpage>41</lpage>. <pub-id pub-id-type="doi">10.1038/d41586-021-00209-6</pub-id><pub-id pub-id-type="pmid">33536642</pub-id></citation></ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Heinrich</surname> <given-names>R.</given-names></name> <name><surname>Rapoport</surname> <given-names>T. A.</given-names></name></person-group> (<year>1974</year>). <article-title>A linear steady-state treatment of enzymatic chains. General properties, control and effector strength</article-title>. <source>Eur. J. Biochem.</source> <volume>42</volume>, <fpage>89</fpage>&#x02013;<lpage>95</lpage>. <pub-id pub-id-type="doi">10.1111/j.1432-1033.1974.tb03318.x</pub-id><pub-id pub-id-type="pmid">4830198</pub-id></citation></ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hoops</surname> <given-names>S.</given-names></name> <name><surname>Sahle</surname> <given-names>S.</given-names></name> <name><surname>Gauges</surname> <given-names>R.</given-names></name> <name><surname>Lee</surname> <given-names>C.</given-names></name> <name><surname>Pahle</surname> <given-names>J.</given-names></name> <name><surname>Simus</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2006</year>). <article-title>COPASI&#x02013;a COmplex PAthway SImulator</article-title>. <source>Bioinformatics</source> <volume>22</volume>, <fpage>3067</fpage>&#x02013;<lpage>3074</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btl485</pub-id><pub-id pub-id-type="pmid">17032683</pub-id></citation></ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hou</surname> <given-names>J.</given-names></name> <name><surname>Acharya</surname> <given-names>L.</given-names></name> <name><surname>Zhu</surname> <given-names>D.</given-names></name> <name><surname>Cheng</surname> <given-names>J.</given-names></name></person-group> (<year>2016</year>). <article-title>An overview of bioinformatics methods for modeling biological pathways in yeast</article-title>. <source>Brief. Funct. Genomics</source> <volume>15</volume>, <fpage>95</fpage>&#x02013;<lpage>108</lpage>. <pub-id pub-id-type="doi">10.1093/bfgp/elv040</pub-id><pub-id pub-id-type="pmid">26476430</pub-id></citation></ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kacser</surname> <given-names>H.</given-names></name> <name><surname>Burns</surname> <given-names>J. A.</given-names></name> <name><surname>Fell</surname> <given-names>D. A.</given-names></name></person-group> (<year>1995</year>). <article-title>The control of flux</article-title>. <source>Biochem. Soc. Trans.</source> <volume>23</volume>, <fpage>341</fpage>&#x02013;<lpage>366</lpage>. <pub-id pub-id-type="doi">10.1042/bst0230341</pub-id><pub-id pub-id-type="pmid">7672373</pub-id></citation></ref>
<ref id="B40">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Kadarmideen</surname> <given-names>H. N.</given-names></name></person-group> (<year>2016</year>). <source>Systems Biology in Animal Production and Health</source>, <volume>Vol. 2</volume>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer International Publishing</publisher-name>. <fpage>136</fpage>&#x02013;<lpage>143</lpage>.</citation>
</ref>
<ref id="B41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname> <given-names>G. B.</given-names></name> <name><surname>Kim</surname> <given-names>W. J.</given-names></name> <name><surname>Kim</surname> <given-names>H. U.</given-names></name> <name><surname>Lee</surname> <given-names>S. Y.</given-names></name></person-group> (<year>2020</year>). <article-title>Machine learning applications in systems metabolic engineering</article-title>. <source>Curr. Opin. Biotechnol.</source> <volume>64</volume>, <fpage>1</fpage>&#x02013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1016/j.copbio.2019.08.010</pub-id><pub-id pub-id-type="pmid">31580992</pub-id></citation></ref>
<ref id="B42">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kotera</surname> <given-names>M.</given-names></name> <name><surname>Tabei</surname> <given-names>Y.</given-names></name> <name><surname>Yamanishi</surname> <given-names>Y.</given-names></name> <name><surname>Tokimatsu</surname> <given-names>T.</given-names></name> <name><surname>Goto</surname> <given-names>S.</given-names></name></person-group> (<year>2013</year>). <article-title>Supervised de novo reconstruction of metabolic pathways from metabolome-scale compound sets</article-title>. <source>Bioinformatics</source> <volume>29</volume>, <fpage>i135</fpage>&#x02013;<lpage>i144</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btt244</pub-id><pub-id pub-id-type="pmid">23812977</pub-id></citation></ref>
<ref id="B43">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Koza</surname> <given-names>J. R.</given-names></name> <name><surname>Mydlowec</surname> <given-names>W.</given-names></name> <name><surname>Lanza</surname> <given-names>G.</given-names></name> <name><surname>Yu</surname> <given-names>J.</given-names></name> <name><surname>Keane</surname> <given-names>M. A.</given-names></name></person-group> (<year>2001</year>). <article-title>Automatic synthesis of both the topology and sizing of metabolic pathways using genetic programming</article-title>, in <source>Proceedings of the 3rd Annual Conference on Genetic and Evolutionary Computation (GECCO&#x00027;01)</source> (<publisher-loc>San Francisco, CA</publisher-loc>: <publisher-name>Morgan Kaufmann Publishers Inc.</publisher-name>), <fpage>57</fpage>&#x02013;<lpage>65</lpage>.</citation>
</ref>
<ref id="B44">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Kuhn</surname> <given-names>M.</given-names></name></person-group> (<year>2020</year>). <source>Caret: Classification and Regression Training. R package version 6.0-86</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=caret">https://CRAN.R-project.org/package=caret</ext-link> (accessed July 01, 2019).</citation>
</ref>
<ref id="B45">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Leighty</surname> <given-names>R. W.</given-names></name> <name><surname>Antoniewicz</surname> <given-names>M. R.</given-names></name></person-group> (<year>2011</year>). <article-title>Dynamic metabolic flux analysis (DMFA): A framework for determining fluxes at metabolic non-steady state</article-title>. <source>Metab. Eng.</source> <volume>13</volume>, <fpage>745</fpage>&#x02013;<lpage>755</lpage>. <pub-id pub-id-type="doi">10.1016/j.ymben.2011.09.010</pub-id><pub-id pub-id-type="pmid">22001431</pub-id></citation></ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lewis</surname> <given-names>J. E.</given-names></name> <name><surname>Kemp</surname> <given-names>M. L.</given-names></name></person-group> (<year>2021</year>). <article-title>Integration of machine learning and genome-scale metabolic modeling identifies multi-omics biomarkers for radiation resistance</article-title>. <source>Nat. Commun.</source> <volume>12</volume>, <fpage>2700</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-021-22989-1</pub-id><pub-id pub-id-type="pmid">33976213</pub-id></citation></ref>
<ref id="B47">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>L&#x00027;Heureux</surname> <given-names>A.</given-names></name> <name><surname>Grolinger</surname> <given-names>K.</given-names></name> <name><surname>Elyamany</surname> <given-names>H. F.</given-names></name> <name><surname>Capretz</surname> <given-names>M. A. M.</given-names></name></person-group> (<year>2017</year>). <article-title>Machine Learning With Big Data: Challenges and Approaches</article-title>. <source>IEEE Access</source> <volume>5</volume>, <fpage>7776</fpage>&#x02013;<lpage>7797</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2017.2696365</pub-id></citation>
</ref>
<ref id="B48">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>S.</given-names></name> <name><surname>Umarov</surname> <given-names>R.</given-names></name> <name><surname>Xie</surname> <given-names>B.</given-names></name> <name><surname>Fan</surname> <given-names>M.</given-names></name> <name><surname>Li</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>DEEPre: sequence-based enzyme EC number prediction by deep learning</article-title>. <source>Bioinformatics</source> <volume>34</volume>, <fpage>760</fpage>&#x02013;<lpage>769</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btx680</pub-id><pub-id pub-id-type="pmid">29069344</pub-id></citation></ref>
<ref id="B49">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liebermeister</surname> <given-names>W.</given-names></name> <name><surname>Uhlendorf</surname> <given-names>J.</given-names></name> <name><surname>Klipp</surname> <given-names>E.</given-names></name></person-group> (<year>2010</year>). <article-title>Modular rate laws for enzymatic reactions: thermodynamics, elasticities and implementation</article-title>. <source>Bioinformatics</source> <volume>26</volume>, <fpage>1528</fpage>&#x02013;<lpage>1534</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btq141</pub-id><pub-id pub-id-type="pmid">20385728</pub-id></citation></ref>
<ref id="B50">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lo-Thong</surname> <given-names>O.</given-names></name> <name><surname>Charton</surname> <given-names>P.</given-names></name> <name><surname>Cadet</surname> <given-names>X. F.</given-names></name> <name><surname>Damour</surname> <given-names>C.</given-names></name> <name><surname>Grondin-Perez</surname> <given-names>B.</given-names></name> <name><surname>Saavedra</surname> <given-names>E.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Identification of flux checkpoints in a metabolic pathway through white-box, grey-box and black-box modeling approaches</article-title>. <source>Sci. Rep.</source> <volume>10</volume>, <fpage>13446</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-020-70295-5</pub-id><pub-id pub-id-type="pmid">32778715</pub-id></citation></ref>
<ref id="B51">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>Y.</given-names></name> <name><surname>Ding</surname> <given-names>Z.</given-names></name> <name><surname>Qian</surname> <given-names>Y.</given-names></name> <name><surname>Shi</surname> <given-names>X.</given-names></name> <name><surname>Castranova</surname> <given-names>V.</given-names></name> <name><surname>Harner</surname> <given-names>E. J.</given-names></name> <etal/></person-group>. (<year>2006</year>). <article-title>Predicting cancer drug response by proteomic profiling</article-title>. <source>Clin. Cancer Res.</source> <volume>12</volume>, <fpage>4583</fpage>&#x02013;<lpage>4589</lpage>. <pub-id pub-id-type="doi">10.1158/1078-0432.CCR-06-0290</pub-id><pub-id pub-id-type="pmid">16899605</pub-id></citation></ref>
<ref id="B52">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mar&#x000ED;n-Hern&#x000E1;ndez</surname> <given-names>&#x000C1;.</given-names></name> <name><surname>Gallardo-P&#x000E9;rez</surname> <given-names>J. C.</given-names></name> <name><surname>Reyes-Garc&#x000ED;a</surname> <given-names>M. A.</given-names></name> <name><surname>Sosa-Garrocho</surname> <given-names>M.</given-names></name> <name><surname>Mac&#x000ED;as-Silva</surname> <given-names>M.</given-names></name> <name><surname>Rodr&#x000ED;guez-Enr&#x000ED;quez</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Kinetic modeling of glucose central metabolism in hepatocytes and hepatoma cells</article-title>. <source>Biochim. Biophys. Acta</source> <volume>1864</volume>, <fpage>129687</fpage>. <pub-id pub-id-type="doi">10.1016/j.bbagen.2020.129687</pub-id><pub-id pub-id-type="pmid">32712171</pub-id></citation></ref>
<ref id="B53">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Meinshausen</surname> <given-names>N.</given-names></name></person-group> (<year>2006</year>). <article-title>Quantile regression forests</article-title>. <source>J. Mach. Learn. Res.</source> <volume>7</volume>, <fpage>983</fpage>&#x02013;<lpage>999</lpage>. Available online at: <ext-link ext-link-type="uri" xlink:href="http://jmlr.org/papers/v7/meinshausen06a.html">http://jmlr.org/papers/v7/meinshausen06a.html</ext-link></citation>
</ref>
<ref id="B54">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Michaelis</surname> <given-names>L.</given-names></name> <name><surname>Menten</surname> <given-names>M.</given-names></name></person-group> (<year>1913</year>). <article-title>Die Kinetik der Invertinwirkung</article-title>. <source>Biochem. Z.</source> <volume>49</volume>, <fpage>333</fpage>&#x02013;<lpage>369</lpage>.</citation>
</ref>
<ref id="B55">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Moreno-S&#x000E1;nchez</surname> <given-names>R.</given-names></name> <name><surname>Encalada</surname> <given-names>R.</given-names></name> <name><surname>Mar&#x000ED;n-Hern&#x000E1;ndez</surname> <given-names>A.</given-names></name> <name><surname>Saavedra</surname> <given-names>E.</given-names></name></person-group> (<year>2008</year>). <article-title>Experimental validation of metabolic pathway modeling: an illustration with glycolytic segments from <italic>Entamoeba histolytica</italic></article-title>. <source>FEBS J.</source> <volume>275</volume>, <fpage>3454</fpage>&#x02013;<lpage>3469</lpage>. <pub-id pub-id-type="doi">10.1111/j.1742-4658.2008.06492.x</pub-id><pub-id pub-id-type="pmid">18510554</pub-id></citation></ref>
<ref id="B56">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Morgan</surname> <given-names>J. A.</given-names></name> <name><surname>Rhodes</surname> <given-names>D.</given-names></name></person-group> (<year>2002</year>). <article-title>Mathematical modeling of plant metabolic pathways</article-title>. <source>Metab. Eng.</source> <volume>4</volume>, <fpage>80</fpage>&#x02013;<lpage>89</lpage>. <pub-id pub-id-type="doi">10.1006/mben.2001.0211</pub-id><pub-id pub-id-type="pmid">11800577</pub-id></citation></ref>
<ref id="B57">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Muller</surname> <given-names>M.</given-names></name> <name><surname>Mentel</surname> <given-names>M.</given-names></name> <name><surname>van Hellemond</surname> <given-names>J. J.</given-names></name> <name><surname>Henze</surname> <given-names>K.</given-names></name> <name><surname>Woehle</surname> <given-names>C.</given-names></name> <name><surname>Gould</surname> <given-names>S. B.</given-names></name> <etal/></person-group>. (<year>2012</year>). <article-title>Biochemistry and Evolution of Anaerobic Energy Metabolism in Eukaryotes</article-title>. <source>Microbiol. Mol. Biol. Rev.</source> <volume>76</volume>, <fpage>444</fpage>&#x02013;<lpage>495</lpage>. <pub-id pub-id-type="doi">10.1128/MMBR.05024-11</pub-id><pub-id pub-id-type="pmid">22688819</pub-id></citation></ref>
<ref id="B58">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Murdoch</surname> <given-names>W. J.</given-names></name> <name><surname>Singh</surname> <given-names>C.</given-names></name> <name><surname>Kumbier</surname> <given-names>K.</given-names></name> <name><surname>Abbasi-Asl</surname> <given-names>R.</given-names></name> <name><surname>Yu</surname> <given-names>B.</given-names></name></person-group> (<year>2019</year>). <article-title>Definitions, methods, and applications in interpretable machine learning</article-title>. <source>Proc. Natl. Acad. Sci. U. S. A.</source> <volume>116</volume>, <fpage>22071</fpage>&#x02013;<lpage>22080</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1900654116</pub-id><pub-id pub-id-type="pmid">31619572</pub-id></citation></ref>
<ref id="B59">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>N&#x000F6;h</surname> <given-names>K.</given-names></name> <name><surname>Gr&#x000F6;nke</surname> <given-names>K.</given-names></name> <name><surname>Luo</surname> <given-names>B.</given-names></name> <name><surname>Takors</surname> <given-names>R.</given-names></name> <name><surname>Oldiges</surname> <given-names>M.</given-names></name> <name><surname>Wiechert</surname> <given-names>W.</given-names></name></person-group> (<year>2007</year>). <article-title>Metabolic flux analysis at ultra short time scale: Isotopically non-stationary 13C labeling experiments</article-title>. <source>J. Biotechnol.</source> <volume>129</volume>, <fpage>249</fpage>&#x02013;<lpage>267</lpage>. <pub-id pub-id-type="doi">10.1016/j.jbiotec.2006.11.015</pub-id><pub-id pub-id-type="pmid">17207877</pub-id></citation></ref>
<ref id="B60">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Oyetunde</surname> <given-names>T.</given-names></name> <name><surname>Liu</surname> <given-names>D.</given-names></name> <name><surname>Martin</surname> <given-names>H. G.</given-names></name> <name><surname>Tang</surname> <given-names>Y. J.</given-names></name></person-group> (<year>2019</year>). <article-title>Machine learning framework for assessment of microbial factory performance</article-title>. <source>PLoS ONE</source> <volume>14</volume>, <fpage>e0210558</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0210558</pub-id><pub-id pub-id-type="pmid">30645629</pub-id></citation></ref>
<ref id="B61">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pan</surname> <given-names>L.</given-names></name> <name><surname>Cheng</surname> <given-names>C.</given-names></name> <name><surname>Haberkorn</surname> <given-names>U.</given-names></name> <name><surname>Dimitrakopoulou-Strauss</surname> <given-names>A.</given-names></name></person-group> (<year>2017</year>). <article-title>Machine learning-based kinetic modeling: a robust and reproducible solution for quantitative analysis of dynamic PET data</article-title>. <source>Phys. Med. Biol.</source> <volume>62</volume>, <fpage>3566</fpage>&#x02013;<lpage>3581</lpage>. <pub-id pub-id-type="doi">10.1088/1361-6560/aa6244</pub-id><pub-id pub-id-type="pmid">28379842</pub-id></citation></ref>
<ref id="B62">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pineda</surname> <given-names>E.</given-names></name> <name><surname>Encalada</surname> <given-names>R.</given-names></name> <name><surname>V&#x000E1;zquez</surname> <given-names>C.</given-names></name> <name><surname>N&#x000E9;quiz</surname> <given-names>M.</given-names></name> <name><surname>Olivos-Garc&#x000ED;a</surname> <given-names>A.</given-names></name> <name><surname>Moreno-S&#x000E1;nchez</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2015</year>). <article-title><italic>In vivo</italic> identification of the steps that control energy metabolism and survival of <italic>Entamoeba histolytica</italic></article-title>. <source>FEBS J.</source> <volume>282</volume>, <fpage>318</fpage>&#x02013;<lpage>331</lpage>. <pub-id pub-id-type="doi">10.1111/febs.13131</pub-id><pub-id pub-id-type="pmid">25350227</pub-id></citation></ref>
<ref id="B63">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pintelas</surname> <given-names>E.</given-names></name> <name><surname>Livieris</surname> <given-names>I. E.</given-names></name> <name><surname>Pintelas</surname> <given-names>P.</given-names></name></person-group> (<year>2020</year>). <article-title>A Grey-box ensemble model exploiting black-box accuracy and white-box intrinsic interpretability</article-title>. <source>Algorithms</source> <volume>13</volume>, <fpage>17</fpage>. <pub-id pub-id-type="doi">10.3390/a13010017</pub-id></citation>
</ref>
<ref id="B64">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Qi</surname> <given-names>Y.</given-names></name> <name><surname>Bar-Joseph</surname> <given-names>Z.</given-names></name> <name><surname>Klein-Seetharaman</surname> <given-names>J.</given-names></name></person-group> (<year>2006</year>). <article-title>Evaluation of different biological data and computational classification methods for use in protein interaction prediction</article-title>. <source>Proteins</source> <volume>63</volume>, <fpage>490</fpage>&#x02013;<lpage>500</lpage>. <pub-id pub-id-type="doi">10.1002/prot.20865</pub-id><pub-id pub-id-type="pmid">16450363</pub-id></citation></ref>
<ref id="B65">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ramachandran</surname> <given-names>S.</given-names></name> <name><surname>Chaudhuri</surname> <given-names>R.</given-names></name> <name><surname>Prasad</surname> <given-names>S.</given-names></name> <name><surname>Rauf</surname> <given-names>A.</given-names></name> <name><surname>Paul</surname> <given-names>C.</given-names></name> <name><surname>Chakraborty</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>Biological Data Modelling and Scripting in R</article-title>, in <source>Systems and Computational Biology &#x02013; Bioinformatics and Computational Modeling</source>, ed <person-group person-group-type="editor"><name><surname>Yang</surname> <given-names>N. S.</given-names></name></person-group> (<publisher-loc>Delhi</publisher-loc>: <publisher-name>InTech</publisher-name>).</citation>
</ref>
<ref id="B66">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rana</surname> <given-names>P.</given-names></name> <name><surname>Berry</surname> <given-names>C.</given-names></name> <name><surname>Ghosh</surname> <given-names>P.</given-names></name> <name><surname>Fong</surname> <given-names>S. S.</given-names></name></person-group> (<year>2020</year>). <article-title>Recent advances on constraint-based models by integrating machine learning</article-title>. <source>Curr. Opin. Biotechnol.</source> <volume>64</volume>, <fpage>85</fpage>&#x02013;<lpage>91</lpage>. <pub-id pub-id-type="doi">10.1016/j.copbio.2019.11.007</pub-id><pub-id pub-id-type="pmid">31812921</pub-id></citation></ref>
<ref id="B67">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>R&#x000E9;da</surname> <given-names>C.</given-names></name> <name><surname>Kaufmann</surname> <given-names>E.</given-names></name> <name><surname>Delahaye-Duriez</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>Machine learning applications in drug development</article-title>. <source>Comput. Struct. Biotechnol. J.</source> <volume>18</volume>, <fpage>241</fpage>&#x02013;<lpage>252</lpage>. <pub-id pub-id-type="doi">10.1016/j.csbj.2019.12.006</pub-id><pub-id pub-id-type="pmid">33489002</pub-id></citation></ref>
<ref id="B68">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Riddick</surname> <given-names>G.</given-names></name> <name><surname>Song</surname> <given-names>H.</given-names></name> <name><surname>Ahn</surname> <given-names>S.</given-names></name> <name><surname>Walling</surname> <given-names>J.</given-names></name> <name><surname>Borges-Rivera</surname> <given-names>D.</given-names></name> <name><surname>Zhang</surname> <given-names>W.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>Predicting <italic>in vitro</italic> drug sensitivity using random forests</article-title>. <source>Bioinformatics</source> <volume>27</volume>, <fpage>220</fpage>&#x02013;<lpage>224</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btq628</pub-id><pub-id pub-id-type="pmid">21134890</pub-id></citation></ref>
<ref id="B69">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Savageau</surname> <given-names>M. A.</given-names></name></person-group> (<year>1970</year>). <article-title>Biochemical systems analysis</article-title>. <source>J. Theor. Biol.</source> <volume>26</volume>, <fpage>215</fpage>&#x02013;<lpage>226</lpage>. <pub-id pub-id-type="doi">10.1016/S0022-5193(70)80013-3</pub-id><pub-id pub-id-type="pmid">5434343</pub-id></citation></ref>
<ref id="B70">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Savageau</surname> <given-names>M. A.</given-names></name></person-group> (<year>1988</year>). <article-title>Introduction to S-systems and the underlying power-law formalism</article-title>. <source>Math. Comput. Model.</source> <volume>11</volume>, <fpage>546</fpage>&#x02013;<lpage>551</lpage>. <pub-id pub-id-type="doi">10.1016/0895-7177(88)90553-5</pub-id></citation>
</ref>
<ref id="B71">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schinn</surname> <given-names>S.</given-names></name> <name><surname>Morrison</surname> <given-names>C.</given-names></name> <name><surname>Wei</surname> <given-names>W.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name> <name><surname>Lewis</surname> <given-names>N. E.</given-names></name></person-group> (<year>2021</year>). <article-title>A genome-scale metabolic network model and machine learning predict amino acid concentrations in Chinese Hamster Ovary cell cultures</article-title>. <source>Biotech. Bioeng.</source> <volume>118</volume>, <fpage>2118</fpage>&#x02013;<lpage>2123</lpage>. <pub-id pub-id-type="doi">10.1002/bit.27714</pub-id><pub-id pub-id-type="pmid">33580712</pub-id></citation></ref>
<ref id="B72">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schmidt</surname> <given-names>J.</given-names></name> <name><surname>Marques</surname> <given-names>M. R. G.</given-names></name> <name><surname>Botti</surname> <given-names>S.</given-names></name> <name><surname>Marques</surname> <given-names>M. A. L.</given-names></name></person-group> (<year>2019</year>). <article-title>Recent advances and applications of machine learning in solid-state materials science</article-title>. <source>npj Comput. Mater.</source> <volume>5</volume>, <fpage>83</fpage>. <pub-id pub-id-type="doi">10.1038/s41524-019-0221-0</pub-id></citation>
</ref>
<ref id="B73">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sel&#x00027;Kov</surname> <given-names>E. E.</given-names></name></person-group> (<year>1968</year>). <article-title>Self-oscillations in glycolysis. 1. A simple kinetic model</article-title>. <source>Eur. J. Biochem.</source> <volume>4</volume>, <fpage>79</fpage>&#x02013;<lpage>86</lpage>. <pub-id pub-id-type="doi">10.1111/j.1432-1033.1968.tb00175.x</pub-id><pub-id pub-id-type="pmid">4230812</pub-id></citation></ref>
<ref id="B74">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shapiro</surname> <given-names>N. Z.</given-names></name> <name><surname>Shapley</surname> <given-names>L. S.</given-names></name></person-group> (<year>1965</year>). <article-title>Mass action laws and the Gibbs free energy function</article-title>. <source>J. Soc. Ind. Appl. Math.</source> <volume>13</volume>, <fpage>353</fpage>&#x02013;<lpage>375</lpage>. <pub-id pub-id-type="doi">10.1137/0113020</pub-id></citation>
</ref>
<ref id="B75">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Somarathna</surname> <given-names>P. D. S. N.</given-names></name> <name><surname>Minasny</surname> <given-names>B.</given-names></name> <name><surname>Malone</surname> <given-names>B. P.</given-names></name></person-group> (<year>2017</year>). <article-title>More data or a better model? Figuring out what matters most for the spatial prediction of soil carbon</article-title>. <source>Soil Sci. Soc. Am. J.</source> <volume>81</volume>, <fpage>1413</fpage>&#x02013;<lpage>1426</lpage>. <pub-id pub-id-type="doi">10.2136/sssaj2016.11.0376</pub-id></citation>
</ref>
<ref id="B76">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Song</surname> <given-names>H.-S.</given-names></name> <name><surname>Ramkrishna</surname> <given-names>D.</given-names></name></person-group> (<year>2013</year>). <article-title>Complex nonlinear behavior in metabolic processes: global bifurcation analysis of <italic>Escherichia coli</italic> growth on multiple substrates</article-title>. <source>Processes</source> <volume>1</volume>, <fpage>263</fpage>&#x02013;<lpage>278</lpage>. <pub-id pub-id-type="doi">10.3390/pr1030263</pub-id></citation>
</ref>
<ref id="B77">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stephanopoulos</surname> <given-names>G.</given-names></name></person-group> (<year>1999</year>). <article-title>Metabolic fluxes and metabolic engineering</article-title>. <source>Metab. Eng.</source> <volume>1</volume>, <fpage>1</fpage>&#x02013;<lpage>11</lpage>. <pub-id pub-id-type="doi">10.1006/mben.1998.0101</pub-id><pub-id pub-id-type="pmid">10935750</pub-id></citation></ref>
<ref id="B78">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Villa-Vialaneix</surname> <given-names>N.</given-names></name> <name><surname>Follador</surname> <given-names>M.</given-names></name> <name><surname>Leip</surname> <given-names>A.</given-names></name></person-group> (<year>2010</year>). <source>A Comparison of Three Learning Methods to Predict N2O Fluxes and N Leaching</source>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.nathalievialaneix.eu/doc/pdf/villavialaneix_etal_MASHS2010.pdf">https://www.nathalievialaneix.eu/doc/pdf/villavialaneix_etal_MASHS2010.pdf</ext-link></citation>
</ref>
<ref id="B79">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Visser</surname> <given-names>D.</given-names></name> <name><surname>Heijnen</surname> <given-names>J. J.</given-names></name></person-group> (<year>2003</year>). <article-title>Dynamic simulation and metabolic re-design of a branched pathway using linlog kinetics</article-title>. <source>Metab. Eng.</source> <volume>5</volume>, <fpage>164</fpage>&#x02013;<lpage>176</lpage>. <pub-id pub-id-type="doi">10.1016/S1096-7176(03)00025-9</pub-id><pub-id pub-id-type="pmid">12948750</pub-id></citation></ref>
<ref id="B80">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wei</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Shi</surname> <given-names>Y.</given-names></name> <name><surname>Xia</surname> <given-names>L.</given-names></name> <name><surname>Pan</surname> <given-names>S.</given-names></name> <name><surname>Wu</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>A review of data-driven approaches for prediction and classification of building energy consumption</article-title>. <source>Renew. Sustain. Energy Rev.</source> <volume>82</volume>, <fpage>1027</fpage>&#x02013;<lpage>1047</lpage>. <pub-id pub-id-type="doi">10.1016/j.rser.2017.09.108</pub-id></citation>
</ref>
<ref id="B81">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wiechert</surname> <given-names>W.</given-names></name> <name><surname>Siefke</surname> <given-names>C.</given-names></name> <name><surname>de Graaf</surname> <given-names>A. A.</given-names></name> <name><surname>Marx</surname> <given-names>A.</given-names></name></person-group> (<year>1997</year>). <article-title>Bidirectional reaction steps in metabolic networks: II. Flux estimation and statistical analysis</article-title>. <source>Biotechnol. Bioeng.</source> <volume>55</volume>, <fpage>118</fpage>&#x02013;<lpage>135</lpage>. <pub-id pub-id-type="doi">10.1002/(SICI)1097-0290(19970705)55:1&#x0003C;118::AID-BIT13&#x0003E;3.0.CO</pub-id><pub-id pub-id-type="pmid">18636450</pub-id></citation></ref>
<ref id="B82">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wright</surname> <given-names>M. N.</given-names></name> <name><surname>Ziegler</surname> <given-names>A.</given-names></name></person-group> (<year>2017</year>). <article-title>Ranger: a fast implementation of random forests for high dimensional data in C&#x0002B;&#x0002B; and R</article-title>. <source>J. Stat. Soft.</source> <volume>77</volume>, <fpage>1</fpage>&#x02013;<lpage>17</lpage>. <pub-id pub-id-type="doi">10.18637/jss.v077.i01</pub-id></citation>
</ref>
<ref id="B83">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>S. G.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Jiang</surname> <given-names>W.</given-names></name> <name><surname>Oyetunde</surname> <given-names>T.</given-names></name> <name><surname>Yao</surname> <given-names>R.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Rapid prediction of bacterial heterotrophic fluxomics using machine learning and constraint programming</article-title>. <source>PLoS Comput. Biol.</source> <volume>12</volume>, <fpage>e1004838</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1004838</pub-id><pub-id pub-id-type="pmid">27092947</pub-id></citation></ref>
<ref id="B84">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>Z.</given-names></name> <name><surname>Kan</surname> <given-names>S. B. J.</given-names></name> <name><surname>Lewis</surname> <given-names>R. D.</given-names></name> <name><surname>Wittmann</surname> <given-names>B. J.</given-names></name> <name><surname>Arnold</surname> <given-names>F. H.</given-names></name></person-group> (<year>2019</year>). <article-title>Machine learning-assisted directed protein evolution with combinatorial libraries</article-title>. <source>Proc. Natl. Acad. Sci. U. S. A.</source> <volume>116</volume>, <fpage>8852</fpage>&#x02013;<lpage>8858</lpage>. <pub-id pub-id-type="doi">10.1073/pnas.1901979116</pub-id><pub-id pub-id-type="pmid">31888994</pub-id></citation></ref>
<ref id="B85">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>T.</given-names></name> <name><surname>Guo</surname> <given-names>Z.</given-names></name> <name><surname>Liu</surname> <given-names>S.</given-names></name> <name><surname>He</surname> <given-names>X.</given-names></name> <name><surname>Meng</surname> <given-names>Y.</given-names></name> <name><surname>Xu</surname> <given-names>Z.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Evaluating different machine learning methods for upscaling evapotranspiration from flux towers to the regional scale</article-title>. <source>J. Geophys. Res. Atmos.</source> <volume>123</volume>, <fpage>8674</fpage>&#x02013;<lpage>8690</lpage>. <pub-id pub-id-type="doi">10.1029/2018JD028447</pub-id></citation>
</ref>
<ref id="B86">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>P.</given-names></name> <name><surname>Hwa Yang</surname> <given-names>Y.</given-names></name> <name><surname>Zhou</surname> <given-names>B. B.</given-names></name> <name><surname>Zomaya</surname> <given-names>A. Y.</given-names></name></person-group> (<year>2010</year>). <article-title>A review of ensemble methods in bioinformatics</article-title>. <source>Curr. Bioinform.</source> <volume>5</volume>, <fpage>296</fpage>&#x02013;<lpage>308</lpage>. <pub-id pub-id-type="doi">10.2174/157489310794072508</pub-id></citation>
</ref>
<ref id="B87">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>R.</given-names></name> <name><surname>Rizzoni</surname> <given-names>G.</given-names></name></person-group> (<year>2016</year>). <article-title>Comparison of model-based vs. data-driven methods for fault detection and isolation in engine idle speed control system</article-title>, in <source>Annual Conference of the PHM Society</source>, <fpage>8</fpage>. <pub-id pub-id-type="doi">10.36001/phmconf.2016.v8i1.2502</pub-id></citation>
</ref>
<ref id="B88">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yasemi</surname> <given-names>M.</given-names></name> <name><surname>Jolicoeur</surname> <given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>Modelling cell metabolism: a review on constraint-based steady-state and kinetic approaches</article-title>. <source>Processes</source> <volume>9</volume>, <fpage>322</fpage>. <pub-id pub-id-type="doi">10.3390/pr9020322</pub-id><pub-id pub-id-type="pmid">29472367</pub-id></citation></ref>
<ref id="B89">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Yousoff</surname> <given-names>S. N. M.</given-names></name> <name><surname>Baharin</surname> <given-names>A.</given-names></name> <name><surname>Abdullah</surname> <given-names>A.</given-names></name></person-group> (<year>2017</year>). <source>Differential Search Algorithm in Deep Neural Network for the Predictive Analysis of xylitol production in Escherichia Coli</source>. <pub-id pub-id-type="doi">10.1007/978-981-10-6502-6_5</pub-id></citation>
</ref>
<ref id="B90">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zampieri</surname> <given-names>G.</given-names></name> <name><surname>Vijayakumar</surname> <given-names>S.</given-names></name> <name><surname>Yaneske</surname> <given-names>E.</given-names></name> <name><surname>Angione</surname> <given-names>C.</given-names></name></person-group> (<year>2019</year>). <article-title>Machine and deep learning meet genome-scale metabolic modeling</article-title>. <source>PLoS Comput. Biol.</source> <volume>15</volume>, <fpage>e1007084</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pcbi.1007084</pub-id><pub-id pub-id-type="pmid">31295267</pub-id></citation></ref>
<ref id="B91">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zelezniak</surname> <given-names>A.</given-names></name> <name><surname>Vowinckel</surname> <given-names>J.</given-names></name> <name><surname>Capuano</surname> <given-names>F.</given-names></name> <name><surname>Messner</surname> <given-names>C. B.</given-names></name> <name><surname>Demichev</surname> <given-names>V.</given-names></name> <name><surname>Polowsky</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Machine learning predicts the yeast metabolome from the quantitative proteome of kinase knockouts</article-title>. <source>Cell Syst.</source> <volume>7</volume>, <fpage>269</fpage>&#x02013;<lpage>283.e6</lpage>. <pub-id pub-id-type="doi">10.1016/j.cels.2018.08.001</pub-id><pub-id pub-id-type="pmid">30195436</pub-id></citation></ref>
<ref id="B92">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Petersen</surname> <given-names>S. D.</given-names></name> <name><surname>Radivojevic</surname> <given-names>T.</given-names></name> <name><surname>Ramirez</surname> <given-names>A.</given-names></name> <name><surname>P&#x000E9;rez</surname> <given-names>A.</given-names></name> <name><surname>Abeliuk</surname> <given-names>E.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Predictive engineering and optimization of tryptophan metabolism in yeast through a combination of mechanistic and machine learning models</article-title>. <source>bioRxiv</source>. <fpage>1</fpage>&#x02013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.1101/858464</pub-id></citation>
</ref>
<ref id="B93">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>J.</given-names></name> <name><surname>Li</surname> <given-names>E.</given-names></name> <name><surname>Wei</surname> <given-names>H.</given-names></name> <name><surname>Li</surname> <given-names>C.</given-names></name> <name><surname>Qiao</surname> <given-names>Q.</given-names></name> <name><surname>Armaghani</surname> <given-names>D. J.</given-names></name></person-group> (<year>2019</year>). <article-title>Random forests and cubist algorithms for predicting shear strengths of rockfill materials</article-title>. <source>Appl. Sci.</source> <volume>9</volume>, <fpage>1621</fpage>. <pub-id pub-id-type="doi">10.3390/app9081621</pub-id></citation>
</ref>
</ref-list> 
</back>
</article>