<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Syst. Biol.</journal-id>
<journal-title>Frontiers in Systems Biology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Syst. Biol.</abbrev-journal-title>
<issn pub-type="epub">2674-0702</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1338518</article-id>
<article-id pub-id-type="doi">10.3389/fsysb.2024.1338518</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Systems Biology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Uncertainty quantified discovery of chemical reaction systems via Bayesian scientific machine learning</article-title>
<alt-title alt-title-type="left-running-head">Nieves et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fsysb.2024.1338518">10.3389/fsysb.2024.1338518</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Nieves</surname>
<given-names>Emily</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2532329/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Dandekar</surname>
<given-names>Raj</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Rackauckas</surname>
<given-names>Chris</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Computational Science and Engineering</institution>, <institution>Massachusetts Institute of Technology</institution>, <addr-line>Cambridge</addr-line>, <addr-line>MA</addr-line>, <country>United States</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Biological Engineering</institution>, <institution>Massachusetts Institute of Technology</institution>, <addr-line>Cambridge</addr-line>, <addr-line>MA</addr-line>, <country>United States</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/245490/overview">Maria I. Klapa</ext-link>, Foundation for Research and Technology, Hellas, Greece</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1150656/overview">Pasquale Palumbo</ext-link>, University of Milano-Bicocca, Italy</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1018290/overview">Zuyi (Jacky) Huang</ext-link>, Villanova University, United States</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Emily Nieves, <email>enieves@mit.edu</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>08</day>
<month>03</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>4</volume>
<elocation-id>1338518</elocation-id>
<history>
<date date-type="received">
<day>14</day>
<month>11</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>20</day>
<month>02</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Nieves, Dandekar and Rackauckas.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Nieves, Dandekar and Rackauckas</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>The recently proposed Chemical Reaction Neural Network (CRNN) discovers chemical reaction pathways from time resolved species concentration data in a deterministic manner. Since the weights and biases of a CRNN are physically interpretable, the CRNN acts as a digital twin of a classical chemical reaction network. In this study, we employ a Bayesian inference analysis coupled with neural ordinary differential equations (ODEs) on this digital twin to discover chemical reaction pathways in a probabilistic manner. This allows for estimation of the uncertainty surrounding the learned reaction network. To achieve this, we propose an algorithm which combines neural ODEs with a preconditioned stochastic gradient langevin descent (pSGLD) Bayesian framework, and ultimately performs posterior sampling on the neural network weights. We demonstrate the successful implementation of this algorithm on several reaction systems by not only recovering the chemical reaction pathways but also estimating the uncertainty in our predictions. We compare the results of the pSGLD with that of the standard SGLD and show that this optimizer more efficiently and accurately estimates the posterior of the reaction network parameters. Additionally, we demonstrate how the embedding of scientific knowledge improves extrapolation accuracy by comparing results to purely data-driven machine learning methods. Together, this provides a new framework for robust, autonomous Bayesian inference on unknown or complex chemical and biological reaction systems.</p>
</abstract>
<kwd-group>
<kwd>scientific machine learning</kwd>
<kwd>quantitative systems pharmacology</kwd>
<kwd>Bayesian machine learning</kwd>
<kwd>systems biology</kwd>
<kwd>machine learning</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Translational Systems Biology and In Silico Trials</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>Mechanistic models in Quantitative Systems Pharmacology (QSP) offer more interpretability than purely data-driven machine learning approaches, but practitioners often lack complete knowledge of the underlying systems. Methods of Scientific Machine Learning (SciML) account for this epistemic uncertainty by mixing neural network techniques with mechanistic modeling forms to allow for the automated discovery of missing components in mechanistic models <xref ref-type="bibr" rid="B19">Rackauckas et al. (2021)</xref>; <xref ref-type="bibr" rid="B5">Dandekar et al. (2020b)</xref>. In addition to interpretability, SciML models also offer superior prediction extrapolation beyond the domain in which they were trained, making them attractive candidates for QSP and systems biology applications.</p>
<p>The Chemical Reaction Neural Network (CRNN) is a recently proposed SciML method aimed at discovering chemical reaction pathways from concentration time course data without prior knowledge of the underlying system <xref ref-type="bibr" rid="B12">Ji and Deng (2021)</xref>. Specifically, the CRNN is a Neural ODE (Ordinary Differential Equation) architecture in that the neural network learns and represents the system of ODEs that comprise the reaction network <xref ref-type="bibr" rid="B2">Chen et al. (2018)</xref>. The CRNN is a neural network architecture which is designed to be a flexible representation of mass action kinetics models, thus imposing prior known constraints about the guiding equations of chemical reaction models while allowing for interpretable weights which represent the individual reaction weights. The weights of the embedded neural networks represent the stoichiometry of the chemical species and kinetic parameters of the reactions. Optimization of the neural network weights thus leads to the discovery of the reaction pathways involved in the network. As QSP models often involve complex, interconnected reaction networks such as signaling or metabolic reactions, the CRNN may aid in the development of these models when the reactions are unknown or incompletely known.</p>
<p>The learned reaction system and the kinetic parameters of the CRNN are obtained in a deterministic manner, however to build trust in predictions and help experts understand when more training data is needed, the uncertainty of the predictions should also be quantified. For this purpose, Bayesian inference frameworks can be integrated with the CRNN. Bayesian methods obtain estimates of the posterior probability density function of neural network parameters allowing for an understanding of prediction uncertainty. It is the goal of this work to extend the CRNN with a Bayesian framework to increase its utility in QSP and other scientific domains. This is similar to the work by Li et al. in which they also extend the CRNN to include a Bayesian framework <xref ref-type="bibr" rid="B15">Li et al. (2023)</xref>, but here we use a different methodology for sampling the posterior.</p>
<p>Recently, there has been an emergence of efficient Bayesian inference methods suitable for high-dimensional parameter systems, specifically methods like the No U-Turn Sampler (NUTS) <xref ref-type="bibr" rid="B8">Hoffman and Gelman (2014)</xref>, Stochastic Gradient Langevin Descent (SGLD) <xref ref-type="bibr" rid="B23">Welling and Teh (2011)</xref> and Stochastic Gradient Hamiltonian Monte Carlo (SGHMC) <xref ref-type="bibr" rid="B3">Chen et al. (2014)</xref>. These methods are based on Markov Chain Monte Carlo (MCMC) sampling and utilize gradient information to obtain estimates of the posterior. A number of studies have explored the use of these Bayesian methods to infer parameters of systems defined by ODEs <xref ref-type="bibr" rid="B16">Lunn et al. (2002)</xref>; <xref ref-type="bibr" rid="B22">von Toussaint (2011)</xref>; <xref ref-type="bibr" rid="B7">Girolami (2008)</xref>; <xref ref-type="bibr" rid="B9">Huang et al. (2020)</xref>, while others have used Bayesian methods to infer parameters of neural network models <xref ref-type="bibr" rid="B13">Jospin et al. (2020)</xref>; <xref ref-type="bibr" rid="B17">Maddox et al. (2019)</xref>; <xref ref-type="bibr" rid="B11">Izmailov et al. (2019)</xref>.</p>
<p>Specifically for Neural ODEs, the addition of Bayesian methods presents technical complexity as the training of the neural network is also tied to the solving of the ODE system. However recent work has demonstrated the feasibility of this approach <xref ref-type="bibr" rid="B4">Dandekar et al. (2020a)</xref>. To apply a Bayesian framework to a Neural ODE model, the authors make use of the Julia programming language which allows differential equation solvers <xref ref-type="bibr" rid="B18">Rackauckas et al. (2020)</xref>; <xref ref-type="bibr" rid="B20">Rackauckas and Nie (2017)</xref> to be combined with Julia&#x2019;s probabilistic programming ecosystem <xref ref-type="bibr" rid="B6">Ge et al. (2018)</xref>; <xref ref-type="bibr" rid="B24">Xu et al. (2019)</xref>. In this study, we similarly leverage the differential and probabilistic programming ecosystem of the Julia programming language to integrate the CRNN with Bayesian inference frameworks.</p>
<p>While powerful in their ability to estimate the posterior distribution, Bayesian inference methods may suffer from inefficient sampling of the posterior. The inefficiency often stems from the pathological curvature of the parameter space that leads to &#x201c;bouncing&#x201d; around minima. To address this, a new optimizer was proposed by Li et al. that combines an adaptive preconditioner with the SGLD algorithm <xref ref-type="bibr" rid="B14">Li et al. (2015)</xref>. The preconditioner performs a local transform of the parameter space such that the rate of curvature is the same in all directions, allowing for smoother, faster descent.</p>
<p>In this study, we propose combining the CRNN with the more efficient preconditioned SGLD optimizer <xref ref-type="bibr" rid="B14">Li et al. (2015)</xref> to discover chemical reaction networks and quantify the uncertainty of the learned reaction parameters, allowing for its more robust use in QSP and other scientific domains. We apply this Bayesian SciML method to a systems biology pathway to demonstrate its application to QSP. We also compare the results to those generated using purely data-driven machine learning methods to further demonstrate the advantages of SciML methods.</p>
</sec>
<sec sec-type="methods" id="s2">
<title>2 Methods</title>
<p>Similarly to the originally proposed CRNN <xref ref-type="bibr" rid="B12">Ji and Deng (2021)</xref>, we define the Chemical Reaction Neural Network (CRNN) to represent the following elementary reaction:<disp-formula id="e1">
<mml:math id="m1">
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bd;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi>A</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bd;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi>B</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bd;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi>C</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bd;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi>D</mml:mi>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>where <italic>&#x3bd;</italic> refers to the stoichometric coefficients of the respective chemical species.</p>
<p>The law of mass action applied to the example reaction prescribed in Equation <xref ref-type="disp-formula" rid="e1">(1)</xref> leads to the following expression for the reaction rate <italic>r</italic>:<disp-formula id="e2">
<mml:math id="m2">
<mml:mi>r</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">x</mml:mi>
<mml:mi mathvariant="normal">p</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mi>k</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bd;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mspace width="0.3333em"/>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bd;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mspace width="0.3333em"/>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mspace width="0.3333em"/>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mspace width="0.3333em"/>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(2)</label>
</disp-formula>where k is the rate constant of the reaction and [A] refers to the concentration of chemical species A.</p>
<p>This elementary reaction can be represented by a neuron governed by <italic>y</italic> &#x3d; <italic>&#x3c3;</italic>(<italic>wx</italic> &#x2b; <italic>b</italic>) where <italic>w</italic> are the weights, <italic>y</italic> is the neuron output, <italic>x</italic> is the input to the neuron and <italic>&#x3c3;</italic> is the activation function. The inputs are the concentration of the species in logarithmic scale, and the output is the production rate of all species: <inline-formula id="inf1">
<mml:math id="m3">
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>A</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. The input layer weights denote the reaction orders, i.e., [<italic>&#x3bd;</italic>
<sub>
<italic>A</italic>
</sub>, <italic>&#x3bd;</italic>
<sub>
<italic>B</italic>
</sub>, 0, 0] for the reactants [<italic>A</italic>, <italic>B</italic>, <italic>C</italic>, <italic>D</italic>] respectively. The output layer weights denote the stochiometric coefficients, i.e., [&#x2212;<italic>&#x3bd;</italic>
<sub>
<italic>A</italic>
</sub>, &#x2212; <italic>&#x3bd;</italic>
<sub>
<italic>B</italic>
</sub>, <italic>&#x3bd;</italic>
<sub>
<italic>C</italic>
</sub>, <italic>&#x3bd;</italic>
<sub>
<italic>D</italic>
</sub>] for [<italic>A</italic>, <italic>B</italic>, <italic>C</italic>, <italic>D</italic>] respectively. The bias denotes the rate constant in the logarithmic scale. This is depicted in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Overview of the Bayesian chemical reaction network which uses timeseries concentration data <bold>(A)</bold> to train a constrained neural network <bold>(B)</bold> that uses a preconditioned SGLD optimizer to reconstruct the reaction network and estimate the uncertainty in the learned stoichiometry and reaction rates <bold>(C)</bold>.</p>
</caption>
<graphic xlink:href="fsysb-04-1338518-g001.tif"/>
</fig>
<p>A reaction network with multiple chemical reactions can be denoted by a CRNN with a hidden layer. Since the weights and biases of a CRNN are physically interpretable, it is a digital twin of a classical chemical reaction neural network. In this study, we employ Bayesian inference analysis on this digital twin to discover chemical reaction pathways in a probabilistic manner.</p>
<p>Considering the vector of chemical species <italic>Y</italic> to be varying in time, we aim to recover a CRNN which satisfies the following equation:<disp-formula id="e3">
<mml:math id="m4">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>C</mml:mi>
<mml:mi>R</mml:mi>
<mml:mi>N</mml:mi>
<mml:mi>N</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(3)</label>
</disp-formula>where <inline-formula id="inf2">
<mml:math id="m5">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mo>&#x307;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> and is the rate of change of the concentration of chemical species Y.</p>
<p>Thus, by representing the ODE governing the variation of the chemical species <italic>Y</italic> by a neural network, we can solve Equation <xref ref-type="disp-formula" rid="e3">(3)</xref> using powerful, purpose built ODE solvers provided by the differential programming ecosystem of the Julia programming language <xref ref-type="bibr" rid="B20">Rackauckas and Nie (2017)</xref>. The solution we obtain by solving the ODE is denoted by <italic>Y</italic>
<sub>CRNN</sub>(<italic>t</italic>). In this study, the loss function between the actual species concentrations and the predicted values is governed by the Mean Absolute Error (MAE) with L2 regularization, and given by<disp-formula id="e4">
<mml:math id="m6">
<mml:mi>L</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="normal">M</mml:mi>
<mml:mi mathvariant="normal">A</mml:mi>
<mml:mi mathvariant="normal">E</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>CRNN</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>data</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
<p>where <italic>&#x3b8;</italic>
<sub>
<italic>t</italic>
</sub> are the weights of the CRNN at time <italic>t</italic>, <italic>&#x3bb;</italic>
<sub>
<italic>L</italic>2</sub> is the L2 regularization coefficient, <italic>Y</italic>
<sub>
<italic>CRNN</italic>
</sub>(<italic>t</italic>) are the CRNN predicted values of the species concentrations at time t, and <italic>Y</italic>
<sub>
<italic>data</italic>
</sub>(<italic>t</italic>) is the true species concentration at time t.</p>
<p>To incorporate uncertainty into our predictions, we augment the gradient descent algorithms with a Bayesian framework. Our algorithm is shown in <xref ref-type="statement" rid="algorithm_1">Algorithm 1</xref>. We use a variation of the Stochastic Gradient Langevin Descent (SGLD) algorithm <xref ref-type="bibr" rid="B23">Welling and Teh (2011)</xref> and add a preconditioning term <italic>G</italic>(<italic>&#x3b8;</italic>
<sub>
<italic>t</italic>
</sub>), which adapts to the local geometry leading to more efficient training of deep neural networks as noted in <xref ref-type="bibr" rid="B14">Li et al. (2015)</xref>. In <xref ref-type="statement" rid="algorithm_1">Algorithm 1</xref>, <italic>&#x3b2;</italic> is a smoothing factor, <italic>&#x3bb;</italic> is a small constant that may be tuned, n is the number of training samples, and &#x2299; and &#x2298; refer to element-wise vector product and division respectively.</p>
<p>Similarly to <xref ref-type="bibr" rid="B14">Li et al. (2015)</xref>, we utilize a decreasing step-size that&#x2019;s given by the following equation:<disp-formula id="e5">
<mml:math id="m7">
<mml:mi>&#x3f5;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b3;</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
<label>(5)</label>
</disp-formula>where <italic>&#x3f5;</italic>(<italic>t</italic>) is the step-size, t is the epoch number, and <italic>&#x3b1;</italic>, b and <italic>&#x3b3;</italic> are tunable parameters.</p>
<p>
<statement content-type="algorithm" id="algorithm_1">
<label>Algorithm 1</label>
<p>Preconditioned SGLD applied to Neural ODE.<list list-type="simple">
<list-item>
<p>
<bold>Inputs:</bold> <italic>&#x3f5;</italic>
<sub>
<italic>t</italic>
</sub>(<italic>t</italic> &#x3d; 1: <italic>T</italic>), <italic>&#x3bb;</italic>, <italic>&#x3b2;</italic>, <italic>&#x3bb;</italic>
<sub>
<italic>L</italic>2</sub>
<bold>. Outputs:</bold> <italic>&#x3b8;</italic>
<sub>
<italic>t</italic>
</sub>. <bold>Initialize:</bold> <italic>V</italic>
<sub>0</sub> &#x3d; 0, <italic>&#x3b8;</italic>
<sub>
<italic>t</italic>
</sub>: Xavier NN initialization. <bold>for t in 1: T do</bold>
</p>
</list-item>
<list-item>
<p>sample batch with size <italic>n</italic>
</p>
</list-item>
<list-item>
<p>
<italic>Y</italic>
<sub>CRNN</sub>(<italic>t</italic>) &#x3d; ODESolve(<italic>CRNN</italic>(<italic>Y</italic>, <italic>&#x3b8;</italic>
<sub>
<italic>t</italic>
</sub>), <italic>Y</italic>
<sub>0</sub>)</p>
</list-item>
<list-item>
<p>
<italic>L</italic>(<italic>&#x3b8;</italic>
<sub>
<italic>t</italic>
</sub>) &#x3d; MAE(<italic>Y</italic>
<sub>CRNN</sub>(<italic>t</italic>), <italic>Y</italic>
<sub>data</sub>(<italic>t</italic>)) &#x2b; <italic>&#x3bb;</italic>
<sub>
<italic>L</italic>2</sub>(<italic>&#x3b8;</italic>
<sub>
<italic>t</italic>
</sub> &#x22c5;<italic>&#x3b8;</italic>
<sub>
<italic>t</italic>
</sub>)</p>
</list-item>
<list-item>
<p>
<inline-formula id="inf3">
<mml:math id="m8">
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
<mml:mi>&#x2207;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:mi>&#x2207;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>
<inline-formula id="inf4">
<mml:math id="m9">
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="normal">d</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">g</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#x2298;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
<mml:mi mathvariant="bold-italic">I</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mo>(</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msqrt>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>
<inline-formula id="inf5">
<mml:math id="m10">
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x2207;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mtext mathvariant="italic">n</mml:mtext>
</mml:mrow>
</mml:mfrac>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
</mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3f5;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi>G</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>
<bold>end for</bold>
</p>
</list-item>
</list>
</p>
</statement>
</p>
<p>In this algorithm, the induced noise and step-size decay to zero as the training proceeds. By adding Gaussian noise to each step of the gradient descent, the optimizer finds a local model, but never converges due to the induced noise. The decayed learning rate also prevents the optimizer from leaving the model, leading to random walking around the mode. The process after settling in a local model is the sampling phase, and in this phase we draw parameter posterior samples. The point at which the optimizer enters the sampling phase can be determined by observing when the loss function stagnates. Thus, the parameters <italic>&#x3b8;</italic> of the CRNN obtained during the sampling phase lead to the parameter posterior which ultimately helps in encoding uncertainty in the prediction of the chemical reaction pathways.</p>
</sec>
<sec sec-type="results" id="s3">
<title>3 Results</title>
<sec id="s3-1">
<title>3.1 Case 1: simple reaction network</title>
<p>We first consider the chemical reaction given in <xref ref-type="bibr" rid="B21">Searson et al. (2014)</xref> comprising of five species: [<italic>A</italic>, <italic>B</italic>, <italic>C</italic>, <italic>D</italic>, <italic>E</italic>] involved in four reactions, described in <xref ref-type="table" rid="T1">Table 1</xref>. Similar to <xref ref-type="bibr" rid="B12">Ji and Deng (2021)</xref>, a total of 100 synthetic datasets were simulated. Initial conditions of the first and second species, A and B, were randomly sampled from 0.2&#x2013;1.2. Initial conditions for species C, D, and E were set to 0. Each dataset is comprised of 100 time points. Out of the 100 datasets, 90 are used for training and the remaining are reserved for validation. Further, mini-batching is employed to accelerate the training process and add additional regularization. As the validation loss function stagnates, early stopping is employed. Mini-batching and early stopping prevent the CRNN from overfitting the training data. Hyperparameters were tuned via manual searching to find values that minimized error and improved convergence. Here, we choose <italic>&#x3bb;</italic> &#x3d; 1<italic>e</italic> &#x2212; 6, <italic>&#x3b2;</italic> &#x3d; 0.9, and <italic>&#x3bb;</italic>
<sub>
<italic>L</italic>2</sub> &#x3d; 1<italic>e</italic> &#x2212; 5. For the step-size parameters we choose <italic>&#x3b1;</italic> &#x3d; 0.001, <italic>b</italic> &#x3d; 0.15, and <italic>&#x3b3;</italic> &#x3d; 0.005.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Case 1 ground truth reactions.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Equation</th>
<th align="center">Rate</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">2 <italic>A</italic> &#x2192; <italic>B</italic>
</td>
<td align="center">0.1</td>
</tr>
<tr>
<td align="center">
<italic>A</italic> &#x2192; <italic>C</italic>
</td>
<td align="center">0.2</td>
</tr>
<tr>
<td align="center">
<italic>C</italic> &#x2192; <italic>D</italic>
</td>
<td align="center">0.13</td>
</tr>
<tr>
<td align="center">
<italic>B</italic>&#x2b; <italic>D</italic> &#x2192; <italic>E</italic>
</td>
<td align="center">0.3</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
<xref ref-type="fig" rid="F2">Figure 2</xref> shows the comparison of the Bayesian Neural ODE prediction compared to the data for the species A, B, C, D, and E in the four reaction system described in Case 1. A total of 500 posterior sample prediction are superimposed on the data; and a good agreement is seen between all trajectories and the data.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Comparison of the Bayesian chemical reaction neural network prediction to the data for the species <bold>(A&#x2013;E)</bold> in the four reactions described in Case 1. No noise was added to the data. A total of 500 posterior sample predictions are superimposed on the data.</p>
</caption>
<graphic xlink:href="fsysb-04-1338518-g002.tif"/>
</fig>
<p>
<xref ref-type="fig" rid="F3">Figure 3</xref> shows the recovery probability of species A, B, C, D, and E in the four reactions described in Case 1, obtained using the preconditioned SGLD described in <xref ref-type="statement" rid="algorithm_1">Algorithm 1</xref>. A posterior set of 1,000 samples was chosen for the estimation. A species is considered to be present in a particular reaction if its weight is greater than 1<italic>e</italic> &#x2212; 4. The probability a species is contributing to a reaction is given by the ratio of the number of samples in which the species is present in that reaction to the total number of posterior samples (1,000 in this case).</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Reactant recovery probability of chemical species (top) and posterior distributions of learned reaction rates (bottom) for the four reactions described in Case 1. No noise was added to the training data and a posterior set of 1,000 samples was chosen for the estimation.</p>
</caption>
<graphic xlink:href="fsysb-04-1338518-g003.tif"/>
</fig>
<p>However, even if the probability of a species to be present in a reaction is high, its weight can be low compared to other species. Thus, we define a score metric for a species <italic>i</italic> in reaction <italic>j</italic> governed by weight <italic>w</italic>
<sub>
<italic>ij</italic>
</sub> to be as follows:<disp-formula id="e6">
<mml:math id="m11">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">s</mml:mi>
<mml:mi mathvariant="normal">c</mml:mi>
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mi mathvariant="normal">r</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>/</mml:mo>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">a</mml:mi>
<mml:mi mathvariant="normal">x</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mi mathvariant="normal">u</mml:mi>
<mml:mi mathvariant="normal">m</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<p>where <italic>p</italic>
<sub>
<italic>ij</italic>
</sub> is the recovery probability for the species shown in <xref ref-type="fig" rid="F3">Figure 3</xref>, <italic>&#x2211;w</italic>
<sub>
<italic>ij</italic>
</sub> is the summation of the weight values of species <italic>i</italic> for reaction <italic>j</italic> over all posterior samples and maximum(<italic>w</italic>
<sub>:<italic>j</italic>
</sub>) is the maximum value of this summation for the reaction <italic>j</italic> among all species.</p>
<p>From the recovered score metrics shown in <xref ref-type="fig" rid="F4">Figure 4</xref>, we can see that we obtain a sparser discovery of the reaction pathways; which match with the data shown in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Recovered score metric for each of the four reactions shown in case 1. Score metrics are calculated using Equation <xref ref-type="disp-formula" rid="e6">6</xref> and represent the weighted probability that a species is a reactant of each reaction. No noise was added to the data.</p>
</caption>
<graphic xlink:href="fsysb-04-1338518-g004.tif"/>
</fig>
<sec id="s3-1-1">
<title>3.1.1 Moderately noisy data</title>
<p>We subsequently train on moderately noisy data with standard deviation of the noise being 5% of the concentrations. The data is visualized in <xref ref-type="fig" rid="F5">Figure 5</xref>. <xref ref-type="fig" rid="F5">Figure 5</xref> shows the comparison of the Bayesian Neural ODE predictions to the data for 500 posterior samples. Even with the addition of noise, our methodology can correctly capture the time course of all the chemical species.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Comparison of the Bayesian chemical reaction neural network prediction to the data for the species <bold>(A&#x2013;E)</bold> in the four reactions described in Case 1. The model was trained with moderately noisy data, where the standard deviation of the noise was set to 5% of the concentrations. A total of 500 posterior sample predictions are superimposed on the data.</p>
</caption>
<graphic xlink:href="fsysb-04-1338518-g005.tif"/>
</fig>
<p>The reactant recovery probability and the score metric plots are shown in <xref ref-type="fig" rid="F6">Figure 6</xref>, <xref ref-type="fig" rid="F7">Figure 7</xref> respectively. Even with the presence of noise, the probability plots are seen to assign higher probability to the correct reaction pathways. The score plots accurately predict the reaction pathways as seen in the data (<xref ref-type="table" rid="T1">Table 1</xref>). <xref ref-type="fig" rid="F6">Figure 6</xref> also shows the posterior distribution of the reaction rates. Even with the addition of the noise, the distributions are centered close to the true reaction rate for all reactions except reaction 1, which still contains the true reaction rate within its distribution.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Reactant recovery probability of chemical species (top) and posterior distributions of learned reaction rates (bottom) for the four reactions described in Case 1. The model was trained with moderately noisy data, where the standard deviation of the noise was set to 5% of the concentrations. A posterior set of 1,000 samples was chosen for the estimation.</p>
</caption>
<graphic xlink:href="fsysb-04-1338518-g006.tif"/>
</fig>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Recovered score metrics for each of the four reactions shown in case 1. Score metrics are calculated using equation <xref ref-type="disp-formula" rid="e6">6</xref> and represent the weighted probability that a species is a reactant of each reaction. The model was trained with moderately noisy data, where the standard deviation of the noise is set to 5% of the concentrations.</p>
</caption>
<graphic xlink:href="fsysb-04-1338518-g007.tif"/>
</fig>
</sec>
<sec id="s3-1-2">
<title>3.1.2 Highly noisy data</title>
<p>To test the limits of the Bayesian Neural ODE framework, we subsequently train on highly noisy data with the standard deviation of the added noise being 50% of the concentrations. The Bayesian Neural ODE predictions are visualized in <xref ref-type="sec" rid="s10">Supplementary Figure S1</xref> alongside the data. We observe that even with 50% noise, our methodology captures the time course data well.</p>
<p>The reactant recovery probability and the score metric plots are shown in <xref ref-type="sec" rid="s10">Supplementary Figures S2, S3</xref> respectively. Even with the addition of 50% noise, the Bayesian CRNN still accurately predicts the reaction pathways. Similarly, the posterior distributions of the reaction rates shown in <xref ref-type="sec" rid="s10">Supplementary Figure S2</xref> contain the true reaction rates, however they are no longer centered at the true reaction rate, with the exception of reaction 4.</p>
</sec>
<sec id="s3-1-3">
<title>3.1.3 Comparison to SGLD</title>
<p>We compare our results to the standard SGLD optimizer to demonstrate the effect of the preconditioner. <xref ref-type="sec" rid="s10">Supplementary Figure S4</xref> shows training and validation loss over epochs for both the pSGLD and the standard SGLD. The preconditioned algorithm allows for faster entry into the sampling phase, entering around 6,500 epochs before the SGLD.</p>
<p>
<xref ref-type="fig" rid="F8">Figure 8</xref> compares the learned reaction rates in the case where 5% noise is added to the training data between the pSGLD and the SGLD. From this figure we can see that in general the pSGLD achieved a posterior that more closely matched the true reaction rates. The average percent deviation of the samples from the true reaction rates for the pSGLD were 50.49% &#xb1; 20.01, 7.44% &#xb1; 7.73, 6.70% &#xb1; 4.68, and 9.50% &#xb1; 6.14 for reactions 1, 2, 3, and 4 respectively. The average percent deviation from the true reaction rates for the standard SGLD were 3.78% &#xb1; 2.60, 6.96% &#xb1; 2.17, 13.27% &#xb1; 2.49, and 25.65% &#xb1; 4.52 for reactions 1, 2, 3, and 4 respectively. The pSGLD is similar or better in all reactions except for reaction 1. In reaction 1, the pSGLD has lower confidence, as shown in the low density, wide coverage of the posterior samples. In the other reactions, the SGLD demonstrates higher confidence in incorrect reaction rates whereas the pSGLD better represents the uncertainty by estimating a posterior that is centered at the true reaction rate.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Comparison of estimated posterior of recovered reaction rates between the preconditioned SGLD and SGLD optimizer for the four reactions in case 1 where noise with a standard deviation of 5% was added.</p>
</caption>
<graphic xlink:href="fsysb-04-1338518-g008.tif"/>
</fig>
</sec>
<sec id="s3-1-4">
<title>3.1.4 Comparison to LSTM</title>
<p>We also compared the Bayesian CRNN to a purely data-driven machine learning approach, in this case a Long Short-Term Memory (LSTM) model. This architecture predicts a sequence, in this case the timecourses for the chemical reaction species. We utilize the LSTM modules from Flux.jl <xref ref-type="bibr" rid="B10">Innes (2018)</xref>, and connect two LSTM modules with 200 hidden nodes each to a densely connected linear output layer. Similarly to the neural ODE, we train for 2000 epochs with the ADAM optimizer (learning rate tuned to 0.001).</p>
<p>From <xref ref-type="fig" rid="F9">Figure 9</xref>, we can see that the results of the CRNN and LSTM model are nearly indistinguishable in the training region, but the LSTM model diverges from the true concentration values in the extrapolation region while the CRNN matches the true values. The mean absolute percent error in the extrapolation region was 9.0% &#xb1; 3.2 for the CRNN in comparison to 90.93% &#xb1; 58.83 for the LSTM-based model.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>Comparison of LSTM model with Bayesian CRNN (chemical reaction neural network) for chemical species <bold>(A&#x2013;E)</bold>. Only time points 0&#x2013;30 were used for training as depicted by the training boundary. Time points 30&#x2013;40 were used for comparison of the accuracy of extrapolation beyond training time points.</p>
</caption>
<graphic xlink:href="fsysb-04-1338518-g009.tif"/>
</fig>
</sec>
<sec id="s3-1-5">
<title>3.1.5 Comparison to neural ODE</title>
<p>Additionally, we compared the Bayesian CRNN to a purely data-driven neural ODE approach, where the structure does not include embedded scientific knowledge. For the neural ODE, we built a densely connected neural network with two hidden layers with 50 nodes each and hyperbolic tangent activation functions. The neural network was trained for 2000 epochs with the ADAM optimizer (learning rate set to 0.001). Here we compare the ability of the learned networks to extrapolate beyond the time points used for training. We use timepoints 0&#x2013;30s for training and then use the trained system to predict concentrations 30&#x2013;40 extrapolate to timepoint 40.</p>
<p>The results are summarized in <xref ref-type="sec" rid="s10">Supplementary Figure S5</xref> and demonstrate the superior ability of the CRNN to accurately predict beyond region used for training. The mean absolute percent error in the extrapolation region was 9.0% &#xb1; 3.2 for the CRNN in comparison to 254.7% &#xb1; 109.8 for the neural ODE.</p>
</sec>
</sec>
<sec id="s3-2">
<title>3.2 Case 2: EGFR- STAT3 pathway</title>
<p>To further test the capabilities of the Bayesian CRNN, we attempted to recover the reaction network governing the EGFR- STAT3 signaling pathway <xref ref-type="bibr" rid="B1">Bidkhori et al. (2012)</xref>. Our simplified representation of this pathway consists of seven chemical species and six reactions listed in <xref ref-type="table" rid="T2">Table 2</xref>. Here we omit the reverse reactions because they cannot be identified from the forward reactions with this framework.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Case 2 EGFR-STAT3 reactions.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Equation</th>
<th align="center">Rate</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">
<italic>EGF</italic> &#x2212; <italic>EGFR</italic> &#x2b; <italic>EGF</italic> &#x2212; <italic>EGFR</italic> &#x2192; <italic>EGF</italic> &#x2212; <italic>EGFR</italic>2</td>
<td align="center">10<italic>uM</italic>
<sup>&#x2212;</sup>1<italic>s</italic>
<sup>&#x2212;</sup>1</td>
</tr>
<tr>
<td align="center">
<italic>EGF</italic> &#x2212; <italic>EGFR</italic>2 &#x2192; <italic>pEGF</italic> &#x2212; <italic>EGFR</italic>2</td>
<td align="center">2.014 s<sup>&#x2212;</sup>1</td>
</tr>
<tr>
<td align="center">
<italic>pEGF</italic> &#x2212; <italic>EGFR</italic>2 &#x2b; <italic>STAT</italic>3 &#x2192; <italic>pEGF</italic> &#x2212; <italic>EGFR</italic>2 &#x2212; <italic>STAT</italic>3</td>
<td align="center">5.5<italic>uM</italic>
<sup>&#x2212;</sup>1<italic>s</italic>
<sup>&#x2212;</sup>1</td>
</tr>
<tr>
<td align="center">
<italic>pEGF</italic> &#x2212; <italic>EGFR</italic>2 &#x2212; <italic>STAT</italic>3 &#x2192; <italic>pEGF</italic> &#x2212; <italic>EGFR</italic>2 &#x2b; <italic>STAT</italic>3</td>
<td align="center">11.74 s<sup>&#x2212;</sup>1</td>
</tr>
<tr>
<td align="center">
<italic>pEGF</italic> &#x2212; <italic>EGFR</italic>2 &#x2212; <italic>STAT</italic>3 &#x2192; <italic>pEGF</italic> &#x2212; <italic>EGFR</italic>2 &#x2b; <italic>pSTAT</italic>3</td>
<td align="center">0.4 s<sup>&#x2212;</sup>1</td>
</tr>
<tr>
<td align="center">
<italic>pSTAT</italic>3 &#x2b; <italic>pSTAT</italic>3 &#x2192; <italic>pSTAT</italic>3 &#x2212; <italic>pSTAT</italic>3</td>
<td align="center">20<italic>uM</italic>
<sup>&#x2212;</sup>1<italic>s</italic>
<sup>&#x2212;</sup>1</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In this case, unlike the simple system in case one, the reaction rates and concentrations vary by one or two orders of magnitude. To account for this difference in scale, we adjust our loss function to use mean absolute percent error (MAPE) instead of MAE. Additionally to allow for more reactions and chemical species, we use L1 regularization on the weights that correspond to the stoichiometric coefficients of the reactions <italic>&#x3b8;</italic>
<sub>
<italic>v</italic>
</sub>, as these would comprise a sparse matrix as the reaction network grows. We continue to use L2 regularization for the weights that correspond to the reaction rates, <italic>&#x3b8;</italic>
<sub>
<italic>r</italic>
</sub>. This change to the loss function is depicted in the equation below.<disp-formula id="e7">
<mml:math id="m12">
<mml:mi>L</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="normal">M</mml:mi>
<mml:mi mathvariant="normal">A</mml:mi>
<mml:mi mathvariant="normal">P</mml:mi>
<mml:mi mathvariant="normal">E</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>CRNN</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>Y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mtext>data</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x22c5;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(7)</label>
</disp-formula>
</p>
<p>We sample from the initial conditions ranging from 0 to 1 to obtain 300 training examples. With 5% Gaussian noise added to simulated training data. For training, we set <italic>&#x3bb;</italic> &#x3d; 1<italic>e</italic> &#x2212; 8, <italic>&#x3b2;</italic> &#x3d; 0.9, the L1 coefficient (<italic>&#x3bb;</italic>
<sub>
<italic>L</italic>1</sub>) &#x3d; 1e-4 and the L2 coefficient (<italic>&#x3bb;</italic>
<sub>
<italic>L</italic>2</sub>) &#x3d; 1e-5. Step-size hyperparameters <italic>&#x3b1;</italic>, b, and <italic>&#x3b3;</italic> were set to 0.001, 0.15, and 0.005 respectively.</p>
<p>After training, the Bayesian CRNN accurately captures the time course for all species in the simplified EGFR-STAT3 pathway and is robust to noise as shown in <xref ref-type="fig" rid="F10">Figure 10</xref> and from the score plots in <xref ref-type="fig" rid="F11">Figure 11</xref>, we can see the CRNN also correctly identifies the reactants of all the reactions. <xref ref-type="fig" rid="F12">Figure 12</xref> shows the uncorrected reactant probabilities and the probability densities for the reaction rates. The true reaction rates are contained within the probability density functions of reactions 1 and 6 depicted in <xref ref-type="fig" rid="F12">Figure 12</xref> a and f, but the other reactions do not include the true rates despite the time courses of the reactants and products being accurately predicted, demonstrating a potential identifiability issue. However, the average percent deviation from the true rates across a posterior set of 1,000 samples remains low, below 40%, for all reactions except reaction 5 which overestimated the rate six-fold, as shown in <xref ref-type="table" rid="T3">Table 3</xref>. Reactions 1, 2, and 6 did particularly well with an average percent deviation of 1.22% &#xb1; 0.81, 9.74% &#xb1; 0.64, and 4.08% &#xb1; 1.28 respectively.</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Comparison of Bayesian chemical reaction neural network prediction compared to training data for EGFR-STAT3 pathway described in case 2. Gaussian noise with standard deviation of 5% was added to the training data. 500 posterior sample predictions are superimposed on the data.</p>
</caption>
<graphic xlink:href="fsysb-04-1338518-g010.tif"/>
</fig>
<fig id="F11" position="float">
<label>FIGURE 11</label>
<caption>
<p>Recovered score metric for each of the six reactions included in case 2. Score metrics are calculated using equation <xref ref-type="disp-formula" rid="e6">6</xref> and represent the weighted probability that a species is a reactant of each reaction. Gaussian noise with standard deviation of 5% was added to the training data.</p>
</caption>
<graphic xlink:href="fsysb-04-1338518-g011.tif"/>
</fig>
<fig id="F12" position="float">
<label>FIGURE 12</label>
<caption>
<p>Reactant recovery probability for each of the seven species included in case 2 and posterior distributions of the learned reaction rates for each of the case 2 reactions. Gaussian noise with standard deviation of 5% was added to the training data. A posterior set of 1,000 samples was chosen for the estimation.</p>
</caption>
<graphic xlink:href="fsysb-04-1338518-g012.tif"/>
</fig>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Average percent deviations from true reaction rates for case 2.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Equation</th>
<th align="left">Average percent deviation</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">
<italic>EGF</italic> &#x2212; <italic>EGFR</italic> &#x2b; <italic>EGF</italic> &#x2212; <italic>EGFR</italic> &#x2192; <italic>EGF</italic> &#x2212; <italic>EGFR</italic>2</td>
<td align="left">1.22% &#xb1; 0.81</td>
</tr>
<tr>
<td align="center">
<italic>EGF</italic> &#x2212; <italic>EGFR</italic>2 &#x2192; <italic>pEGF</italic> &#x2212; <italic>EGFR</italic>2</td>
<td align="left">9.74% &#xb1; 0.64</td>
</tr>
<tr>
<td align="center">
<italic>pEGF</italic> &#x2212; <italic>EGFR</italic>2 &#x2b; <italic>STAT</italic>3 &#x2192; <italic>pEGF</italic> &#x2212; <italic>EGFR</italic>2 &#x2212; <italic>STAT</italic>3</td>
<td align="left">22.41% &#xb1; 0.77</td>
</tr>
<tr>
<td align="center">
<italic>pEGF</italic> &#x2212; <italic>EGFR</italic>2 &#x2212; <italic>STAT</italic>3 &#x2192; <italic>pEGF</italic> &#x2212; <italic>EGFR</italic>2 &#x2b; <italic>STAT</italic>3</td>
<td align="left">38.59% &#xb1; 0.56</td>
</tr>
<tr>
<td align="center">
<italic>pEGF</italic> &#x2212; <italic>EGFR</italic>2 &#x2212; <italic>STAT</italic>3 &#x2192; <italic>pEGF</italic> &#x2212; <italic>EGFR</italic>2 &#x2b; <italic>pSTAT</italic>3</td>
<td align="left">643.32% &#xb1; 8.92</td>
</tr>
<tr>
<td align="center">
<italic>pSTAT</italic>3 &#x2b; <italic>pSTAT</italic>3 &#x2192; <italic>pSTAT</italic>3 &#x2212; <italic>pSTAT</italic>3</td>
<td align="left">4.08% &#xb1; 1.28</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>4 Discussion</title>
<p>In this study, we combine the previously published chemical reaction neural network (CRNN) <xref ref-type="bibr" rid="B12">Ji and Deng (2021)</xref> with a preconditioned Stochastic Gradient Langevin Descent (pSGLD) Markov Chain Monte Carlo (MCMC) stepper <xref ref-type="bibr" rid="B14">Li et al. (2015)</xref> of neural ODEs, allowing for the efficient discovery of chemical reaction networks from concentration time course data and quantification of the uncertainty in learned parameters. The specialized form of the CRNN is constrained to satisfy the kinetic equations of reactions, which enables the identification of the learned neural network parameters as the stoichiometric coefficients and reaction rates of the reactions while the Bayesian optimizer provides an estimate of the uncertainty of those parameters.</p>
<p>We tested the algorithm with a simple system of four reactions and five species. With no noise added to the training data, the algorithm perfectly captured the reaction network and accurately matched the time course for each species. Additionally, the posterior distributions of the reaction rates were centered around the true reaction rates. With the addition of noise (standard deviation of 5% of the true concentration) to the training data, the reaction networks were also accurately captured, however the posterior distributions of the reaction rates were no longer centered at the true value but still contained within the distribution. Even with the addition of a large amount of noise (standard deviation of 50%), the reactants of each reaction were correctly identified, and posterior distributions still contained the true reaction rates. To obtain posterior distributions centered at the true value in cases with added noise it is likely that more data is needed as there are various combinations of reaction rates that minimize the loss between the training data and the learned time courses.</p>
<p>We demonstrated that the preconditioned SGLD is necessary to increase training robustness by comparing the results of our Bayesian CRNN trained with the preconditioned SGLD optimizer to a Bayesian CRNN trained with a standard SGLD optimizer and found that not only is the algorithm more efficient and requires less epochs to train but it also more accurately approximates the posterior. The standard SGLD shows high confidence in the incorrect value while the pSGLD in general shows lower confidence but includes the true values.</p>
<p>Additionally, we demonstrated that the Bayesian CRNN in addition to being interpretable, improves upon prediction accuracy when extrapolated beyond the time range used for training by comparing the CRNN to a purely data-driven neural ODE model and an LSTM-based model that contain no prior scientific knowledge about the structure of chemical reactions.</p>
<p>Application of this model to a larger system, a simplified representation of the EGFR-STAT3 pathway containing seven species and six reactions revealed the limitations of this model. While the model was able to accurately predict the time course of all species and learn the correct reactions, as evaluated by correct prediction of reactants and products, the posterior distributions of the reaction rates for four out of six reactions did not include the true rates. This is not completely unexpected since as reaction networks become larger, different combinations of reaction parameters may result in the same concentration dynamics. Future work will be needed to solve this identifiability issue, allowing for the use of this model on larger reaction systems.</p>
<p>The combination of a preconditioned SGLD optimizer with the CRNN allows for efficient training and posterior sampling as well as reliable estimates of parametric uncertainty while accounting for epistemic uncertainty in a way that builds confidence in the learned reaction network and can help determine if more data is needed. This work demonstrates that knowledge-embedded machine learning techniques via SciML approaches may greatly outperform purely deep learning methods in a small-medium data regime that is common in Quantitative Systems Pharmacology (QSP) and demonstrates viable techniques for the automated discovery of QSP models directly from timeseries data.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: <ext-link ext-link-type="uri" xlink:href="https://github.com/nievesemily8/Bayesian_CRNN">https://github.com/nievesemily8/Bayesian_CRNN</ext-link>.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>EN: Writing&#x2013;review and editing, Writing&#x2013;original draft, Visualization, Software, Methodology, Investigation, Writing&#x2013;review and editing, Writing&#x2013;original draft, Visualization, Software, Methodology, Investigation, RD: Writing&#x2013;review and editing, Writing&#x2013;original draft, Visualization, Software, Methodology, Investigation, Writing&#x2013;review and editing, Writing&#x2013;original draft, Visualization, Software, Methodology, Investigation, CR: Writing&#x2013;review and editing, Supervision, Investigation, Funding acquisition, Conceptualization, Writing&#x2013;review and editing, Supervision, Investigation, Funding acquisition, Conceptualization.</p>
</sec>
<sec sec-type="funding-information" id="s7">
<title>Funding</title>
<p>The author(s) declare that no financial support was received for the research, authorship, and/or publication of this article.</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fsysb.2024.1338518/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fsysb.2024.1338518/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet1.PDF" id="SM1" mimetype="application/PDF" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bidkhori</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Moeini</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Masoudi-Nejad</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Modeling of tumor progression in nsclc and intrinsic resistance to tki in loss of pten expression</article-title>. <source>PLoS One</source> <volume>7</volume>, <fpage>e48004</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0048004</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>R. T.</given-names>
</name>
<name>
<surname>Rubanova</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bettencourt</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Duvenaud</surname>
<given-names>D. K.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Neural ordinary differential equations</article-title>,&#x201d; in <source>Advances in neural information processing systems</source>, <fpage>6571</fpage>&#x2013;<lpage>6583</lpage>.</citation>
</ref>
<ref id="B3">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Fox</surname>
<given-names>E. B.</given-names>
</name>
<name>
<surname>Guestrin</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2014</year>). <source>Stochastic gradient Hamiltonian Monte Carlo</source>. <pub-id pub-id-type="doi">10.48550/ARXIV.1402.4102</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Dandekar</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Chung</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Dixit</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Tarek</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Garcia-Valadez</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Vemula</surname>
<given-names>K. V.</given-names>
</name>
<etal/>
</person-group> (<year>2020a</year>). <source>Bayesian neural ordinary differential equations</source>. <comment>
<italic>arXiv preprint arXiv:2012.07244</italic>
</comment>.</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dandekar</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Rackauckas</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Barbastathis</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2020b</year>). <article-title>A machine learning-aided global diagnostic and comparative tool to assess effect of quarantine control in covid-19 spread</article-title>. <source>Patterns</source> <volume>1</volume>, <fpage>100145</fpage>. <pub-id pub-id-type="doi">10.1016/j.patter.2020.100145</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ge</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Ghahramani</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Turing: a language for flexible probabilistic inference</article-title>,&#x201d; in <conf-name>International Conference on Artificial Intelligence and Statistics, AISTATS 2018</conf-name>, <conf-loc>Playa Blanca, Lanzarote, Canary Islands, Spain</conf-loc>, <conf-date>9-11 April 2018</conf-date>, <fpage>1682</fpage>&#x2013;<lpage>1690</lpage>.</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Girolami</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Bayesian inference for differential equations</article-title>. <source>Theor. Comput. Sci.</source> <volume>408</volume>, <fpage>4</fpage>&#x2013;<lpage>16</lpage>. <comment>Computational Methods in Systems Biology</comment>. <pub-id pub-id-type="doi">10.1016/j.tcs.2008.07.005</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hoffman</surname>
<given-names>M. D.</given-names>
</name>
<name>
<surname>Gelman</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>The no-u-turn sampler: adaptively setting path lengths in Hamiltonian Monte Carlo</article-title>. <source>J. Mach. Learn. Res.</source> <volume>15</volume>, <fpage>1593</fpage>&#x2013;<lpage>1623</lpage>. <pub-id pub-id-type="doi">10.48550/arXiv.1111.4246</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Handel</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A bayesian approach to estimate parameters of ordinary differential equation</article-title>. <source>Comput. Stat.</source> <volume>35</volume>, <fpage>1481</fpage>&#x2013;<lpage>1499</lpage>. <pub-id pub-id-type="doi">10.1007/s00180-020-00962-8</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Innes</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Flux: elegant machine learning with julia</article-title>. <source>J. Open Source Softw.</source> <volume>3</volume>, <fpage>602</fpage>. <pub-id pub-id-type="doi">10.21105/joss.00602</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Izmailov</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Maddox</surname>
<given-names>W. J.</given-names>
</name>
<name>
<surname>Kirichenko</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Garipov</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Vetrov</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Wilson</surname>
<given-names>A. G.</given-names>
</name>
</person-group> (<year>2019</year>). <source>Subspace inference for bayesian deep learning</source>. <comment>CoRR abs/1907.07504</comment>.</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ji</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Autonomous discovery of unknown reaction pathways from data by chemical reaction neural network</article-title>. <source>J. Phys. Chem. A</source> <volume>125</volume>, <fpage>1082</fpage>&#x2013;<lpage>1092</lpage>. <pub-id pub-id-type="doi">10.1021/acs.jpca.0c09316</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Jospin</surname>
<given-names>L. V.</given-names>
</name>
<name>
<surname>Buntine</surname>
<given-names>W. L.</given-names>
</name>
<name>
<surname>Boussaid</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Laga</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Bennamoun</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). <source>Hands-on bayesian neural networks - a tutorial for deep learning users</source>. <comment>
<italic>ArXiv</italic> abs/2007.06823</comment>.</citation>
</ref>
<ref id="B14">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Carlson</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Carin</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2015</year>). <source>Preconditioned stochastic gradient Langevin dynamics for deep neural networks</source>. <comment>
<italic>arXiv preprint arXiv:1512.07666</italic>
</comment>.</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Koenig</surname>
<given-names>B. C.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Bayesian chemical reaction neural network for autonomous kinetic uncertainty quantification</article-title>. <source>Phys. Chem. Chem. Phys.</source> <volume>25</volume>, <fpage>3707</fpage>&#x2013;<lpage>3717</lpage>. <pub-id pub-id-type="doi">10.1039/D2CP05083H</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lunn</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Best</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Thomas</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Wakefield</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Spiegelhalter</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Bayesian analysis of population pk/pd models: general concepts and software</article-title>. <source>J. Pharmacokinet. Pharmacodynamics</source> <volume>29</volume>, <fpage>271</fpage>&#x2013;<lpage>307</lpage>. <pub-id pub-id-type="doi">10.1023/A:1020206907668</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Maddox</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Garipov</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Izmailov</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Vetrov</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Wilson</surname>
<given-names>A. G.</given-names>
</name>
</person-group> (<year>2019</year>). <source>A simple baseline for bayesian uncertainty in deep learning</source>. <comment>
<italic>CoRR</italic> abs/1902.02476</comment>.</citation>
</ref>
<ref id="B18">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Rackauckas</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Edelman</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fischer</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Innes</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Saba</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Shah</surname>
<given-names>V. B.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). &#x201c;<article-title>Generalized physics-informed learning through language-wide differentiable programming</article-title>,&#x201d; in <source>AAAI spring symposium: mlps</source>.</citation>
</ref>
<ref id="B19">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Rackauckas</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Martensen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Warner</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zubov</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Supekar</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <source>Universal differential equations for scientific machine learning</source>.</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rackauckas</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Nie</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Differentialequations. jl&#x2013;a performant and feature-rich ecosystem for solving differential equations in julia</article-title>. <source>J. Open Res. Softw.</source> <volume>5</volume>, <fpage>15</fpage>. <pub-id pub-id-type="doi">10.5334/jors.151</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Searson</surname>
<given-names>D. P.</given-names>
</name>
<name>
<surname>Willis</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Wright</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2014</year>). <source>Reverse engineering chemical reaction networks from time series data</source>. <comment>
<italic>arXiv preprint arXiv:1412.6346</italic>
</comment>.</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>von Toussaint</surname>
<given-names>U.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Bayesian inference in physics</article-title>. <source>Rev. Mod. Phys.</source> <volume>83</volume>, <fpage>943</fpage>&#x2013;<lpage>999</lpage>. <pub-id pub-id-type="doi">10.1103/RevModPhys.83.943</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Welling</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Teh</surname>
<given-names>Y. W.</given-names>
</name>
</person-group> (<year>2011</year>). &#x201c;<article-title>Bayesian learning via stochastic gradient Langevin dynamics</article-title>,&#x201d; in <conf-name>Proceedings of the 28th International Conference on Machine Learning</conf-name> (<publisher-loc>Bellevue, WA</publisher-loc>: <publisher-name>ICML-11</publisher-name>), <fpage>681</fpage>&#x2013;<lpage>688</lpage>.</citation>
</ref>
<ref id="B24">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Ge</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Tebbutt</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Tarek</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Trapp</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ghahramani</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2019</year>). <source>Advancedhmc. jl: a robust, modular and efficient implementation of advanced hmc algorithms</source>.</citation>
</ref>
</ref-list>
</back>
</article>