<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Energy Res.</journal-id>
<journal-title>Frontiers in Energy Research</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Energy Res.</abbrev-journal-title>
<issn pub-type="epub">2296-598X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1073271</article-id>
<article-id pub-id-type="doi">10.3389/fenrg.2022.1073271</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Energy Research</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Shaping energy cost management in process industries through clustering and soft sensors</article-title>
<alt-title alt-title-type="left-running-head">Lu et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fenrg.2022.1073271">10.3389/fenrg.2022.1073271</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Lu</surname>
<given-names>Yu</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1904488/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Fang</surname>
<given-names>Gang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Huang</surname>
<given-names>Daoping</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Cai</surname>
<given-names>Baoping</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1996356/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Hongtian</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1260273/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Liu</surname>
<given-names>Yiqi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1057170/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Key Laboratory of Autonomous Systems and Networked Control</institution>, <institution>Ministry of Education</institution>, <institution>The School of Automation Science and Engineering</institution>, <institution>South China University of Technology</institution>, <addr-line>Guangzhou</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Unmanned Aerial Vehicle Systems Engineering Technology Research Center of Guangdong</institution>, <institution>The School of Automation Science and Engineering</institution>, <institution>South China University of Technology</institution>, <addr-line>Guangzhou</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>College of Mechanical and Electronic Engineering</institution>, <institution>China University of Petroleum</institution>, <addr-line>Qingdao</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Department of Chemical and Materials Engineering</institution>, <institution>University of Alberta</institution>, <addr-line>Edmonton</addr-line>, <country>Canada</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1831009/overview">Yongming Han</ext-link>, Beijing University of Chemical Technology, China</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2063097/overview">Zhiying Wu</ext-link>, Hong Kong Institute of Science and Innovation (CAS), Hong Kong, Hong Kong, SAR China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2066327/overview">Huadong Mo</ext-link>, University of New South Wales, Australia</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2069056/overview">Yuqiu Chen</ext-link>, Technical University of Denmark, Denmark</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Yiqi Liu, <email>aulyq@scut.edu.cn</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Process and Energy Systems Engineering, a section of the journal Frontiers in Energy Research</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>09</day>
<month>01</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>10</volume>
<elocation-id>1073271</elocation-id>
<history>
<date date-type="received">
<day>18</day>
<month>10</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>11</day>
<month>11</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Lu, Fang, Huang, Cai, Chen and Liu.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Lu, Fang, Huang, Cai, Chen and Liu</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>With the ever-increasing growth of energy demand and costs, process monitoring of operational costs is of great importance for process industries. In this light, both financial budget management and local operational optimization are supposed to be guaranteed properly. To achieve this goal, a support vector machine recursive feature elimination (SVM-RFE) method together with a clustering algorithm was developed to extract features while serving as importance measurements of each input variable for the sequential prediction model construction. Then, the four variants of autoregressive and moving average (ARMA), i.e., ARMA with exogenous input (ARMAX) based on recursive least squares algorithm (RLS), ARMAX based on recursive extended least squares algorithm (RELS), nonlinear auto-regressive neural network (NARNN) and nonlinear auto-regressive neural network with exogenous input (NARXNN), were applied, respectively, to predict the costs incurred in the daily production for process industries. The methods were validated in the Benchmark Simulation Model No.2-P (BSM2-P) and a practical data set about steel industry energy consumption from an open access database (University of California, Irvine (UCI)), respectively. The nonlinear model, NARXNN, was validated to achieve better performance in terms of mean square error (MSE) and correlation coefficient (R), when used for multi-step prediction of the aforementioned datasets with strong nonlinear and coupled characteristics.</p>
</abstract>
<kwd-group>
<kwd>industry</kwd>
<kwd>operational costs prediction</kwd>
<kwd>ARMAX</kwd>
<kwd>NARNN</kwd>
<kwd>NARXNN</kwd>
<kwd>SVM-RFE combined with clustering algorithm</kwd>
</kwd-group>
<contract-num rid="cn001">62273151 61873096 62073145</contract-num>
<contract-num rid="cn002">891627</contract-num>
<contract-num rid="cn003">2020A1515011057 2021B1515420003</contract-num>
<contract-sponsor id="cn001">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content>
</contract-sponsor>
<contract-sponsor id="cn002">Horizon 2020 Framework Programme<named-content content-type="fundref-id">10.13039/100010661</named-content>
</contract-sponsor>
<contract-sponsor id="cn003">Basic and Applied Basic Research Foundation of Guangdong Province<named-content content-type="fundref-id">10.13039/501100021171</named-content>
</contract-sponsor>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>In recent decades, smart industrial concept or industry 4.0 has gained popularity as an initiative to upgrade traditional manufacture to an intelligent facility with the help of artificial intelligence and machine learning. However, smart concept is always focusing on quality control through instrumentations and controllers, without sufficiently focusing on energy consumption management or operational costs reduction prediction (<xref ref-type="bibr" rid="B2">Ansari et al., 2011</xref>). Operational costs reduction, such as minimizing dosage costs, optimizing energy consumption, subsequent optimizing control or operational strategies, can intuitively promote green production of process industries, thereby helping enterprises or sectors achieve sustainable manufacture. With globalization, the continuous growth trend in energy consumption has received significant attention. As the largest energy end-use sector, industry currently accounts for nearly 40% of total global final energy consumption (<xref ref-type="bibr" rid="B17">International Energy Agency, 2021</xref>). Moreover, energy consumption accounts for a large proportion of total costs in most industrial processes (<xref ref-type="bibr" rid="B14">Han et al., 2018</xref>). Excessive energy consumption usually implies more environmental pollution and more production costs due to the environmental regulations. Therefore, given the potentials to improve industrial energy efficiency, substantial research on energy-efficiency indicators has been proposed to support energy-intensive enterprises and governments to assess energy consumptions and optimize management (<xref ref-type="bibr" rid="B6">Chan et al., 2014</xref>; <xref ref-type="bibr" rid="B22">Li and Tao, 2017</xref>).</p>
<p>Specifically, constructing energy consumption or operational costs prediction models can help and support decision-making about costs management properly. The motivation behind establishing a predictive model is essentially to make a model able to reflect and mimic the true system characteristics as closely as possible. In general, two types of approaches are typically used for modeling. One is the mechanistic model, also known as the white-box approach, in which the mechanism of the system is completely clear and the model construction generally depends on the specific physical, chemical, biological and other behaviors of a process. Such a model is intuitively explainable but difficult to generalize to other fields. <xref ref-type="bibr" rid="B18">Jia et al. (2018)</xref> established an energy consumption model based on motion-study for activities related to equipment and operators, showing the effectiveness of the approach in a case study. Also, <xref ref-type="bibr" rid="B1">Alt&#x131;ntas et al. (2016)</xref> combined mechanistic and empirical models to optimize the machine operations in a milling process. This model was used to estimate the theoretical energy consumption in the milling process of prismatic parts with satisfactory prediction accuracy. In fact, to construct mechanism-based simulation models, a certain number of input data associated with the predicted targets are required, and then assumptions about the distribution of corresponding parameters or features related to these inputs are usually made relying on the prior knowledge (<xref ref-type="bibr" rid="B15">Hsu, 2015</xref>). However, for most industrial processes it is difficult to derive a specific mechanistic model, because of their extremely nonlinear, coupled, multivariate characteristics and even the combination of physical, chemical and biological reactions.</p>
<p>The data-driven modeling approach, called the black-box approach, is another way to address the above issues. The data used for prediction validation usually have similar patterns to those exhibited in the historical data. Data-driven methods have gained popularity over the past decades. This is mainly because data-driven methods can achieve better performance without process mechanisms compared to mechanistic models if the sufficient historical data sets are collected (<xref ref-type="bibr" rid="B30">Wei et al., 2018</xref>). With respect to the different types of data, data-driven models can be generally classified into linear and nonlinear forecasting models (<xref ref-type="bibr" rid="B32">Xiao et al., 2018</xref>). The ARMA model and its variants, as typical linear models, are among the most popular methods in time series forecasting, especially for linear and stationary time series scenarios. Even though non-stationary data can be solved by resorting to de-seasoning and de-trending strategies, ARMA could still fail in most cases (<xref ref-type="bibr" rid="B19">Juberias et al., 1999</xref>). Non-linearity in data can be approached by resorting to the nonlinear ARMA properly (<xref ref-type="bibr" rid="B21">Kun and Weibing, 2021</xref>). An autoregressive-based time varying model was developed to predict electricity short-term demand, while the performance of the original model depends a lot on the updated coefficients (<xref ref-type="bibr" rid="B29">Vu et al., 2017</xref>). The aforementioned variants mainly focused on autoregression and took the uselessness of other correlated variables for granted. <xref ref-type="bibr" rid="B11">Fang and Lahdelma (2016)</xref> applied the ARMA model to predict heating demand by combining weather variations, social components and other exogenous factors, and the results showed that the proposed method outperformed the model only considering weather components. 
In the actual industrial process, the predicted targets are influenced by other exogenous variables besides themselves. Therefore, ARX and ARMAX were proposed to improve ARMA model by incorporating the impacts of exogenous variables into the time series model, and have been studied by academic communities such as meteorology, finance, etc. (<xref ref-type="bibr" rid="B16">Huang and Jane, 2009</xref>; <xref ref-type="bibr" rid="B28">Silva et al., 2022</xref>). Recently, with the rapid development of artificial intelligence, artificial neural network techniques were broadly used to tackle nonlinear problems (<xref ref-type="bibr" rid="B24">Liu et al., 2020</xref>; <xref ref-type="bibr" rid="B9">Deng et al., 2021</xref>). They perform much better than linear time series models especially when the input data is kept current or the model functions at more than one-step-ahead prediction (<xref ref-type="bibr" rid="B8">De Gooijer and Hyndman, 2006</xref>). Therefore, the neural network model is a dominant data-driven model that has been widely applied in modeling and predicting (<xref ref-type="bibr" rid="B14">Han et al., 2018</xref>). In order to cater for working in various circumstances and conditions, diverse neural network structures and algorithms were continuously developed. The network type and the optimization algorithm of undetermined parameters also need to be selected appropriately for different purposes (<xref ref-type="bibr" rid="B5">Car-Pusic et al., 2020</xref>). <xref ref-type="bibr" rid="B27">Shi et al. (2021)</xref> designed a model based on convolutional neural networks to predict coal and electricity consumption simultaneously, and this model also eliminated the negative effects of the coupling between variables. <xref ref-type="bibr" rid="B20">Kahraman et al. (2021)</xref> proposed a data-driven method based on the deep neural network, which provided a highly accurate prediction performance for energy consumption of industry machines. 
The NARXNN adopted in this study is a neural network that combines autoregression and exogenous input series, and this model has the additional advantage of handling nonlinear time series compared to the ARMAX model.</p>
<p>In general, models with exogenous inputs outperform those using autoregressive methods directly, especially for real industrial processes. However, inappropriate input selection may lead to many problems such as overfitting or collinearity (<xref ref-type="bibr" rid="B31">Wu et al., 2020</xref>; <xref ref-type="bibr" rid="B23">Liu et al., 2021</xref>). Therefore, the selection of features is a critical step before modeling. Principal component analysis (PCA) is one of the most extensively used methods for feature reconstruction, which is able to refine new features by mapping the original high-dimensional vector space onto a new low-dimensional space. However, this method requires that the collected data follow Gaussian distributions, and the new features generated by PCA are difficult to interpret. Feature ranking methodologies, as another type of feature selection means, are mainly composed of filter-based, wrapper-based, and embedded methods. These methods rank the importance of each individual feature according to the scores of diverse feature subsets and are effective in interpretability problems (<xref ref-type="bibr" rid="B12">Guyon and Elisseeff, 2003</xref>). SVM-RFE, as an embedded method based on backward elimination, was first proposed by <xref ref-type="bibr" rid="B13">Guyon et al. (2002)</xref> for feature ranking of binary classification. In this study, based on this approach together with a clustering algorithm, the feature importance of continuous labels can be derived, and then exogenous inputs can be chosen.</p>
<p>The main objective of this research is to develop energy consumption and operational costs prediction models by using variants of ARMA models to optimize management of process industries. The accuracy of the methodologies was validated in two case studies. Different from the traditional ways for energy prediction, the proposed methods are able to make multi-step-ahead predictions, thus supporting energy consumption and operational costs analysis over a short-term period. This will, in turn, facilitate the controller manipulations and management behaviors in advance if the demand from markets changes. Also, due to the collaboration with SVM-RFE in the proposed method, useful features can be well refined and interpreted by the importance measurement.</p>
<p>The rest of the paper is organized as follows. The methods of predictive modeling and input feature selection are briefly introduced to provide the basic knowledge in <xref ref-type="sec" rid="s2">Section 2</xref>. The dataset performance, prediction performance analysis and discussion of the two cases are presented in <xref ref-type="sec" rid="s3">Section 3</xref>. The conclusions are finally drawn in <xref ref-type="sec" rid="s4">Section 4</xref>.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Methods and materials</title>
<sec id="s2-1">
<title>2.1 The autoregressive and moving average with exogenous input model</title>
<p>The ARMA model is usually suitable for short-term forecasts of time series data, and is widely applied in business, economics, engineering and other areas (<xref ref-type="bibr" rid="B3">Box et al., 2008</xref>). The ARMA is usually formulated as the following equations:<disp-formula id="e1">
<mml:math id="m1">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where<disp-formula id="equ1">
<mml:math id="m2">
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mi>B</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:msup>
<mml:mi>B</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:msup>
<mml:mi>B</mml:mi>
<mml:mi>p</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ2">
<mml:math id="m3">
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mi>B</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:msup>
<mml:mi>B</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
<mml:msup>
<mml:mi>B</mml:mi>
<mml:mi>q</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>
<inline-formula id="inf1">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the output of the model at time <inline-formula id="inf2">
<mml:math id="m5">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf3">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the random shock, such as Gaussian white noise. <inline-formula id="inf4">
<mml:math id="m7">
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is defined as the backward shift operator, i.e., <inline-formula id="inf5">
<mml:math id="m8">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>B</mml:mi>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. When <inline-formula id="inf6">
<mml:math id="m9">
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, the ARMA model degenerates into the MA model. Similarly, when <inline-formula id="inf7">
<mml:math id="m10">
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, the model becomes the AR model. It is worth noting that AR and MA models are both special cases of an ARMA model. <inline-formula id="inf8">
<mml:math id="m11">
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x22ef;</mml:mo>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x22ef;</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> represents the weight value of the items corresponding to <inline-formula id="inf9">
<mml:math id="m12">
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x22ef;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x22ef;</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. Parameters, <inline-formula id="inf10">
<mml:math id="m13">
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x22ef;</mml:mo>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x22ef;</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, are unknown and need to be estimated by using the collected data. In this paper, RLS and RELS algorithms are used for parameter identifications. The RLS is used to minimize the cost function as follows:<disp-formula id="e2">
<mml:math id="m14">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>J</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:msub>
<mml:mi>&#x3c6;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where<disp-formula id="equ3">
<mml:math id="m15">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c6;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ4">
<mml:math id="m16">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>The RLS algorithm for estimating parameter <inline-formula id="inf11">
<mml:math id="m17">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> can be expressed as (<xref ref-type="bibr" rid="B10">Ding, 2010</xref>):<disp-formula id="e3">
<mml:math id="m18">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>&#x3c6;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:msub>
<mml:mi>&#x3c6;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:msub>
<mml:mover accent="true">
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
<disp-formula id="e4">
<mml:math id="m19">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>&#x3c6;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:msup>
<mml:msub>
<mml:mi>&#x3c6;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:msub>
<mml:mi>&#x3c6;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>&#x3c6;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>where <inline-formula id="inf12">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf13">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are the covariance matrices, and <inline-formula id="inf14">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the identity matrix of order <inline-formula id="inf15">
<mml:math id="m23">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. <inline-formula id="inf16">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is assumed to be a large positive number, e.g., <inline-formula id="inf17">
<mml:math id="m25">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:mn>6</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. <inline-formula id="inf18">
<mml:math id="m26">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the estimated value of <inline-formula id="inf19">
<mml:math id="m27">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> at time <inline-formula id="inf20">
<mml:math id="m28">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. Building on RLS, RELS additionally takes into account the innovations <inline-formula id="inf21">
<mml:math id="m29">
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. The innovation in this study denotes the difference between the real value and the predicted value, where <inline-formula id="inf22">
<mml:math id="m30">
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mi>&#x3c6;</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The general form of ARMA-RELS can be extended as shown in <xref ref-type="disp-formula" rid="e5">Eq. 5</xref>.<disp-formula id="e5">
<mml:math id="m31">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>where<disp-formula id="equ5">
<mml:math id="m32">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mi>B</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:msup>
<mml:mi>B</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:msup>
<mml:mi>B</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>The sets of other parameters also need to be updated,<disp-formula id="equ6">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c6;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ7">
<mml:math id="m34">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>
<inline-formula id="inf23">
<mml:math id="m35">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf24">
<mml:math id="m36">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, while the recursive form that is used to estimate the parameter <inline-formula id="inf25">
<mml:math id="m37">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> remains unchanged.</p>
<p>However, the aforementioned ARMA models omit the influence of exogenous variables on the prediction results. The ARMAX method was proposed to address this limitation. The ARMAX model takes into account not only the effects of the historical series of the output itself, but also the effects of exogenous inputs. The general expressions for ARMAX-RLS and ARMAX-RELS are given in <xref ref-type="disp-formula" rid="e6">Eqs 6</xref>, <xref ref-type="disp-formula" rid="e7">7</xref>, respectively.<disp-formula id="e6">
<mml:math id="m38">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
<disp-formula id="e7">
<mml:math id="m39">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>where<disp-formula id="equ8">
<mml:math id="m40">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mi>B</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:msup>
<mml:mi>B</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:msup>
<mml:mi>B</mml:mi>
<mml:mi>j</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>
<inline-formula id="inf26">
<mml:math id="m41">
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the set of exogenous inputs. This set may be multidimensional, with its dimensionality depending on the number of input variables selected. The method used in this research to determine the input variables is described in <xref ref-type="sec" rid="s2-3">Section 2.3</xref>. The formulations of the two parameter estimation algorithms, RLS and RELS, for the ARMAX model are essentially the same as those for the ARMA model. However, mainly due to the effect of the additional <inline-formula id="inf27">
<mml:math id="m42">
<mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, some parameters are updated accordingly on the original basis as follows:<disp-formula id="equ9">
<mml:math id="m43">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c6;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ10">
<mml:math id="m44">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>
<inline-formula id="inf28">
<mml:math id="m45">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf29">
<mml:math id="m46">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> for RLS, and<disp-formula id="equ11">
<mml:math id="m47">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c6;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ12">
<mml:math id="m48">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
</p>
<p>
<inline-formula id="inf30">
<mml:math id="m49">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf31">
<mml:math id="m50">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> for RELS. Then, <inline-formula id="inf32">
<mml:math id="m51">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf33">
<mml:math id="m52">
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf34">
<mml:math id="m53">
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf35">
<mml:math id="m54">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> are determined using the Akaike information criterion (AIC), as shown in <xref ref-type="disp-formula" rid="e8">Eq. 8</xref>.<disp-formula id="e8">
<mml:math id="m55">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>ln</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>Q</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>where <inline-formula id="inf36">
<mml:math id="m56">
<mml:mrow>
<mml:mi>Q</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the number of parameters and <inline-formula id="inf37">
<mml:math id="m57">
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the likelihood function. The optimal model order is the <inline-formula id="inf38">
<mml:math id="m58">
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>q</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> combination that minimizes <inline-formula id="inf39">
<mml:math id="m59">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. Accordingly, due to the introduction of lag <inline-formula id="inf40">
<mml:math id="m60">
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, the iterative k-step-ahead prediction can be formulated as <xref ref-type="disp-formula" rid="e9">Eqs 9</xref>, <xref ref-type="disp-formula" rid="e10">10</xref>.<disp-formula id="e9">
<mml:math id="m61">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>&#x3c1;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mover accent="true">
<mml:mi>&#x3c4;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>&#x3c3;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mi>r</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>c</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
<disp-formula id="e10">
<mml:math id="m62">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>&#x3c1;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mover accent="true">
<mml:mi>&#x3c4;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mover accent="true">
<mml:mi>&#x3c3;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>u</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>&#x3e;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>where <inline-formula id="inf41">
<mml:math id="m63">
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mi>&#x3c1;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:mover accent="true">
<mml:mi>&#x3c4;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:mover accent="true">
<mml:mi>&#x3c3;</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>c</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> are the corresponding estimated values of <inline-formula id="inf42">
<mml:math id="m64">
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:mi>&#x3c1;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. If <inline-formula id="inf43">
<mml:math id="m65">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf44">
<mml:math id="m66">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, meaning that the estimated output of the prediction model is equal to the real value.</p>
<p>Both the RLS and RELS algorithms are simple yet powerful tools for estimating unknown parameters without needing to compute a matrix inverse during iterative learning. This makes them suitable for online model identification. RELS is a direct extension of RLS that aims to reduce the influence of colored noise by adding residuals to the information vector <inline-formula id="inf45">
<mml:math id="m67">
<mml:mrow>
<mml:mi>&#x3c6;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and the parameter vector <inline-formula id="inf46">
<mml:math id="m68">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. Compared to the standard least squares method, the RLS and RELS algorithms improve the identification performance of time series models at the expense of higher computational complexity.</p>
</sec>
<sec id="s2-2">
<title>2.2 The nonlinear auto-regressive neural network model</title>
<p>Time series data derived from real industrial processes usually exhibit strong nonlinearity and high dynamics, which makes linear models unsuitable for monitoring such data. Therefore, nonlinear methods based on neural networks are highly recommended for modeling such datasets. The standard NARNN is formulated as follows:<disp-formula id="e11">
<mml:math id="m69">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>where <inline-formula id="inf47">
<mml:math id="m70">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the estimation of the output by a specific neural network at the <inline-formula id="inf48">
<mml:math id="m71">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> moment, <inline-formula id="inf49">
<mml:math id="m72">
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the time series dataset, <inline-formula id="inf50">
<mml:math id="m73">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the maximum output-memory order and <inline-formula id="inf51">
<mml:math id="m74">
<mml:mrow>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mo>&#x2219;</mml:mo>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes a specific neural network. The distinction between NARNN and ordinary neural networks for multi-step prediction is that several observations have to be replaced by the estimates of the network, so that <xref ref-type="disp-formula" rid="e11">Eq. 11</xref> can be reformulated as follows:<disp-formula id="e12">
<mml:math id="m75">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>where <inline-formula id="inf52">
<mml:math id="m76">
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> are estimates of the output over the time period from <inline-formula id="inf53">
<mml:math id="m77">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula id="inf54">
<mml:math id="m78">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf55">
<mml:math id="m79">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the number of delay steps for autoregression, and <inline-formula id="inf56">
<mml:math id="m80">
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> are the observations from time <inline-formula id="inf57">
<mml:math id="m81">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> to time <inline-formula id="inf58">
<mml:math id="m82">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. The structure of NARNN is presented in <xref ref-type="fig" rid="F1">Figure 1A</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Two kinds of neural network structure, where <inline-formula id="inf59">
<mml:math id="m83">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="italic">z</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the unit time delay. <bold>(A)</bold> Nonlinear auto-regressive neural network. <bold>(B)</bold> Nonlinear auto-regressive neural network with external input.</p>
</caption>
<graphic xlink:href="fenrg-10-1073271-g001.tif"/>
</fig>
<p>The NARNN model is primarily concerned with the historical series of the target variables, as shown in <xref ref-type="disp-formula" rid="e12">Eq. 12</xref>. The information carried by exogenous inflow data is ignored in this modeling process; the NARXNN model is therefore introduced to make use of this information. The NARXNN can be formulated as follows:<disp-formula id="e13">
<mml:math id="m84">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>y</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>x</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>where <inline-formula id="inf60">
<mml:math id="m85">
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>x</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is a matrix consisting of exogenous input variables, the dimension of <inline-formula id="inf61">
<mml:math id="m86">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> depends on the quantity of exogenous input variables, and <inline-formula id="inf62">
<mml:math id="m87">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>x</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the maximum delay index of the exogenous input variables. The structure of NARXNN differs only slightly from that of NARNN, mainly through the addition of several extra inputs. The structure of NARXNN is presented in <xref ref-type="fig" rid="F1">Figure 1B</xref>.</p>
<p>The two neural networks, NARNN and NARXNN, update the weights in each layer by using the Bayesian regularization backpropagation algorithm (<xref ref-type="bibr" rid="B25">MacKay, 1992</xref>). The training samples form the following set, <inline-formula id="inf63">
<mml:math id="m88">
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mi>N</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mi>N</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf64">
<mml:math id="m89">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1,2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf65">
<mml:math id="m90">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1,2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represent the input and output of the neural network, respectively. Given a neural network, called <inline-formula id="inf66">
<mml:math id="m91">
<mml:mrow>
<mml:mi mathvariant="script">M</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, let <inline-formula id="inf67">
<mml:math id="m92">
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>;</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="script">M</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> be the response of network <inline-formula id="inf68">
<mml:math id="m93">
<mml:mrow>
<mml:mi mathvariant="script">M</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> with respect to the input <inline-formula id="inf69">
<mml:math id="m94">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf70">
<mml:math id="m95">
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the weights of the network. The optimal parameters can be obtained by minimizing the quadratic cost function:<disp-formula id="e14">
<mml:math id="m96">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>D</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
<mml:msup>
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>;</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="script">M</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>
</p>
<p>The objective function is extended from <inline-formula id="inf71">
<mml:math id="m97">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>D</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula id="inf72">
<mml:math id="m98">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>D</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>w</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> to prevent overfitting. The regularization term <inline-formula id="inf73">
<mml:math id="m99">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>w</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is denoted as:<disp-formula id="e15">
<mml:math id="m100">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>w</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi>n</mml:mi>
</mml:munder>
<mml:msubsup>
<mml:mi>w</mml:mi>
<mml:mi>n</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>
</p>
<p>Note that <inline-formula id="inf74">
<mml:math id="m101">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf75">
<mml:math id="m102">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> are unknown parameters of the objective function <inline-formula id="inf76">
<mml:math id="m103">
<mml:mrow>
<mml:mi>F</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. When <inline-formula id="inf77">
<mml:math id="m104">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is larger, the accuracy of the model to the training samples is enhanced, and similarly when <inline-formula id="inf78">
<mml:math id="m105">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is larger, the generalization ability of the model is enhanced. The Bayesian regularization generally treats the network weights as random variables and the detailed methods for estimating the values of weights <inline-formula id="inf79">
<mml:math id="m106">
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf80">
<mml:math id="m107">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf81">
<mml:math id="m108">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> can be found in (<xref ref-type="bibr" rid="B7">Dan Foresee and Hagan, 1997</xref>).</p>
<p>NARNN and NARXNN are variants of ARMA combined with neural networks, combining both dynamic and recurrent properties. Neither method requires strict stationarity of the target time series. On the other hand, it should be noted that NARXNN needs a more reasonable computational cost (<xref ref-type="bibr" rid="B4">Cadenas et al., 2016</xref>).</p>
</sec>
<sec id="s2-3">
<title>2.3 The selection of features</title>
<p>Feature selection is of vital importance for improving the performance of a model, especially one whose predictions depend to some extent on a number of extrinsic inputs. Good choices of inputs not only help to provide accurate results, but also speed up calculations and reduce the number of sensor installations, all of which lead to operational cost savings. The SVM-RFE method combined with clustering proposed in this study ranks the features of continuous processes based on backward elimination.</p>
<p>The first thing worth noting is that the support vector machine classifier achieves the distinction between two classes by searching for the optimal hyperplane in a high-dimensional space (<xref ref-type="bibr" rid="B26">Rakotomamonjy, 2003</xref>). For the binary classification problem with training data set <inline-formula id="inf82">
<mml:math id="m109">
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">Y</mml:mi>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf83">
<mml:math id="m110">
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mi>n</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are the features and <inline-formula id="inf84">
<mml:math id="m111">
<mml:mrow>
<mml:mi mathvariant="bold-italic">Y</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1,1</mml:mn>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, there exists a hyperplane or decision function of the following form:<disp-formula id="e16">
<mml:math id="m112">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo>&#x2329;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">w</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3a6;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>&#x232a;</mml:mo>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>b</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>where <inline-formula id="inf85">
<mml:math id="m113">
<mml:mrow>
<mml:mi>&#x3a6;</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> refers to the mapping relationship from features <inline-formula id="inf86">
<mml:math id="m114">
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to the high dimensional space. The parameters <inline-formula id="inf87">
<mml:math id="m115">
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">w</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> are determined by minimizing the weights and the distance of each misclassified data point to the hyperplane, before which the features <inline-formula id="inf88">
<mml:math id="m116">
<mml:mrow>
<mml:mi mathvariant="bold-italic">X</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> need to be normalized. The optimization problem can be written as:<disp-formula id="e17">
<mml:math id="m117">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:munder>
<mml:mi>min</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">w</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3be;</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:msup>
<mml:mrow>
<mml:mo>&#x2016;</mml:mo>
<mml:mi mathvariant="bold-italic">w</mml:mi>
<mml:mo>&#x2016;</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>m</mml:mi>
</mml:munderover>
<mml:msubsup>
<mml:mi mathvariant="bold-italic">&#x3be;</mml:mi>
<mml:mi>k</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(17)</label>
</disp-formula>
<disp-formula id="equ13">
<mml:math id="m118">
<mml:mrow>
<mml:mtext>s.t.&#x2009;</mml:mtext>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3be;</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ14">
<mml:math id="m119">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="bold-italic">Y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mtext>&#x2009;for&#x2009;</mml:mtext>
<mml:mo>&#x2200;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf89">
<mml:math id="m120">
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is used as a penalty factor to weigh the importance of misclassification. SVM-RFE compares the impact of different remaining subsets on the classification by backward elimination of features, with the aim of preserving the subset of features that are most beneficial to the classification. The ranking of features is achieved through multiple iterations of elimination in the above work until the remaining feature subset is empty, and the criterion of elimination at each step for a given feature <inline-formula id="inf90">
<mml:math id="m121">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> can be expressed as:<disp-formula id="e18">
<mml:math id="m122">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo>&#x7c;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold">&#x2207;</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mo>&#x2016;</mml:mo>
<mml:msup>
<mml:mi mathvariant="bold-italic">w</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2016;</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mo>&#x7c;</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(18)</label>
</disp-formula>where <inline-formula id="inf91">
<mml:math id="m123">
<mml:mrow>
<mml:mi mathvariant="bold">&#x2207;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the weight difference between the previous subset and the one whose <inline-formula id="inf92">
<mml:math id="m124">
<mml:mrow>
<mml:msup>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> feature is eliminated. The feature that minimizes <inline-formula id="inf93">
<mml:math id="m125">
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> will be removed after one round of loops, which means that the remaining feature subset has the least difference in classification performance from the feature subset containing the removed feature. It is worth mentioning that SVM-RFE has been widely used in feature selection for binary classification problems, whereas it has rarely been used in continuous classification problems.</p>
<p>In practical industrial processes, the variation in feature values often leads to continuous variation in the output. This raises a question that needs to be addressed. When each output point is treated directly as a separate label, the volume of the feature set corresponding to each label is so small that each feature set does not have the ability to characterize a specific label. Therefore, such continuous processes cannot be directly treated as a multi-label feature classification problem. In order to rank the features for this type of data, this study proposes SVM-RFE combined with a clustering algorithm, as shown in <xref ref-type="fig" rid="F2">Figure 2</xref>.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>SVM-RFE combined with clustering algorithm for sequential feature ranking.</p>
</caption>
<graphic xlink:href="fenrg-10-1073271-g002.tif"/>
</fig>
<p>The successive outputs are first clustered to form new classes, and thus similar outputs can be grouped into homogeneous classes to enhance the differences between the new classes, e.g., <inline-formula id="inf94">
<mml:math id="m126">
<mml:mrow>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
<mml:mo>&#x2192;</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>n</mml:mi>
<mml:mo>&#x226b;</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The features corresponding to the same type of label are also merged into the same group, e.g., <inline-formula id="inf95">
<mml:math id="m127">
<mml:mrow>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
<mml:mo>&#x2192;</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>n</mml:mi>
<mml:mo>&#x226b;</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. In this way, the original problem is successfully transformed into a multi-label feature classification problem. When SVM is used for multi-classification problems, it is usually transformed into a series of binary classification problems that are handled separately and then summarized for the final result. The new data set <inline-formula id="inf96">
<mml:math id="m128">
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is partitioned using the so-called one-versus-all method, and subsequently the features are individually ranked using SVM-RFE. The final summary of the ranking results for each group is <inline-formula id="inf97">
<mml:math id="m129">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-script">R</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> as shown in <xref ref-type="disp-formula" rid="e19">Eq. 19</xref>.<disp-formula id="e19">
<mml:math id="m130">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi mathvariant="bold-script">R</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi mathvariant="script">R</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi mathvariant="script">R</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>11</mml:mn>
</mml:msub>
</mml:mtd>
<mml:mtd>
<mml:mo>&#x22ef;</mml:mo>
</mml:mtd>
<mml:mtd>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
<mml:mtd>
<mml:mo>&#x22f1;</mml:mo>
</mml:mtd>
<mml:mtd>
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mtd>
<mml:mtd>
<mml:mo>&#x22ef;</mml:mo>
</mml:mtd>
<mml:mtd>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(19)</label>
</disp-formula>where <inline-formula id="inf98">
<mml:math id="m131">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">R</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the ranking of the features that distinguish <inline-formula id="inf99">
<mml:math id="m132">
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> from the rest of the dataset using SVM-RFE, and the order, <inline-formula id="inf100">
<mml:math id="m133">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula id="inf101">
<mml:math id="m134">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, is decreasing according to the importance of the <inline-formula id="inf102">
<mml:math id="m135">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> optional features. For the ranking matrix <inline-formula id="inf103">
<mml:math id="m136">
<mml:mrow>
<mml:mi mathvariant="bold-script">R</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, this research considers that the importance index of the same column is consistent, which unifies the weights assigned to the same column. Then the final ranking of all features, <inline-formula id="inf104">
<mml:math id="m137">
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>v</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, can be obtained by counting the frequency of each feature in each column.</p>
<p>In the final step, the model is trained by sequentially increasing the number of exogenous inputs to the model, depending on the importance of the features. Then, the appropriate training set is obtained by comparing the model test results under the AIC criterion. SVM-RFE combined with a clustering algorithm migrates the feature ranking method for binary classification problems to a new application scenario and solves the problem of feature ranking for continuous processes. This approach implements feature selection while keeping the original data of the features intact and visually explaining the selection of input variables.</p>
</sec>
</sec>
<sec id="s3">
<title>3 Case studies</title>
<sec id="s3-1">
<title>3.1 Performance evaluation index</title>
<p>In this study, MSE and R are used as the performance evaluation metrics of the model, which can be calculated as follows:<disp-formula id="e20">
<mml:math id="m138">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
<mml:msup>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(20)</label>
</disp-formula>
<disp-formula id="e21">
<mml:math id="m139">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
<mml:msup>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
<mml:msqrt>
<mml:mrow>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo>&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
<mml:msup>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(21)</label>
</disp-formula>where <inline-formula id="inf105">
<mml:math id="m140">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf106">
<mml:math id="m141">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> refer to the true values of the system output and the estimated values of the prediction model, respectively; <inline-formula id="inf107">
<mml:math id="m142">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf108">
<mml:math id="m143">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the mean values of <inline-formula id="inf109">
<mml:math id="m144">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf110">
<mml:math id="m145">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. <inline-formula id="inf111">
<mml:math id="m146">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the total number of evaluation samples. The smaller the MSE, the smaller the error of the model. R is in the range of <inline-formula id="inf112">
<mml:math id="m147">
<mml:mrow>
<mml:mo>[</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>; the closer it is to 1, the better the performance of the model.</p>
</sec>
<sec id="s3-2">
<title>3.2 Operational cost index from wastewater treatment processes</title>
<sec id="s3-2-1">
<title>3.2.1 Data processing</title>
<p>The data for the case study in this section mainly came from the wastewater treatment platform, BSM2-P Simulink simulation model, which adds the phosphorus treatment process based on BSM2. The actual collected inflow parameters (e.g., Qin: influent flow, SF: readily biodegradable substrate) were input into the simulation platform, and OCI was calculated every 15&#xa0;min according to <xref ref-type="disp-formula" rid="e22">Eq. 22</xref> based on the data collected from the simulation model.<disp-formula id="e22">
<mml:math id="m148">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>I</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>3</mml:mn>
<mml:mi>S</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>A</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>P</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>E</mml:mi>
<mml:mi>C</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>max</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>H</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>7</mml:mn>
<mml:mi>M</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>24</mml:mn>
<mml:mi>M</mml:mi>
<mml:mi>T</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>6</mml:mn>
<mml:mi>M</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(22)</label>
</disp-formula>where SP is the sludge production for disposal, AE is the aeration energy, ME is the mixing energy, PE is the pump energy, EC is the external carbon addition, HE is the heating energy for increasing the temperature of the anaerobic digester, MP is the methane production, and MT is the metal salt to be added. A total of 50 days of data are illustrated in <xref ref-type="fig" rid="F3">Figure 3</xref> and exhibit significant non-linearity. It should be noted that the OCI can be negative because the methane produced in the wastewater treatment process partially offsets the operating cost.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Operational cost index (OCI) over 50&#xa0;days.</p>
</caption>
<graphic xlink:href="fenrg-10-1073271-g003.tif"/>
</fig>
<p>After the OCI data and influent data were obtained, further preparation for feature selection was required in addition to outlier removal and normalization. The operational cost of the WWTP is closely related to the influent parameters, but excessive parameters are not conducive to the further selection of features, so more preprocessing of the input data is required. Specifically, the selection of features practically implies exploring the correlation between exogenous inputs and target variables. Data with little or no change have little influence but increase the subsequent computation, so the variables associated with such data need to be eliminated. This problem can be solved by excluding feature arrays whose variance is below a certain threshold. Furthermore, variables with strong linear correlation would make SVM-RFE&#x2019;s judgment of importance seem unreasonable. Therefore, some correlation analysis methods, such as Pearson correlation analysis, need to be used to isolate the variables with strong linear correlation before using SVM-RFE. The variables <inline-formula id="inf113">
<mml:math id="m149">
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>I</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>O</mml:mi>
</mml:mrow>
<mml:mn>4</mml:mn>
</mml:msub>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>I</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>a</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> were finally selected in this study as the alternative input features for predicting OCI, and further ranked by using SVM-RFE combined with a clustering algorithm. <xref ref-type="table" rid="T1">Table 1</xref> shows a full overview of these variables.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Exogenous input variables related to OCI and descriptions.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Variables</th>
<th align="left">Description</th>
<th align="left">Units</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<inline-formula id="inf114">
<mml:math id="m150">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Time of simulation</td>
<td align="left">day</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf115">
<mml:math id="m151">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Fermentable substrate</td>
<td align="left">g COD.m<sup>-3</sup>
</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf116">
<mml:math id="m152">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>I</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Soluble inert organic matter</td>
<td align="left">g COD.m<sup>-3</sup>
</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf117">
<mml:math id="m153">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>O</mml:mi>
</mml:mrow>
<mml:mn>4</mml:mn>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Phosphate</td>
<td align="left">g.m<sup>-3</sup>
</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf118">
<mml:math id="m154">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>I</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Particulate inert organic matter</td>
<td align="left">g COD.m<sup>-3</sup>
</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf119">
<mml:math id="m155">
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Influent flow rate</td>
<td align="left">m<sup>3</sup>.day<sup>-1</sup>
</td>
</tr>
<tr>
<td align="left">
<inline-formula id="inf120">
<mml:math id="m156">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>a</mml:mi>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td align="left">Sodium</td>
<td align="left">g.m<sup>-3</sup>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>&#x2a;COD, chemical oxygen demand.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>To test the reliability of the method under different numbers of clusters, various numbers of OCI clusters were set, and the importance of each variable for each number of clusters is presented in <xref ref-type="fig" rid="F4">Figure 4</xref>. The results indicate that changes in the number of clusters over a wide range have only a slight effect on the importance of the exogenous variables, such that there is no change in the importance ranking of the variables. The ranking of the variables in decreasing order of importance is <inline-formula id="inf121">
<mml:math id="m157">
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>I</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>I</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>O</mml:mi>
</mml:mrow>
<mml:mn>4</mml:mn>
</mml:msub>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mi>a</mml:mi>
</mml:msub>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, where the importance indicators of <inline-formula id="inf122">
<mml:math id="m158">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf123">
<mml:math id="m159">
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf124">
<mml:math id="m160">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>I</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf125">
<mml:math id="m161">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>O</mml:mi>
</mml:mrow>
<mml:mn>4</mml:mn>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are similar, respectively. It is worth noting that the importance of <italic>t</italic> behaves unusually when the number of clusters is 1000, because the number of clusters is then close to the number of raw data labels. As mentioned earlier, in order to accurately identify feature importance, classes with significant differences need to be generated by clustering.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Importance of each variable to OCI with different number of clusters.</p>
</caption>
<graphic xlink:href="fenrg-10-1073271-g004.tif"/>
</fig>
<p>Generally, the model prediction accuracy will improve somewhat as the number of exogenous variables increases, but the rate of improvement is limited when the number of selected variables reaches a specific value. Fewer variables can therefore be selected from the set of alternative variables to save computational power and avoid overfitting. Finally, according to the AIC criterion, <inline-formula id="inf126">
<mml:math id="m162">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>F</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf127">
<mml:math id="m163">
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> were chosen as exogenous input variables in this case study.</p>
</sec>
<sec id="s3-2-2">
<title>3.2.2 Results and discussion</title>
<p>The OCI values for each time period were calculated using the data collected in the BSM2-P simulation model. A total of 4,799 samples from 50 days were retained. The sample set was split, with the data of the first week used as the training set and the remainder as the testing set. Each model was applied to predict OCI four steps ahead.</p>
<p>
<xref ref-type="fig" rid="F5">Figure 5</xref> shows the prediction performance of four ARMA variant models on the last week of OCI values and compares them with the test set, respectively. As shown in <xref ref-type="fig" rid="F5">Figures 5A,B</xref>, the ARMAX models based on two different algorithms are similar in overall prediction performance. Under relatively stable conditions, the predicted values of ARMAX model are in good agreement with some original data with linear characteristics. However, comparing the peak locations marked in <xref ref-type="fig" rid="F5">Figures 5A,C</xref>, the ARMAX model has slightly worse performance. This mainly results from the fact that the linear model is not competitive for data predictions with significant nonlinearity. On the other hand, although the RELS algorithm takes into account the effect of residual information, the performance does not improve significantly compared to RLS. This is due to the limited effect provided by the residuals of the previous moment during the nonlinear change phase of the data.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Prediction results of OCI for the last 7&#xa0;days over four steps ahead. <bold>(A)</bold> Prediction results of the ARMAX-RLS model. <bold>(B)</bold> Prediction results of the ARMAX-RELS model. <bold>(C)</bold> Prediction results of the NARNN model. <bold>(D)</bold> Prediction results of the NARXNN model.</p>
</caption>
<graphic xlink:href="fenrg-10-1073271-g005.tif"/>
</fig>
<p>Compared to the ARMAX model, the prediction values of the NARNN model displayed in <xref ref-type="fig" rid="F5">Figure 5C</xref> are more in line with the real values. However, there are still deviations, as shown in the half-day period after the 46th day. The NARXNN model fits the real data better at the locations of the peaks, troughs, as well as under the other linear conditions as shown in <xref ref-type="fig" rid="F5">Figure 5D</xref>. The residual distribution of the prediction results for the four variants is plotted in <xref ref-type="fig" rid="F6">Figure 6</xref>. The NARXNN model produces a smaller span of error, which indicates the better prediction performance of the model.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Prediction residual of OCI for the last 7&#xa0;days over four steps ahead.</p>
</caption>
<graphic xlink:href="fenrg-10-1073271-g006.tif"/>
</fig>
<p>It is worth noting that the training of the neural network model has strong randomness. This can be addressed by trial and error. To better illustrate the performance of the proposed method, the results of NARNN and NARXNN listed in <xref ref-type="table" rid="T2">Table 2</xref> are average values calculated over more than 10 prediction runs. As can be seen from <xref ref-type="table" rid="T2">Table 2</xref>, compared with the ARMAX-RLS, ARMAX-RELS and NARNN models, the MSE of the NARXNN model is reduced by 78.50%, 79.52% and 50.75%, respectively, and R is improved by 0.89%, 0.96% and 0.28%, respectively. Based on the above results, it can be observed that the NARXNN model has a better performance in predicting the wastewater treatment cost index.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Comparison of the prediction performance on OCI over four steps ahead.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Models</th>
<th align="left">MSE</th>
<th align="left">R</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">ARMAX-RLS</td>
<td align="left">502.6552</td>
<td align="left">0.9886</td>
</tr>
<tr>
<td align="left">ARMAX-RELS</td>
<td align="left">527.8635</td>
<td align="left">0.9879</td>
</tr>
<tr>
<td align="left">NARNN</td>
<td align="left">219.4406</td>
<td align="left">0.9946</td>
</tr>
<tr>
<td align="left">NARXNN</td>
<td align="left">108.0847</td>
<td align="left">0.9974</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s3-3">
<title>3.3 Energy consumption from a full-scale steel plant</title>
<sec id="s3-3-1">
<title>3.3.1 Data processing</title>
<p>The real data in this subsection, concerning energy consumption in the steel industry, were taken from the University of California, Irvine (UCI) Machine Learning Repository. The data were collected from DAEWOO Steel Co., Ltd. in Gwangyang, South Korea. Energy consumption information for the industry is stored on the Korea Electric Power Corporation&#x2019;s website (pccs.kepco.go.kr), and daily, monthly and annual data are computed and displayed. <xref ref-type="fig" rid="F7">Figure 7</xref> presents the energy consumption data at 15&#xa0;min intervals over a total of 50 days.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Energy consumption data for 50&#xa0;days.</p>
</caption>
<graphic xlink:href="fenrg-10-1073271-g007.tif"/>
</fig>
<p>Obviously, the energy consumption data in this case are more non-linear because they were collected directly from an actual steel plant. This data set even contains some coarse data or outliers, and exhibits dramatic variations over time. The behavior of the data varied markedly across time intervals: from the 5th to the 7th day the processes were relatively stable, whereas the stable processes changed completely from the 20th to the 21st day and from the 45th to the 50th day. The data from the 10th to the 15th day, the 15th to the 20th day and the 21st to the 27th day also changed completely with different fluctuating trends, all of which added to the difficulty of sequential modeling and prediction.</p>
<p>
<xref ref-type="table" rid="T3">Table 3</xref> lists the alternative exogenous variables provided in the data source file that present continuous numeric variation. Similarly, as mentioned earlier, the selection of model inputs is essential before building models with exogenous inputs. This work is based entirely on data relationships without considering the mechanism. The linear correlations between the variables in <xref ref-type="table" rid="T3">Table 3</xref> were first examined by Pearson correlation analysis, and there was a strong linear relationship between Lead.CRP and Lead.CPF. After removing Lead.CPF, the remaining four variables were analyzed for importance in this case study and the results are shown in <xref ref-type="fig" rid="F8">Figure 8</xref>. It is noticeable that Lead.CRP and tCO2 have significantly higher importance indicators than the other two variables, and therefore Lead.CRP and tCO2 were identified as exogenous input variables for the prediction model of energy consumption in the steel plant.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Exogenous input variables related to the steel plant and descriptions.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Variables</th>
<th align="left">Abbreviations</th>
<th align="left">Units</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Lagging current reactive power</td>
<td align="left">Lag.CRP</td>
<td align="left">kVarh</td>
</tr>
<tr>
<td align="left">Leading current reactive power</td>
<td align="left">Lead.CRP</td>
<td align="left">kVarh</td>
</tr>
<tr>
<td align="left">tCO<sub>2</sub>(CO<sub>2</sub>)</td>
<td align="left">tCO2</td>
<td align="left">ppm</td>
</tr>
<tr>
<td align="left">Lagging current power factor</td>
<td align="left">Lag.CPF</td>
<td align="left">%</td>
</tr>
<tr>
<td align="left">Leading current power factor</td>
<td align="left">Lead.CPF</td>
<td align="left">%</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Importance of each variable to the energy consumption with different number of clusters.</p>
</caption>
<graphic xlink:href="fenrg-10-1073271-g008.tif"/>
</fig>
</sec>
<sec id="s3-3-2">
<title>3.3.2 Results and discussion</title>
<p>The dataset was split, with the first 7&#xa0;days of data used as the training set and the remaining data used as the test set to evaluate the performance of each model for four steps ahead prediction. The prediction performance of the four variants of the ARMA model is shown in <xref ref-type="fig" rid="F9">Figure 9</xref>, and in general, the prediction performance on real data deteriorates for all models compared to the prediction on the simulated data in the previous case study. The main reason for this is still that the steel plant data set contains rough data, as well as its own strong nonlinearity and sharp changes over time.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>Prediction results of energy consumption over four steps ahead. <bold>(A)</bold> Prediction results of the ARMAX-RLS model. <bold>(B)</bold> Prediction results of the ARMAX-RELS model. <bold>(C)</bold> Prediction results of the NARNN model. <bold>(D)</bold> Prediction results of the NARXNN model.</p>
</caption>
<graphic xlink:href="fenrg-10-1073271-g009.tif"/>
</fig>
<p>Nevertheless, in this study case, the neural network models perform much better than the general time series models. Specifically, the difference between the prediction results of the two ARMAX models is small, and both have the tendency that the predicted data obviously cannot track the true data with high oscillation, as shown in the dashed rectangular box in <xref ref-type="fig" rid="F9">Figure 9A</xref>. The predicted data from ARMAX-RELS overshoot less than those from ARMAX-RLS when the energy consumption data change rapidly from a declining state to a flat state, as can be seen in the comparison of <xref ref-type="fig" rid="F9">Figures 9A,B</xref>. In contrast, the two autoregressive neural network models perform much better in these respects.</p>
<p>As shown in <xref ref-type="fig" rid="F9">Figures 9C,D</xref>, the results predicted by the two autoregressive neural network models oscillate less between the 44th and the 49th day, and the predicted curves are smoother. Moreover, the NARXNN model has a higher prediction accuracy than the other models for parts with steep variations, such as the rising or falling edges indicated by the triangle symbol in the figure. Unlike the NARXNN model, the NARNN model performs even worse than the ARMAX models in the regions indicated by the triangle symbol, which shows the positive impact of introducing exogenous inputs on the prediction results. Although its prediction for smooth data in the period from the 44th to the 49th day is slightly inferior to that of the NARNN model, the NARXNN model still performs better overall.</p>
<p>The results are tabulated in <xref ref-type="table" rid="T4">Table 4</xref>. Compared with the ARMAX-RLS, ARMAX-RELS, and NARNN models, the MSE of the NARXNN model is reduced by 28.46%, 18.85%, and 45.36%, respectively, and R is improved by 6.40%, 1.79%, and 14.02%, respectively. Based on the above results, it can be observed that the NARXNN model has a better performance in predicting the energy consumption of a full-scale steel plant. It is worth noting that the performance of the NARNN model is quite different from that in the previous study case, which is mainly due to the lack of intervention from exogenous inputs. It is difficult for the general neural network model to predict the practical data with complex characteristics such as strong nonlinearity, strong volatility, and outliers in this case.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Comparison of the prediction performance on energy consumption over four steps ahead.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Models</th>
<th align="left">MSE</th>
<th align="left">R</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">ARMAX-RLS</td>
<td align="left">522.7977</td>
<td align="left">0.8215</td>
</tr>
<tr>
<td align="left">ARMAX-RELS</td>
<td align="left">460.9122</td>
<td align="left">0.8588</td>
</tr>
<tr>
<td align="left">NARNN</td>
<td align="left">684.5822</td>
<td align="left">0.7666</td>
</tr>
<tr>
<td align="left">NARXNN</td>
<td align="left">374.0288</td>
<td align="left">0.8741</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
</sec>
<sec sec-type="conclusion" id="s4">
<title>4 Conclusion</title>
<p>Process monitoring of operational costs can benefit operational cost reduction and other financial budget management in industries. This paper provides a comparative analysis of the performance of four ARMA model variants (i.e., ARMAX-RLS, ARMAX-RELS, NARNN and NARXNN), using operational costs and energy consumption predictions as a baseline for real applications. In addition, a method based on SVM-RFE combined with a clustering algorithm was developed to extract useful features that are important for the construction of the above-mentioned models and to provide a way to measure and explain how important the corresponding features are.</p>
<p>The analysis of the data and the evaluation of the prediction results lead to the following conclusions: The two time series models, ARMAX-RLS and ARMAX-RELS, have acceptable prediction performance under conditions where the data exhibit stable patterns. However, if the predicted data have strong nonlinearities as well as irregular changes, the two ARMAX models can only meet the minimum prediction needs. Compared to the other three variants, the NARXNN model achieves the most accurate prediction results in both study cases, due to the help of the neural network for nonlinear data prediction on the one hand and the choice of exogenous inputs on the other.</p>
<p>In future research, the method of feature selection will be further explored and the interpretability of the method will be enhanced. In addition, the predictive models will be further incorporated into control strategies for cost reduction in industrial processes.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s10">Supplementary Material</xref>; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>YL was responsible for the specific work of this manuscript. GF and YL collaborated on the analysis of the data and the writing of the manuscript. DH guided the work of this manuscript. BC and HC reviewed the content of the paper.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>This research was funded by the National Natural Science Foundation of China (62273151, 61873096, and 62073145), the Guangdong Basic and Applied Basic Research Foundation (2020A1515011057, 2021B1515420003), the Guangdong Technology International Cooperation Project Application (2020A0505100024, 2021A0505060001), and the Fundamental Research Funds for the Central Universities, SCUT (2020ZYGXZR034). Yiqi Liu also acknowledges the support of the Horizon 2020 Framework Programme-Marie Sk&#x142;odowska-Curie Individual Fellowships (891627).</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fenrg.2022.1073271/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fenrg.2022.1073271/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Table1.XLSX" id="SM1" mimetype="application/XLSX" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alt&#x131;nta&#x15f;</surname>
<given-names>R. S.</given-names>
</name>
<name>
<surname>Kahya</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>&#xdc;nver</surname>
<given-names>H. &#xd6;.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Modelling and optimization of energy consumption for feature based milling</article-title>. <source>Int. J. Adv. Manuf. Technol.</source> <volume>86</volume>, <fpage>3345</fpage>&#x2013;<lpage>3363</lpage>. <pub-id pub-id-type="doi">10.1007/s00170-016-8441-7</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="book">
<person-group person-group-type="editor">
<name>
<surname>Ansari</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Singh Gill</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lanza</surname>
<given-names>G. R.</given-names>
</name>
<name>
<surname>Rast</surname>
<given-names>W.</given-names>
</name>
</person-group> (Editors) (<year>2011</year>). <source>Eutrophication: Causes, consequences and control</source> (<publisher-loc>Dordrecht</publisher-loc>: <publisher-name>Springer Netherlands</publisher-name>). <pub-id pub-id-type="doi">10.1007/978-90-481-9625-8</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Box</surname>
<given-names>G. E. P.</given-names>
</name>
<name>
<surname>Jenkins</surname>
<given-names>G. M.</given-names>
</name>
<name>
<surname>Reinsel</surname>
<given-names>G. C.</given-names>
</name>
</person-group> (<year>2008</year>). <source>Time series analysis: Forecasting and control</source>. <edition>4th ed.</edition> <publisher-loc>Hoboken, New Jersey</publisher-loc>: <publisher-name>John Wiley &#x26; Sons</publisher-name>. <pub-id pub-id-type="doi">10.1002/9781118619193</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cadenas</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Rivera</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Campos-Amezcua</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Cadenas</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Wind speed forecasting using the NARX model, case: La Mata, Oaxaca, M&#xe9;xico</article-title>. <source>Neural Comput. Appl.</source> <volume>27</volume>, <fpage>2417</fpage>&#x2013;<lpage>2428</lpage>. <pub-id pub-id-type="doi">10.1007/s00521-015-2012-y</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Car-Pusic</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Petruseva</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zileska Pancovska</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Zafirovski</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Neural network-based model for predicting preliminary construction cost as part of cost predicting system</article-title>. <source>Adv. Civ. Eng.</source> <volume>2020</volume>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1155/2020/8886170</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chan</surname>
<given-names>D. Y.-L.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>C.-F.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>W.-C.</given-names>
</name>
<name>
<surname>Hong</surname>
<given-names>G.-B.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Energy efficiency benchmarking of energy-intensive industries in Taiwan</article-title>. <source>Energy Convers. Manag.</source> <volume>77</volume>, <fpage>216</fpage>&#x2013;<lpage>220</lpage>. <pub-id pub-id-type="doi">10.1016/j.enconman.2013.09.027</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Dan Foresee</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Hagan</surname>
<given-names>M. T.</given-names>
</name>
</person-group> (<year>1997</year>). &#x201c;<article-title>Gauss-Newton approximation to Bayesian learning</article-title>,&#x201d; in <conf-name>Proceedings of International Conference on Neural Networks (ICNN&#x2019;97)</conf-name>, <conf-loc>Houston, TX, USA</conf-loc>, <conf-date>April 1997</conf-date> (<publisher-name>IEEE</publisher-name>). <pub-id pub-id-type="doi">10.1109/ICNN.1997.614194</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>De Gooijer</surname>
<given-names>J. G.</given-names>
</name>
<name>
<surname>Hyndman</surname>
<given-names>R. J.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>25 years of time series forecasting</article-title>. <source>Int. J. Forecast.</source> <volume>22</volume>, <fpage>443</fpage>&#x2013;<lpage>473</lpage>. <pub-id pub-id-type="doi">10.1016/j.ijforecast.2006.01.001</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Deng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Actively exploring informative data for smart modeling of industrial multiphase flow processes</article-title>. <source>IEEE Trans. Ind. Inf.</source> <volume>17</volume>, <fpage>8357</fpage>&#x2013;<lpage>8366</lpage>. <pub-id pub-id-type="doi">10.1109/TII.2020.3046013</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ding</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Several multi-innovation identification methods</article-title>. <source>Digit. Signal Process.</source> <volume>20</volume>, <fpage>1027</fpage>&#x2013;<lpage>1039</lpage>. <pub-id pub-id-type="doi">10.1016/j.dsp.2009.10.030</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Lahdelma</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Evaluation of a multiple linear regression model and SARIMA model in forecasting heat demand for district heating system</article-title>. <source>Appl. Energy</source> <volume>179</volume>, <fpage>544</fpage>&#x2013;<lpage>552</lpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2016.06.133</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guyon</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Elisseeff</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>An introduction to variable and feature selection</article-title>. <source>J. Mach. Learn. Res.</source> <volume>3</volume>, <fpage>1157</fpage>&#x2013;<lpage>1182</lpage>. <pub-id pub-id-type="doi">10.1162/153244303322753616</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guyon</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Weston</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Barnhill</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Vapnik</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Gene selection for cancer classification using support vector machines</article-title>. <source>Mach. Learn.</source> <volume>46</volume>, <fpage>389</fpage>&#x2013;<lpage>422</lpage>. <pub-id pub-id-type="doi">10.1023/A:1012487302797</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zeng</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Geng</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Energy management and optimization modeling based on a novel fuzzy extreme learning machine: Case study of complex petrochemical industries</article-title>. <source>Energy Convers. Manag.</source> <volume>165</volume>, <fpage>163</fpage>&#x2013;<lpage>171</lpage>. <pub-id pub-id-type="doi">10.1016/j.enconman.2018.03.049</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hsu</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Identifying key variables and interactions in statistical models of building energy consumption using regularization</article-title>. <source>Energy</source> <volume>83</volume>, <fpage>144</fpage>&#x2013;<lpage>155</lpage>. <pub-id pub-id-type="doi">10.1016/j.energy.2015.02.008</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>K. Y.</given-names>
</name>
<name>
<surname>Jane</surname>
<given-names>C.-J.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>A hybrid model for stock market forecasting and portfolio selection based on ARX, grey system and RS theories</article-title>. <source>Expert Syst. Appl.</source> <volume>36</volume>, <fpage>5387</fpage>&#x2013;<lpage>5392</lpage>. <pub-id pub-id-type="doi">10.1016/j.eswa.2008.06.103</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="book">
<collab>International Energy Agency</collab> (<year>2021</year>). <source>World energy outlook 2021</source>, <publisher-loc>Paris</publisher-loc>: <publisher-name>International Energy Agency</publisher-name>. <comment>Available at: <ext-link ext-link-type="uri" xlink:href="https://www.iea.org/reports/world-energy-outlook-2021">https://www.iea.org/reports/world-energy-outlook-2021</ext-link>
</comment>.</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jia</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Energy modeling method of machine-operator system for sustainable machining</article-title>. <source>Energy Convers. Manag.</source> <volume>172</volume>, <fpage>265</fpage>&#x2013;<lpage>276</lpage>. <pub-id pub-id-type="doi">10.1016/j.enconman.2018.07.030</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Juberias</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Yunta</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Garcia Moreno</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Mendivil</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>1999</year>). &#x201c;<article-title>A new ARIMA model for hourly load forecasting</article-title>,&#x201d; in <conf-name>Proceedings of the 1999 IEEE Transmission and Distribution Conference (Cat. No. 99CH36333)</conf-name>, <conf-loc>New Orleans, LA, USA</conf-loc>, <conf-date>June 1999</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>314</fpage>&#x2013;<lpage>319</lpage>. <pub-id pub-id-type="doi">10.1109/TDC.1999.755371</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kahraman</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Kantardzic</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kahraman</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Kotan</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A data-driven multi-regime approach for predicting energy consumption</article-title>. <source>Energies</source> <volume>14</volume>, <fpage>6763</fpage>. <pub-id pub-id-type="doi">10.3390/en14206763</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kun</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Weibing</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Prediction of China&#x2019;s total energy consumption based on Bayesian ARIMA-nonlinear regression model</article-title>. <source>IOP Conf. Ser. Earth Environ. Sci.</source> <volume>657</volume>, <fpage>012056</fpage>. <pub-id pub-id-type="doi">10.1088/1755-1315/657/1/012056</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>M.-J.</given-names>
</name>
<name>
<surname>Tao</surname>
<given-names>W.-Q.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Review of methodologies and polices for evaluation of energy efficiency in high energy-consuming industry</article-title>. <source>Appl. Energy</source> <volume>187</volume>, <fpage>203</fpage>&#x2013;<lpage>215</lpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2016.11.039</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Adaptive ranking based ensemble learning of Gaussian process regression models for quality-related variable prediction in process industries</article-title>. <source>Appl. Soft Comput.</source> <volume>101</volume>, <fpage>107060</fpage>. <pub-id pub-id-type="doi">10.1016/j.asoc.2020.107060</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Dai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Development of adversarial transfer learning soft sensor for multigrade processes</article-title>. <source>Ind. Eng. Chem. Res.</source> <volume>59</volume>, <fpage>16330</fpage>&#x2013;<lpage>16345</lpage>. <pub-id pub-id-type="doi">10.1021/acs.iecr.0c02398</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>MacKay</surname>
<given-names>D. J. C.</given-names>
</name>
</person-group> (<year>1992</year>). <article-title>A practical Bayesian framework for backpropagation networks</article-title>. <source>Neural Comput.</source> <volume>4</volume>, <fpage>448</fpage>&#x2013;<lpage>472</lpage>. <pub-id pub-id-type="doi">10.1162/neco.1992.4.3.448</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rakotomamonjy</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Variable selection using SVM-based criteria</article-title>. <source>J. Mach. Learn. Res.</source> <volume>3</volume>, <fpage>1357</fpage>&#x2013;<lpage>1370</lpage>. <pub-id pub-id-type="doi">10.1162/153244303322753706</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shi</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Hao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A synchronous prediction model based on multi-channel CNN with moving window for coal and electricity consumption in cement calcination process</article-title>. <source>Sensors</source> <volume>21</volume>, <fpage>4284</fpage>. <pub-id pub-id-type="doi">10.3390/s21134284</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Silva</surname>
<given-names>V. L. G. da</given-names>
</name>
<name>
<surname>Oliveira Filho</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Carlo</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>Vaz</surname>
<given-names>P. N.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>An approach to solar radiation prediction using ARX and ARMAX models</article-title>. <source>Front. Energy Res.</source> <volume>10</volume>, <fpage>822555</fpage>. <pub-id pub-id-type="doi">10.3389/fenrg.2022.822555</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vu</surname>
<given-names>D. H.</given-names>
</name>
<name>
<surname>Muttaqi</surname>
<given-names>K. M.</given-names>
</name>
<name>
<surname>Agalgaonkar</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Bouzerdoum</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Short-term electricity demand forecasting using autoregressive based time varying model incorporating representative data adjustment</article-title>. <source>Appl. Energy</source> <volume>205</volume>, <fpage>790</fpage>&#x2013;<lpage>801</lpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2017.08.135</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xia</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>A review of data-driven approaches for prediction and classification of building energy consumption</article-title>. <source>Renew. Sustain. Energy Rev.</source> <volume>82</volume>, <fpage>1027</fpage>&#x2013;<lpage>1047</lpage>. <pub-id pub-id-type="doi">10.1016/j.rser.2017.09.108</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Learning soft sensors using time difference&#x2013;based multi-kernel relevance vector machine with applications for quality-relevant monitoring in wastewater treatment</article-title>. <source>Environ. Sci. Pollut. Res.</source> <volume>27</volume>, <fpage>28986</fpage>&#x2013;<lpage>28999</lpage>. <pub-id pub-id-type="doi">10.1007/s11356-020-09192-3</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>A hybrid model based on selective ensemble for energy consumption forecasting in China</article-title>. <source>Energy</source> <volume>159</volume>, <fpage>534</fpage>&#x2013;<lpage>546</lpage>. <pub-id pub-id-type="doi">10.1016/j.energy.2018.06.161</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>