<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Public Health</journal-id>
<journal-title>Frontiers in Public Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Public Health</abbrev-journal-title>
<issn pub-type="epub">2296-2565</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpubh.2024.1397260</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Public Health</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Integrating gated recurrent unit in graph neural network to improve infectious disease prediction: an attempt</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author"><name><surname>Liu</surname> <given-names>Xu-dong</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref><xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2712101/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author"><name><surname>Hou</surname> <given-names>Bo-han</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref><xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2557347/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author"><name><surname>Xie</surname> <given-names>Zhong-jun</given-names></name><xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" corresp="yes"><name><surname>Feng</surname> <given-names>Ning</given-names></name><xref ref-type="aff" rid="aff3"><sup>3</sup></xref><xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1535123/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes"><name><surname>Dong</surname> <given-names>Xiao-ping</given-names></name><xref ref-type="aff" rid="aff4"><sup>4</sup></xref><xref ref-type="aff" rid="aff5"><sup>5</sup></xref><xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Faculty of Information Technology, Beijing University of Technology</institution>, <addr-line>Chaoyang District, Beijing</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>Key Laboratory of Computational Intelligence and Intelligent Systems, Beijing University of Technology</institution>, <addr-line>Chaoyang District, Beijing</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>Office of International Cooperation, Chinese Center for Disease Control and Prevention</institution>, <addr-line>Chaoyang District, Beijing</addr-line>, <country>China</country></aff>
<aff id="aff4"><sup>4</sup><institution>National Institute for Viral Disease Control and Prevention, Chinese Center for Disease Control and Prevention</institution>, <addr-line>Chaoyang District, Beijing</addr-line>, <country>China</country></aff>
<aff id="aff5"><sup>5</sup><institution>National Key-Laboratory of Intelligent Tracking and Forecasting for Infectious Disease, National Institute for Viral Disease Control and Prevention, Chinese Center for Disease Control and Prevention</institution>, <addr-line>Chang-Bai, Beijing</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by" id="fn0003">
<p>Edited by: Dmytro Chumachenko, National Aerospace University &#x2013; Kharkiv Aviation Institute, Ukraine</p>
</fn>
<fn fn-type="edited-by" id="fn0004">
<p>Reviewed by: Luigi Di Biasi, University of Salerno, Italy</p>
<p>Xinqiang Chen, Shanghai Maritime University, China</p>
<p>Ricardo Valentim, Federal University of Rio Grande do Norte, Brazil</p>
</fn>
<corresp id="c001">&#x002A;Correspondence: Ning Feng, <email>fengning@chinacdc.cn</email>; Xiao-ping Dong, <email>Dongxp@chinacdc.cn</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>20</day>
<month>05</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>12</volume>
<elocation-id>1397260</elocation-id>
<history>
<date date-type="received">
<day>07</day>
<month>03</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>04</day>
<month>04</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2024 Liu, Hou, Xie, Feng and Dong.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Liu, Hou, Xie, Feng and Dong</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec id="sec1">
<title>Objective</title>
<p>This study focuses on enhancing the precision of epidemic time series data prediction by integrating Gated Recurrent Unit (GRU) into a Graph Neural Network (GNN), forming the GRGNN. The accuracy of the GNN (Graph Neural Network) network with introduced GRU (Gated Recurrent Units) is validated by comparing it with seven commonly used prediction methods.</p>
</sec>
<sec id="sec2">
<title>Method</title>
<p>The GRGNN methodology involves multivariate time series prediction using a GNN (Graph Neural Network) network improved by the integration of GRU (Gated Recurrent Units). Additionally, Graphical Fourier Transform (GFT) and Discrete Fourier Transform (DFT) are introduced. GFT captures inter-sequence correlations in the spectral domain, while DFT transforms data from the time domain to the frequency domain, revealing temporal node correlations. Following GFT and DFT, outbreak data are predicted through one-dimensional convolution and gated linear regression in the frequency domain, graph convolution in the spectral domain, and GRU (Gated Recurrent Units) in the time domain. The inverse transformation of GFT and DFT is employed, and final predictions are obtained after passing through a fully connected layer. Evaluation is conducted on three datasets: the COVID-19 datasets of 38 African countries and 42 European countries from worldometers, and the chickenpox dataset of 20 Hungarian regions from Kaggle. Metrics include Average Root Mean Square Error (ARMSE) and Average Mean Absolute Error (AMAE).</p>
</sec>
<sec id="sec3">
<title>Result</title>
<p>For the African COVID-19 dataset and the Hungarian chickenpox dataset, GRGNN consistently outperforms other methods in ARMSE and AMAE across various prediction step lengths. Optimal results are achieved even at extended prediction steps, highlighting the model&#x2019;s robustness.</p>
</sec>
<sec id="sec4">
<title>Conclusion</title>
<p>GRGNN proves effective in predicting epidemic time series data with high accuracy, demonstrating its potential in epidemic surveillance and early warning applications. However, further discussions and studies are warranted to refine its application and judgment methods, emphasizing the ongoing need for exploration and research in this domain.</p>
</sec>
</abstract>
<kwd-group>
<kwd>artificial intelligence technology</kwd>
<kwd>graph neural network</kwd>
<kwd>gated recurrent unit</kwd>
<kwd>infectious disease</kwd>
<kwd>time series prediction</kwd>
</kwd-group>
<contract-num rid="cn1">2019SKLID603</contract-num>
<contract-sponsor id="cn1">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content></contract-sponsor>
<counts>
<fig-count count="9"/>
<table-count count="6"/>
<equation-count count="23"/>
<ref-count count="48"/>
<page-count count="21"/>
<word-count count="12600"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Digital Public Health</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec5">
<label>1</label>
<title>Introduction</title>
<p>Multivariate time series forecasting plays a crucial role in various real-world scenarios such as transportation forecasting (<xref ref-type="bibr" rid="ref1">1</xref>, <xref ref-type="bibr" rid="ref2">2</xref>), supply chain management (<xref ref-type="bibr" rid="ref3">3</xref>), energy allocation (<xref ref-type="bibr" rid="ref4">4</xref>, <xref ref-type="bibr" rid="ref5">5</xref>) and financial investment (<xref ref-type="bibr" rid="ref6">6</xref>). Time series prediction involves forecasting future values based on historical data points in sequential order. This makes statistical methods and supervised learning methods more suitable for this task than reinforcement learning (<xref ref-type="bibr" rid="ref7">7</xref>, <xref ref-type="bibr" rid="ref8">8</xref>) or unsupervised learning methods. In the field of public health, the problem of acute epidemic forecasting is of great relevance as a typical multivariate time series forecasting problem: if the future evolution of acute epidemic data can be estimated quickly and accurately for each geographic region, the forecasting results can be used as a reference to help governmental agencies make decisions on policy formulation and material deployment, and thus prevent the development and spread of epidemics.</p>
<p>The field of epidemiology and public health research has witnessed a large number of studies on time series prediction of infectious diseases, which reveals the demand for prediction methods in this field. A selection of notable works has contributed to this progress, showcasing innovative approaches and methodologies for forecasting and managing disease outbreaks. For instance, Pinto et al. (<xref ref-type="bibr" rid="ref9">9</xref>) applied a regressive model to estimate intervention effects over time by comparing rates of congenital syphilis. Cori et al. (<xref ref-type="bibr" rid="ref10">10</xref>) presented a novel tool for tracking the spread of diseases by estimating time-varying reproduction numbers. Du et al. (<xref ref-type="bibr" rid="ref11">11</xref>) focused on the serial interval of COVID-19, which contributes to the foundation of the transmission dynamics of COVID-19 and is essential for effective prediction and control measures. However, when facing the outbreak of an acute epidemic, traditional transmission dynamics models may be incapable of performing the prediction task. For example, in 2020, Ioannidis et al. (<xref ref-type="bibr" rid="ref12">12</xref>) found that traditional transmission models failed in forecasting COVID-19. Many studies have therefore attempted to apply machine learning methods to handle the problem. Dairi et al. (<xref ref-type="bibr" rid="ref13">13</xref>) compared 7 kinds of neural network in the prediction of the number of COVID-19 cases. In fact, neural networks have also been applied to the prediction of other epidemics. Sanchez-Gendriz et al. (<xref ref-type="bibr" rid="ref14">14</xref>) applied a Long Short-Term Memory (LSTM) network to the prediction of dengue outbreaks in Natal, demonstrating the potential of neural networks in disease surveillance at a local scale. It is therefore worthwhile to research the potential of neural networks in epidemic time series data prediction.</p>
<p>Early time series forecasting research mainly relied on traditional statistical models, including historical average (HA), autoregressive (AR), autoregressive integrated moving average (ARIMA) (<xref ref-type="bibr" rid="ref15">15</xref>), vector autoregression (VAR) (<xref ref-type="bibr" rid="ref16">16</xref>), and fuzzy methods (<xref ref-type="bibr" rid="ref17">17</xref>). All of these statistical models rely on inherent <italic>a priori</italic> assumptions and require manual analysis of the characteristics of the study population to determine the applicability of the forecasting method.</p>
<p>Accurate prediction of multivariate time series data is a challenging type of time series forecasting problem, because both the correlation between the time nodes within each single time series and the correlation between the time series need to be considered comprehensively. During the outbreak of an infectious disease in a certain area, the change in the number of active cases is, on the one hand, related to the number of existing active cases in the locality or to previous epidemic data. For instance, the outbreak of some infectious diseases has obvious seasonality, and by referring to the changes in active cases in previous years, one can roughly predict the current trend of active case changes. The data from a certain point or period in the time series is related to the data from the current or future time points, which reflects the correlation between the time nodes within each single time series. On the other hand, the number of active cases in a certain area may be related to the case numbers in neighboring areas or areas with frequent personnel movement. These time series may exhibit leading, lagging, or even synchronous trends, which demonstrates the correlation between different time series. Deep learning models provide new ideas for this problem: on the one hand, Temporal Convolutional Network (TCN) (<xref ref-type="bibr" rid="ref18">18</xref>) has excellent results in single time series prediction. Recurrent Neural Network (RNN) based methods (<xref ref-type="bibr" rid="ref19 ref20 ref21">19&#x2013;21</xref>) such as LSTM (Long Short-Term Memory) (<xref ref-type="bibr" rid="ref22">22</xref>), Gated Recurrent Unit (<xref ref-type="bibr" rid="ref23">23</xref>), and Gated Linear Unit (GLU) (<xref ref-type="bibr" rid="ref24">24</xref>) have good results in single time series prediction. GLU can effectively capture and learn the correlation and nonlinear features among time nodes within a time series (<xref ref-type="bibr" rid="ref24">24</xref>). 
Han et al. (<xref ref-type="bibr" rid="ref25">25</xref>) compared the prediction effects of ARIMA, deep neural network (DNN), and LSTM (Long Short-Term Memory) network for occupational pneumoconiosis data in Tianjin, China, and proved that LSTM (Long Short-Term Memory) can effectively predict occupational pneumoconiosis data and, at the same time, has an advantage in prediction accuracy compared with DNN and ARIMA. However, most of these models ignore the dependencies between multiple variables and can only capture and learn the features within a single time series in isolation, which makes them perform poorly in practical multivariate time series prediction problems.</p>
<p>Meanwhile, in the problem of mining relationships between sequences, Yu et al. used matrix decomposition to model the relationship between multiple time series (<xref ref-type="bibr" rid="ref26">26</xref>). Introducing the Discrete Fourier Transform (DFT) is also useful in time series analysis. For example, State Frequency Memory Network (<xref ref-type="bibr" rid="ref27">27</xref>) combines the advantages of DFT and LSTM (Long Short-Term Memory) for stock price prediction; Spectral Residual model (<xref ref-type="bibr" rid="ref28">28</xref>) utilized DFT to achieve desirable results in time series anomaly detection. Another important aspect of multivariate time series forecasting is modeling the correlation between multiple time series. For example, in traffic prediction tasks, neighboring roads naturally interact with each other. The state-of-the-art models rely heavily on graph convolution networks (GCNs) derived from graph Fourier transform (GFT) theory (<xref ref-type="bibr" rid="ref29">29</xref>). These models (<xref ref-type="bibr" rid="ref1">1</xref>, <xref ref-type="bibr" rid="ref2">2</xref>) directly stack GCNs and temporal modules (e.g., LSTM (Long Short-Term Memory), GRU (Gated Recurrent Unit)), which require predefined graph-structured relationships between sequences. By simultaneously capturing the dependencies between time nodes within each single sequence and between different time series, they improve the learning of time series features and thus the prediction accuracy. Convolutional Neural Network (CNN) has a good performance in learning local features (<xref ref-type="bibr" rid="ref30">30</xref>). There have been several methods to model spatial features using CNNs (<xref ref-type="bibr" rid="ref31 ref32 ref33 ref34 ref35">31&#x2013;35</xref>). Ma et al. (<xref ref-type="bibr" rid="ref34">34</xref>) used deep CNN for traffic speed prediction. Huang et al. 
(<xref ref-type="bibr" rid="ref36">36</xref>) tried to use transformer to predict multiple time series variables and obtained good prediction results.</p>
<p>The introduction of GRU (Gated Recurrent Unit) units provides better learning and fitting capabilities in the time domain compared to the linear units used in general GNN (Graph Neural Network) research methods. In addition, the above processes are modularized when implemented. Individual modules can be connected in series by shortcut connections to further improve the prediction accuracy of the neural network by constructing a deep network. Because RNN methods, such as LSTM (Long Short-Term Memory) and GRU (Gated Recurrent Unit), have clear advantages over normal feed-forward neural networks in time series prediction, there have been a large number of attempts to use RNNs combined with GNNs (Graph Neural Networks), CNNs, or other neural network architectures to predict multivariate time series: Lv et al. (<xref ref-type="bibr" rid="ref33">33</xref>) combined RNN with CNN, where the RNN is responsible for mining and learning the features within each single sequence, and the CNN captures the relationships between sequences. Luo et al. (<xref ref-type="bibr" rid="ref37">37</xref>) introduced GRU (Gated Recurrent Unit) into GCN to predict the change of gas composition in transformer oil during transformer operation. Zhang et al. (<xref ref-type="bibr" rid="ref38">38</xref>) proposed ST-ResNet based on residual convolutional network for crowd flow prediction. Shi et al. (<xref ref-type="bibr" rid="ref20">20</xref>) combined a convolutional network with an LSTM (Long Short-Term Memory) network to extract spatio-temporal information separately.</p>
<p>Graph neural networks have also yielded many results in capturing dependencies among unstructured data (<xref ref-type="bibr" rid="ref1">1</xref>, <xref ref-type="bibr" rid="ref2">2</xref>, <xref ref-type="bibr" rid="ref7">7</xref>, <xref ref-type="bibr" rid="ref29">29</xref>, <xref ref-type="bibr" rid="ref39 ref40 ref41 ref42 ref43">39&#x2013;43</xref>). DCRNN (<xref ref-type="bibr" rid="ref1">1</xref>) and STGCN (<xref ref-type="bibr" rid="ref2">2</xref>) are two of the first studies to introduce graph convolutional networks into spatio-temporal data prediction for better modeling of spatial dependencies. ASTGCN (<xref ref-type="bibr" rid="ref40">40</xref>) adds an additional attention layer to capture the dynamic change of spatio-temporal dependencies. Adaptive learning of adjacency matrices can also be introduced to solve problems that require predefined graphs for adjacency matrices (<xref ref-type="bibr" rid="ref35">35</xref>, <xref ref-type="bibr" rid="ref39">39</xref>, <xref ref-type="bibr" rid="ref41">41</xref>, <xref ref-type="bibr" rid="ref42">42</xref>).</p>
<p>However, previous studies have never processed time series data in three domains, and they have hardly ever been applied to epidemic time series prediction problems. Nevertheless, they provide the fundamental framework of the GNN (Graph Neural Network) and GRU (Gated Recurrent Unit) methods and prove the effectiveness of these methods, so that we can adapt them to our purpose of introducing GRU (Gated Recurrent Unit) units into a GNN (Graph Neural Network) to achieve better results in time series data prediction problems.</p>
<p>The goal of this study is to try to introduce a GRU (Gated Recurrent Unit) layer in the graph neural network to enable the network to better capture and learn the relationship of each single time node within a sequence and the correlation between individual time series. Specifically, after this change, the neural network is able to learn features and make predictions from multivariate time series data in the frequency, spectral, and time domains: after GFT and DFT, it is easier to perform convolution and graphical convolution operations on the time series in the frequency and spectral domains respectively, which in turn allows for more effective predictions. The introduction of GRU (Gated Recurrent Unit) units provides better learning in the time domain compared to linear units used in the general GNN (Graph Neural Network) research methods.</p>
</sec>
<sec sec-type="methods" id="sec6">
<label>2</label>
<title>Methods</title>
<p>The overall structure of the improved GNN (Graph Neural Network) network (later referred to as GRGNN) with the introduction of GRU (Gated Recurrent Unit) consists of three parts: the preprocessing layer, the GRGNN module layer, and the output layer, and the overall structure is shown in <xref ref-type="fig" rid="fig1">Figure 1</xref>.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>The overall structure of the improved GRGNN network.</p>
</caption>
<graphic xlink:href="fpubh-12-1397260-g001.tif"/>
</fig>
<p>The input is a multivariate time series data <inline-formula>
<mml:math id="M1">
<mml:mi>X</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfenced open="{" close="}">
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi mathvariant="italic">it</mml:mi>
</mml:msub>
</mml:mfenced>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211D;</mml:mi>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> containing <inline-formula>
<mml:math id="M2">
<mml:mi>T</mml:mi>
</mml:math>
</inline-formula> time nodes in <inline-formula>
<mml:math id="M3">
<mml:mi>N</mml:mi>
</mml:math>
</inline-formula> columns, and before being processed layer by layer by the deep neural network, a graph structure <inline-formula>
<mml:math id="M4">
<mml:mi>G</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfenced open="{" close="}" separators=",">
<mml:mi>X</mml:mi>
<mml:mi>W</mml:mi>
</mml:mfenced>
</mml:math>
</inline-formula> describing the relationship between the input data is first obtained through the smoothing module and the graph building module, where <inline-formula>
<mml:math id="M5">
<mml:mi>X</mml:mi>
</mml:math>
</inline-formula> is the data of each node in the input, and <inline-formula>
<mml:math id="M6">
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> is the connection weight matrix between each node. <inline-formula>
<mml:math id="M7">
<mml:mi>G</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfenced open="{" close="}" separators=",">
<mml:mi>X</mml:mi>
<mml:mi>W</mml:mi>
</mml:mfenced>
</mml:math>
</inline-formula> is fed into the GRGNN module layer and the output layer after several rounds of training and learning to obtain the final prediction result <inline-formula>
<mml:math id="M8">
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mo>=</mml:mo>
<mml:mfenced open="{" close="}" separators=",,,">
<mml:msub>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2026;</mml:mo>
<mml:msub>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mfenced>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math id="M9">
<mml:mi>T</mml:mi>
</mml:math>
</inline-formula> is the number of time nodes of the input time series data and <inline-formula>
<mml:math id="M10">
<mml:mi>H</mml:mi>
</mml:math>
</inline-formula> is the prediction step size. A mathematical description of the above process can be expressed in <xref ref-type="disp-formula" rid="EQ1">Equations 1</xref>, <xref ref-type="disp-formula" rid="EQ2">2</xref>:</p>
<disp-formula id="EQ1">
<label>(1)</label>
<mml:math id="M11">
<mml:mi>G</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi mathvariant="italic">graphstruct</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mi>X</mml:mi>
</mml:mfenced>
</mml:math>
</disp-formula>
<disp-formula id="EQ2">
<label>(2)</label>
<mml:math id="M12">
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:msub>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>H</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>=</mml:mo>
<mml:mi>F</mml:mi>
<mml:mfenced open="(" close=")" separators=",">
<mml:mi>X</mml:mi>
<mml:mi>G</mml:mi>
</mml:mfenced>
</mml:math>
</disp-formula>
<sec id="sec7">
<label>2.1</label>
<title>Preprocessing layer</title>
<sec id="sec8">
<label>2.1.1</label>
<title>Smoothing processing module</title>
<p>The input data received by the smoothing module are multivariate time series data <inline-formula>
<mml:math id="M13">
<mml:mover accent="true">
<mml:mi>X</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
<mml:mo>=</mml:mo>
<mml:mfenced open="{" close="}">
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
<mml:mi mathvariant="italic">it</mml:mi>
</mml:msub>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211D;</mml:mi>
<mml:mi>N</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211D;</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:math>
</inline-formula>. Due to the different statistical rules of the health statistics departments in each country, some countries postpone the reporting of weekend epidemic data to Monday of the following week, which is reflected in the data as a line graph with a weekly cycle showing an obvious &#x201C;sawtooth waveform.&#x201D; In order to eliminate the negative impact of this problem on the neural network prediction, and also to remove some of the noise in the input data, a moving window average smoothing step is applied to the input data as preprocessing before it is fed into the neural network.</p>
<p>The principle of sliding window average smoothing processing is shown in <xref ref-type="disp-formula" rid="EQ3">Equation 3</xref>. In the smoothed data <inline-formula>
<mml:math id="M14">
<mml:mi>X</mml:mi>
</mml:math>
</inline-formula> obtained after processing, the value on day <inline-formula>
<mml:math id="M15">
<mml:mi>t</mml:mi>
</mml:math>
</inline-formula> of the time series is equal to the average of the original data on that day and the data on the <inline-formula>
<mml:math id="M16">
<mml:mi>n</mml:mi>
</mml:math>
</inline-formula> days before it and the <inline-formula>
<mml:math id="M17">
<mml:mi>n</mml:mi>
</mml:math>
</inline-formula> days after it, and <inline-formula>
<mml:math id="M18">
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>n</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> is called the window size. Considering the characteristics of the data in this experiment, <inline-formula>
<mml:math id="M19">
<mml:mi>n</mml:mi>
</mml:math>
</inline-formula> is set to 3, that is, the window size is 7.</p>
<disp-formula id="EQ3">
<label>(3)</label>
<mml:math id="M20">
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>n</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo stretchy="true">&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>T</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>
</sec>
<sec id="sec9">
<label>2.1.2</label>
<title>Graph building blocks</title>
<p>GNN (Graph Neural Network)-based methods need to construct a graph structure <inline-formula>
<mml:math id="M21">
<mml:mi>G</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfenced open="{" close="}" separators=",">
<mml:mi>N</mml:mi>
<mml:mi>E</mml:mi>
</mml:mfenced>
</mml:math>
</inline-formula> before forecasting multivariate time series. In this study, the number of active cases in a certain geographical area is taken as the object of the study, and the data of each subregion in the geographical area is taken as the node <inline-formula>
<mml:math id="M22">
<mml:mi>N</mml:mi>
</mml:math>
</inline-formula> of the graph, and the edges <inline-formula>
<mml:math id="M23">
<mml:mi>E</mml:mi>
</mml:math>
</inline-formula> of the graph denote the correlation and the magnitude of the influence of each node on each other. In this study, <inline-formula>
<mml:math id="M24">
<mml:mi>E</mml:mi>
</mml:math>
</inline-formula> is represented by the weight matrix <inline-formula>
<mml:math id="M25">
<mml:mi>W</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211D;</mml:mi>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>. The element <inline-formula>
<mml:math id="M26">
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>N</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>N</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> in <inline-formula>
<mml:math id="M27">
<mml:mi>W</mml:mi>
</mml:math>
</inline-formula> represents the magnitude of the influence weight of the <inline-formula>
<mml:math id="M28">
<mml:mi>i</mml:mi>
</mml:math>
</inline-formula>th node on the <inline-formula>
<mml:math id="M29">
<mml:mi>j</mml:mi>
</mml:math>
</inline-formula>th node. The graph structure in this study is denoted by <inline-formula>
<mml:math id="M30">
<mml:mi>G</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfenced open="{" close="}" separators=",">
<mml:mi>X</mml:mi>
<mml:mi>W</mml:mi>
</mml:mfenced>
</mml:math>
</inline-formula>.</p>
<p>Part of the graph structure can be constructed by humans for observation or through experience or knowledge (e.g., road networks in traffic forecasting, grid systems in electrical energy forecasting). However, in general, there is usually no worthwhile sufficient <italic>a priori</italic> experience to accomplish graph construction artificially. For example, in this study, when dealing with data related to epidemics, there may be a situation where the transmission pathways and characteristics of the epidemics under study have yet to be studied, and the existing research and knowledge about them cannot support the construction of the graph. In order to cope with this situation, the correlation between multiple time series is captured in the preprocessing stage through the self-attention mechanism with the GRU (Gated Recurrent Unit) layer before the data is input into the neural network, and the correlation of each time series is determined in a data-driven manner, which then completes the construction of the required graph structure for the neural network (<xref ref-type="bibr" rid="ref42">42</xref>).</p>
<p>A specific description of the self-attention mechanism approach for the composition layer is given below:</p>
<p>First of all, the multivariate time series <inline-formula>
<mml:math id="M31">
<mml:mi>X</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211D;</mml:mi>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> will be fed into the GRU (Gated Recurrent Unit) layer, which calculates the hidden state corresponding to each time node sequentially. Then, we use the last hidden state to calculate the weight matrix through the self-attention mechanism. The mathematical description is given in <xref ref-type="disp-formula" rid="EQ4">Equations 4</xref><xref ref-type="disp-formula" rid="EQ5">&#x2013;</xref><xref ref-type="disp-formula" rid="EQ6">6</xref>:</p>
<disp-formula id="EQ4">
<label>(4)</label>
<mml:math id="M32">
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mi mathvariant="italic">xaviernormal</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mi>H</mml:mi>
</mml:mfenced>
</mml:math>
</disp-formula>
<disp-formula id="EQ5">
<label>(5)</label>
<mml:math id="M33">
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>K</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mi mathvariant="italic">xaviernormal</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mi>H</mml:mi>
</mml:mfenced>
</mml:math>
</disp-formula>
<disp-formula id="EQ6">
<label>(6)</label>
<mml:math id="M34">
<mml:mo stretchy="true">{</mml:mo>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mi>Q</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>R</mml:mi>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mi>K</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>R</mml:mi>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>K</mml:mi>
</mml:msup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mi>W</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi mathvariant="italic">soft</mml:mi>
<mml:mo>max</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mfrac>
<mml:mrow>
<mml:mi>Q</mml:mi>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
<mml:msqrt>
<mml:mi>d</mml:mi>
</mml:msqrt>
</mml:mfrac>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M35">
<mml:mi>Q</mml:mi>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M36">
<mml:mi>K</mml:mi>
</mml:math>
</inline-formula> denote the query and key hiding matrices, respectively, and the magnitude of their values are computed by two learnable parameter matrices <inline-formula>
<mml:math id="M37">
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>Q</mml:mi>
</mml:msup>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M38">
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>K</mml:mi>
</mml:msup>
</mml:math>
</inline-formula>, respectively, whose initial values are obtained by xavier initialization of the input <inline-formula>
<mml:math id="M39">
<mml:mi>H</mml:mi>
</mml:math>
</inline-formula> (<xref ref-type="bibr" rid="ref44">44</xref>); <inline-formula>
<mml:math id="M40">
<mml:mi>d</mml:mi>
</mml:math>
</inline-formula> is the size of the dimensions of the two matrices <inline-formula>
<mml:math id="M41">
<mml:mi>Q</mml:mi>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M42">
<mml:mi>K</mml:mi>
</mml:math>
</inline-formula>. The final output adjacency weight square matrix <inline-formula>
<mml:math id="M43">
<mml:mi>W</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211D;</mml:mi>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> will be used with the input multidimensional time series <inline-formula>
<mml:math id="M44">
<mml:mi>X</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211D;</mml:mi>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>, which forms the final graph structure <inline-formula>
<mml:math id="M45">
<mml:mi>G</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfenced open="{" close="}" separators=",">
<mml:mi>X</mml:mi>
<mml:mi>W</mml:mi>
</mml:mfenced>
</mml:math>
</inline-formula>.</p>
</sec>
</sec>
<sec id="sec10">
<label>2.2</label>
<title>GRGNN layer</title>
<p>The GRGNN layer consists of multiple GRGNN modules stacked in a shortcut connection manner. The specific structure of the GRGNN module is shown in <xref ref-type="fig" rid="fig2">Figure 2</xref>. Within the GRGNN modules, the features of the data are captured and extracted in three domains: the spectral domain, the frequency domain, and the time domain. The following is a description of each part of the GRGNN block and its functions:</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>The overall structure of GRGNN module.</p>
</caption>
<graphic xlink:href="fpubh-12-1397260-g002.tif"/>
</fig>
<p>Spectral domain graph convolution is a method that has been widely used in time series forecasting problems due to its excellent results in learning latent representations of multiple time series in the spectral domain. The key to the method is the application of the Graph Fourier Transform (GFT) to capture the relationships between time series in the spectral domain. Its output is also a multivariate time series, and the GFT does not explicitly learn the relationship between the data at each time node within a given time series. Therefore, it is necessary to introduce the Discrete Fourier Transform (DFT) to learn the characterization of the input time series in the frequency domain, for example, to capture repetitive features in periodic data.</p>
<sec id="sec11">
<label>2.2.1</label>
<title>Frequency domain convolution part</title>
<p>The function of the frequency domain convolution part aims to transfer each individual time series into the frequency domain representation after processing it by DFT, and to learn its features by 1DConv layer in the frequency domain. It consists of four sub-parts in order: discrete Fourier transform (DFT), one-dimensional convolution (1DConv), gated linear unit (GLU), and inverse discrete Fourier transform (IDFT), where DFT and IDFT are used to transform the time series data between time and frequency domains, and 1DConv and GLU are used to learn the features of the time series in the frequency domain. The DFT processing of time sequence usually results in a complex sequence, and the frequency domain convolution is performed on the real part (<inline-formula>
<mml:math id="M46">
<mml:msubsup>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula>) and imaginary part (<inline-formula>
<mml:math id="M47">
<mml:msubsup>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula>) respectively, and the processing can be expressed by <xref ref-type="disp-formula" rid="EQ7">Equation 7</xref> as:</p>
<disp-formula id="EQ7">
<label>(7)</label>
<mml:math id="M48">
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:msup>
<mml:mi>M</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:msubsup>
<mml:mover>
<mml:mi mathvariant="normal">X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mi>u</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msubsup>
</mml:mfenced>
<mml:mo>=</mml:mo>
<mml:mi>G</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>U</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x03B8;</mml:mi>
<mml:mi>&#x03C4;</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:msubsup>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mi>u</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msubsup>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>&#x03B8;</mml:mi>
<mml:mi>&#x03C4;</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:msubsup>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mi>u</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msubsup>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mspace width="6.25em"/>
<mml:mo>=</mml:mo>
<mml:msubsup>
<mml:mi>&#x03B8;</mml:mi>
<mml:mi>&#x03C4;</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:msubsup>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mi>u</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msubsup>
</mml:mfenced>
<mml:mo>&#x2299;</mml:mo>
<mml:msup>
<mml:mi>&#x03C3;</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x03B8;</mml:mi>
<mml:mi>&#x03C4;</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:msubsup>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mi>u</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msubsup>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:mo>&#x2217;</mml:mo>
<mml:mo>&#x2208;</mml:mo>
<mml:mfenced open="{" close="}" separators=",">
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math id="M49">
<mml:msubsup>
<mml:mi>&#x03B8;</mml:mi>
<mml:mi>&#x03C4;</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msubsup>
</mml:math>
</inline-formula> denotes the size of the convolution kernel for 1D convolution, <inline-formula>
<mml:math id="M50">
<mml:mo>&#x2299;</mml:mo>
</mml:math>
</inline-formula> denotes the Hadamard product operation, and <inline-formula>
<mml:math id="M51">
<mml:msup>
<mml:mi>&#x03C3;</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msup>
</mml:math>
</inline-formula> denotes the <inline-formula>
<mml:math id="M52">
<mml:mi mathvariant="italic">sigmoid</mml:mi>
</mml:math>
</inline-formula> activation function. The final result <inline-formula>
<mml:math id="M53">
<mml:msup>
<mml:mi>M</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:msubsup>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
</mml:msubsup>
</mml:mfenced>
<mml:mo>+</mml:mo>
<mml:mi>i</mml:mi>
<mml:msup>
<mml:mi>M</mml:mi>
<mml:mi>i</mml:mi>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:msubsup>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mi>u</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mfenced>
</mml:math>
</inline-formula> is converted back to the time domain after IDFT processing to participate in the subsequent part of the processing.</p>
</sec>
<sec id="sec12">
<label>2.2.2</label>
<title>Spectral domain graph convolution part</title>
<p>Graph Convolution (<xref ref-type="bibr" rid="ref29">29</xref>) consists of three parts.</p>
<p>First, the multivariate time series inputs are transformed to the spectral domain via GFT. Second, a graph convolution operation is performed on the spectral domain graph structure using a graph convolution operator with a learnable convolution kernel. Third, the inverse graph Fourier transform (IGFT) is performed on the spectral domain convolution result to generate the final output.</p>
<p>The graph Fourier transform (GFT) (<xref ref-type="bibr" rid="ref22">22</xref>) is the basic operator for the convolution of spectral domain graphs. It projects the input graph into a standard orthogonal space where the basis is constructed from the eigenvectors of the normalized graph Laplacian. The normalized graph Laplacian matrix (<xref ref-type="bibr" rid="ref15">15</xref>) can be computed as follows: <inline-formula>
<mml:math id="M54">
<mml:mi>L</mml:mi>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mi>N</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mn>2</mml:mn>
</mml:mfrac>
</mml:mrow>
</mml:msup>
<mml:mi>W</mml:mi>
<mml:msup>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mn>2</mml:mn>
</mml:mfrac>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mi>N</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211D;</mml:mi>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> where <inline-formula>
<mml:math id="M55">
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mi>N</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211D;</mml:mi>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is the unit matrix and <inline-formula>
<mml:math id="M56">
<mml:mi>D</mml:mi>
</mml:math>
</inline-formula> is the degree matrix with diagonal element <inline-formula>
<mml:math id="M57">
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo stretchy="true">&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi>j</mml:mi>
</mml:munder>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula>. Then, the eigenvalue decomposition of the Laplace matrix is performed to obtain <inline-formula>
<mml:math id="M58">
<mml:mi>L</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>U</mml:mi>
<mml:mi>&#x039B;</mml:mi>
<mml:msup>
<mml:mi>U</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math id="M59">
<mml:mi>U</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211D;</mml:mi>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is the matrix of eigenvectors and <inline-formula>
<mml:math id="M60">
<mml:mi>&#x039B;</mml:mi>
</mml:math>
</inline-formula> is the diagonal matrix of eigenvalues. After the GFT, the time series will be transformed into complex numbers; for example, the three datasets after DFT are shown in <xref ref-type="fig" rid="fig3">Figure 3</xref>. For a detailed introduction to the datasets, see section 2.4.1. Given a multivariate time series <inline-formula>
<mml:math id="M61">
<mml:mi>X</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211D;</mml:mi>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>, the GFT and IGFT operators and specific operations are, respectively, denoted as <inline-formula>
<mml:math id="M62">
<mml:mi mathvariant="script">G</mml:mi>
<mml:mi mathvariant="script">F</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mi>X</mml:mi>
</mml:mfenced>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mi>U</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:mi>X</mml:mi>
<mml:mo>=</mml:mo>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M63">
<mml:mi mathvariant="script">G</mml:mi>
<mml:msup>
<mml:mi mathvariant="script">F</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mfenced open="(" close=")">
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
</mml:mfenced>
<mml:mo>=</mml:mo>
<mml:mi>U</mml:mi>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
</mml:math>
</inline-formula>. The graph convolution operator is realized as a function <inline-formula>
<mml:math id="M64">
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>&#x0398;</mml:mi>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mi>&#x039B;</mml:mi>
</mml:mfenced>
</mml:math>
</inline-formula> of the eigenvalue matrix <inline-formula>
<mml:math id="M65">
<mml:mi>&#x039B;</mml:mi>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math id="M66">
<mml:mi>&#x0398;</mml:mi>
</mml:math>
</inline-formula> is the convolution kernel parameter.</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>The overview plot of time series after discrete Fourier transform. <bold>(A1)</bold> The overview plot of real parts in time series for African dataset after discrete Fourier transform. <bold>(A2)</bold> The overview plot of imaginary parts in time series for African dataset after discrete Fourier transform. <bold>(B1)</bold> The overview plot of real parts in time series for European dataset after discrete Fourier transform. <bold>(B2)</bold> The overview plot of imaginary parts in time series for European dataset after discrete Fourier transform. <bold>(C1)</bold> The overview plot of real parts in time series for Hungarian dataset after discrete Fourier transform. <bold>(C2)</bold> The overview plot of imaginary parts in time series for Hungarian dataset after discrete Fourier transform.</p>
</caption>
<graphic xlink:href="fpubh-12-1397260-g003.tif"/>
</fig>
</sec>
<sec id="sec13">
<label>2.2.3</label>
<title>Time domain GRU (gated recurrent units) layer</title>
<p>Recurrent Neural Networks (RNN) are a type of neural networks with an inner recurrent loop structure (<xref ref-type="bibr" rid="ref23">23</xref>). The reformed GRGNN with its introduction and GRGNN&#x2019;s application on the epidemic field is an important innovation in this study. GRU (Gated Recurrent Unit) processes sequences by traversing the sequence elements and generating a hidden state that contains pattern information related to the historical data, which contains the before-and-after relationships of the sequences. GRUs (Gated Recurrent Units) (<xref ref-type="bibr" rid="ref23">23</xref>) are a type of recurrent neural networks in which each loop unit adaptively captures dependencies at different time scales. Similar to LSTM (Long Short-Term Memory) units, GRUs (Gated Recurrent Units) have a gating unit that regulates the information within the unit, but do not have a separate storage unit like LSTM (Long Short-Term Memory).</p>
<disp-formula id="EQ8">
<label>(8)</label>
<mml:math id="M67">
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>&#x03C3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>z</mml:mi>
</mml:msub>
<mml:mspace width="0.25em"/>
<mml:mo>&#x00B7;</mml:mo>
<mml:mspace width="0.25em"/>
<mml:mfenced open="[" close="]" separators=",">
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>
<disp-formula id="EQ9">
<label>(9)</label>
<mml:math id="M68">
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>&#x03C3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mspace width="0.25em"/>
<mml:mo>&#x00B7;</mml:mo>
<mml:mspace width="0.5em"/>
<mml:mfenced open="[" close="]" separators=",">
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>
<disp-formula id="EQ10">
<label>(10)</label>
<mml:math id="M69">
<mml:msub>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>tanh</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mspace width="0.5em"/>
<mml:mo>&#x00B7;</mml:mo>
<mml:mspace width="0.5em"/>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>
<disp-formula id="EQ11">
<label>(11)</label>
<mml:math id="M70">
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mspace width="0.5em"/>
<mml:mo>&#x00B7;</mml:mo>
<mml:mspace width="0.5em"/>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x00B7;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>h</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</disp-formula>
<p>The specific mathematical description of GRU (Gated Recurrent Unit) is shown in <xref ref-type="disp-formula" rid="EQ8">Equations 8</xref><xref ref-type="disp-formula" rid="EQ9"/><xref ref-type="disp-formula" rid="EQ10">&#x2013;</xref><xref ref-type="disp-formula" rid="EQ11">11</xref>. There are only two gate units in GRU (Gated Recurrent Unit): one is the reset gate and the other is the update gate. The role of the update gate is similar to that of the input gate and forgetting gate in LSTM (Long Short-Term Memory): in Equation 11, <inline-formula>
<mml:math id="M71">
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> weights the previous hidden state and is therefore equivalent to the forgetting gate, and <inline-formula>
<mml:math id="M72">
<mml:mi>z</mml:mi>
</mml:math>
</inline-formula> weights the candidate hidden state and is therefore equivalent to the input gate. The GRU (Gated Recurrent Unit) method uses fewer gate units to accomplish a similar task as the LSTM (Long Short-Term Memory) method, so the GRU (Gated Recurrent Unit) method is usually considered when there is a lack of computational power or a desire to improve the training speed and efficiency of neural network learning.</p>
</sec>
</sec>
<sec id="sec14">
<label>2.3</label>
<title>Implementation and parameter design</title>
<p>The GRGNN method was developed using the Python language based on PyTorch and the MATLAB language. The experiments of GRGNN were performed on a deep-learning server with NVIDIA Quadro GV100L GPU &#x002A;1, Intel Xeon Gold 6138 CPU &#x002A;1 and DDR4 32G RAM &#x002A;8, running the Ubuntu 18.04.6 LTS operating system. The baseline methods were all implemented using the MATLAB language on clearance version.</p>
<p>Hyperparameters such as input length, learning rate, batch size, training time and number of hidden units needed to be set in the GRGNN. Empirically, normalization method was set to z-score, input length to 15, learning rate to 4.7e-4, batch size to 15 and training epoch to 150 and the number of layers to 7. Additionally, the ADAM optimizer was used in the training process.</p>
</sec>
<sec id="sec15">
<label>2.4</label>
<title>Dataset, baseline methods and evaluation indicators</title>
<sec id="sec16">
<label>2.4.1</label>
<title>Datasets</title>
<p>In this study, the prediction effect of GRGNN was tested using the 42 European countries&#x2019; COVID-19 dataset, the 38 African countries&#x2019; COVID-19 dataset and the 20 Hungarian regions&#x2019; chickenpox dataset; the overview plots of the datasets are shown in <xref ref-type="fig" rid="fig4">Figure 4</xref>. Both COVID-19 datasets in this study were collected from publicly available data provided by the Worldometers website (<xref ref-type="bibr" rid="ref45">45</xref>). Worldometer is run by an international team of developers, researchers, and volunteers with the goal of making world statistics available in a thought-provoking and time relevant format to a wide audience around the world. Its figures draw on governments&#x2019; communication channels, which makes the data from it more reliable and realistic. The 42 European countries&#x2019; COVID-19 dataset contains 42 time series, and the length of each time series in the dataset is 776. The 38 African countries&#x2019; COVID-19 dataset contains 38 time series, and the length of each time series in the dataset is 776. The 20 Hungarian regions&#x2019; chickenpox dataset contains 20 time series, and the length of each time series in the dataset is 523. Two COVID-19 datasets analyzed during the current study are available in the [Worldometers] repository.<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref> The daily active case count data of each country were collected for a total of 776&#x2009;days from February 15, 2020 to April 1, 2022. The data were cleaned to exclude series that went more than 20&#x2009;days without being updated, as well as data that had a negative number of active cases or other statistical errors. Finally, we classified the data that met the above requirements to obtain the continental active case datasets. 
The 20 Hungarian regions&#x2019; chickenpox dataset consists of weekly chickenpox diagnosis data collected from 20 regions in Hungary over 523&#x2009;weeks from January 3, 2005 to December 29, 2014. The 20 Hungarian regions&#x2019; chickenpox dataset is available;<xref ref-type="fn" rid="fn0002">
<sup>2</sup></xref> the dataset was downloaded from Kaggle (<xref ref-type="bibr" rid="ref46">46</xref>), a website that focuses on providing developers and data scientists with a platform to hold machine learning competitions, host databases, and write and share code. The Hungarian chickenpox dataset, a typical multivariate time series prediction problem dataset, consists of time series collected from the Hungarian Epidemiological Info, a weekly bulletin of morbidity and mortality of infectious diseases in Hungary. This dataset has been tested on the Kaggle platform with many time series prediction methods and data visualization methods.</p>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>The overview plot of the datasets. <bold>(A)</bold> The overview plot of the 38 African countries&#x2019; COVID-19 dataset. <bold>(B)</bold> The overview plot of the 42 European countries&#x2019; COVID-19 dataset. <bold>(C)</bold> The overview plot of the 20 Hungarian regions&#x2019; chickenpox dataset.</p>
</caption>
<graphic xlink:href="fpubh-12-1397260-g004.tif"/>
</fig>
</sec>
<sec id="sec17">
<label>2.4.2</label>
<title>Baseline methods</title>
<p>Three widely used neural network architectures, LSTM (Long Short-Term Memory), GRU (Gated Recurrent Unit) and CNN-LSTM, and statistical methods were chosen as the control group in this study. The statistical methods include the weighted moving average (WMA) method (<xref ref-type="bibr" rid="ref47">47</xref>), the Gaussian function method (<xref ref-type="bibr" rid="ref48">48</xref>) and the polynomial function method (<xref ref-type="bibr" rid="ref48">48</xref>).</p>
<p>The following 7 baseline methods were used to compare the performance with the GRGNN:</p>
<p>ARIMA (<xref ref-type="bibr" rid="ref15">15</xref>): ARIMA (Autoregressive Integrated Moving Average Model) is a widely applied time series forecasting method, extensively used across various fields. This paper adopts it as a classical statistical prediction method to compare with machine learning approaches for forecasting COVID-19 data in Africa. Its specific definition is given in <xref ref-type="disp-formula" rid="EQ12">Equation 12</xref>.</p>
<disp-formula id="EQ12">
<label>(12)</label>
<mml:math id="M73">
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo stretchy="true">&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>p</mml:mi>
</mml:munderover>
<mml:msub>
<mml:mi>&#x03C6;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msup>
<mml:mi>L</mml:mi>
<mml:mi>i</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
<mml:msup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mi>d</mml:mi>
</mml:msup>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>+</mml:mo>
<mml:munderover>
<mml:mstyle displaystyle="true">
<mml:mo stretchy="true">&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>q</mml:mi>
</mml:munderover>
<mml:msub>
<mml:mi>&#x03B8;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msup>
<mml:mi>L</mml:mi>
<mml:mi>i</mml:mi>
</mml:msup>
</mml:mrow>
</mml:mfenced>
<mml:msub>
<mml:mi>&#x03B5;</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</disp-formula>
<p>Herein, <inline-formula>
<mml:math id="M74">
<mml:mi>L</mml:mi>
</mml:math>
</inline-formula> represents the lag operator, with <inline-formula>
<mml:math id="M75">
<mml:mi>d</mml:mi>
<mml:mo>&#x003E;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>d</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>Z</mml:mi>
</mml:math>
</inline-formula>. The main steps of this method are as follows:</p>
<p>The prediction is completed in four steps. Step 1, Time series preprocessing. The primary purpose here is to make the input to the ARIMA model a stationary time series. If the data series is non-stationary and exhibits certain growth or decline trends, it is necessary to difference the data. Step 2, Establishing the model based on identification rules for time series models. If the partial autocorrelation function of the stationary series is truncated while the autocorrelation function is tailed, the series is suitable for an AR model; if the partial autocorrelation function is tailed while the autocorrelation function is truncated, the series is suitable for an MA model; if both the partial autocorrelation and autocorrelation functions are tailed, the series fits an ARIMA model. Step 3, Determining the order of AR and MA. Utilize the Akaike Information Criterion (AIC) and Bayesian Information Criterion (BIC) to determine the orders <inline-formula>
<mml:math id="M76">
<mml:mi>p</mml:mi>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M77">
<mml:mi>q</mml:mi>
</mml:math>
</inline-formula> of AR and MA, respectively. Step 4, ARIMA fitting and forecasting. Fit the ARIMA model, then use the fitted results to forecast the test set. It&#x2019;s worth mentioning that these results are obtained after one round of differencing, and the forecasted values need to be restored through inverse differencing.</p>
<p>Weighted moving average method (WMA) (<xref ref-type="bibr" rid="ref47">47</xref>): the weighted moving average (WMA) method is a time series analysis technique that assigns different weights to historical observations based on their relative importance. Unlike the simple moving average (SMA) method, which assigns equal weight to all observations, the WMA method seeks to accentuate the impact of more recent data and reduce the impact of older data points. The WMA method calculates the weighted average of a sequence of observations, with the most recent values carrying the highest weightings. The weightings assigned to each observation are typically determined by a predefined set of coefficients or by subjective judgment based on the characteristics of the data being analyzed. The WMA method is frequently used in financial market analysis to identify trends and forecast future prices. The specific definition of WMA is given in <xref ref-type="disp-formula" rid="EQ13">Equation 13</xref>.</p>
<disp-formula id="EQ13">
<label>(13)</label>
<mml:math id="M78">
<mml:msub>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>&#x03C9;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x03C9;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mo>&#x22EF;</mml:mo>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>&#x03C9;</mml:mi>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:math>
</disp-formula>
<p>Where <inline-formula>
<mml:math id="M79">
<mml:msub>
<mml:mover>
<mml:mi>X</mml:mi>
<mml:mo>&#x0302;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> denotes the prediction for the time point <inline-formula>
<mml:math id="M80">
<mml:mi>t</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math id="M81">
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msub>
</mml:math>
</inline-formula> stands for the observation value, and <inline-formula>
<mml:math id="M82">
<mml:msub>
<mml:mi>&#x03C9;</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msub>
</mml:math>
</inline-formula> stands for the weight of <inline-formula>
<mml:math id="M83">
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msub>
</mml:math>
</inline-formula>.</p>
<p>Gaussian function fitting method (<xref ref-type="bibr" rid="ref48">48</xref>): one of the most popular curve fitting algorithms, which fits the time series with an n-order Gaussian function <inline-formula>
<mml:math id="M84">
<mml:mi>G</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mi>x</mml:mi>
</mml:mfenced>
</mml:math>
</inline-formula>, which has been widely applied in prediction. The specific definition of the Gaussian function fitting method is given in <xref ref-type="disp-formula" rid="EQ14">Equation 14</xref>. In this research we applied a 3-order Gaussian function to fit each time series.</p>
<disp-formula id="EQ14">
<label>(14)</label>
<mml:math id="M85">
<mml:mi>G</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mi>x</mml:mi>
</mml:mfenced>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x00B7;</mml:mo>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mfenced open="(" close=")">
<mml:mfrac>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mfrac>
</mml:mfenced>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x00B7;</mml:mo>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mfenced open="(" close=")">
<mml:mfrac>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mfrac>
</mml:mfenced>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mo>&#x00B7;</mml:mo>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mfenced open="(" close=")">
<mml:mfrac>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
</mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
</mml:mfrac>
</mml:mfenced>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msup>
</mml:math>
</disp-formula>
<p>Polynomial function fitting method (<xref ref-type="bibr" rid="ref48">48</xref>): one of the most popular curve fitting algorithms, which fits the time series with an n-order polynomial function and has been widely applied in prediction. The specific definition of the polynomial function fitting method is given in <xref ref-type="disp-formula" rid="EQ15">Equation 15</xref>. In this research we applied a 5-order polynomial function <inline-formula>
<mml:math id="M86">
<mml:mi>G</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mi>x</mml:mi>
</mml:mfenced>
</mml:math>
</inline-formula> to fit each time series.</p>
<disp-formula id="EQ15">
<label>(15)</label>
<mml:math id="M87">
<mml:mi>G</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mi>x</mml:mi>
</mml:mfenced>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x00B7;</mml:mo>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mn>5</mml:mn>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x00B7;</mml:mo>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mn>4</mml:mn>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mo>&#x00B7;</mml:mo>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mn>3</mml:mn>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>4</mml:mn>
</mml:msub>
<mml:mo>&#x00B7;</mml:mo>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>5</mml:mn>
</mml:msub>
<mml:mo>&#x00B7;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>6</mml:mn>
</mml:msub>
</mml:math>
</disp-formula>
<p>LSTM (Long Short-Term Memory): Long Short-Term Memory networks were first introduced by Hochreiter in 1997 (<xref ref-type="bibr" rid="ref22">22</xref>). They are a specific form of RNN (Recurrent Neural Network), which is a general term for a series of neural networks that can process sequential data.</p>
<p>Generally, RNNs possess three characteristics: first, they can generate an output at each time step, with connections between hidden units being cyclic; second, they produce an output at each time step, where the output at a given time step is only cyclically connected to the hidden unit of the next time step; third, RNNs contain hidden units with cyclic connections and can process sequential data to produce a single prediction.</p>
<p>LSTM (Long Short-Term Memory) is such a gated RNN. The ingenuity of LSTM (Long Short-Term Memory) lies in the addition of input, forget, and output gates, allowing the self-recurrent weights to vary. Thus, the integration scale at different moments can dynamically change even when the model parameters are fixed, thereby avoiding problems of gradient vanishing or exploding.</p>
<p>Each LSTM (Long Short-Term Memory) unit is composed of a memory cell and three gating units: the input gate, the output gate, and the forget gate. Within this architecture, LSTM (Long Short-Term Memory) attempts to create a controlled flow of information by deciding what information to &#x201C;forget&#x201D; and what to &#x201C;remember,&#x201D; thereby learning long-term dependencies.</p>
<disp-formula id="EQ16">
<label>(16)</label>
<mml:math id="M88">
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>&#x03C3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mi>U</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>
<disp-formula id="EQ17">
<label>(17)</label>
<mml:math id="M89">
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>&#x03C3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mi>U</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>
<disp-formula id="EQ18">
<label>(18)</label>
<mml:math id="M90">
<mml:msub>
<mml:mover accent="true">
<mml:mi>c</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>tanh</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mi>U</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>
<disp-formula id="EQ19">
<label>(19)</label>
<mml:math id="M91">
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>c</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</disp-formula>
<disp-formula id="EQ20">
<label>(20)</label>
<mml:math id="M92">
<mml:msub>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>&#x03C3;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mi>U</mml:mi>
<mml:mi>o</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>o</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>o</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>
<disp-formula id="EQ21">
<label>(21)</label>
<mml:math id="M93">
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:mo>tanh</mml:mo>
<mml:mfenced open="(" close=")">
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mfenced>
</mml:math>
</disp-formula>
<p>More specifically, the input gate <inline-formula>
<mml:math id="M94">
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> alongside the second gate <inline-formula>
<mml:math id="M95">
<mml:msub>
<mml:mover accent="true">
<mml:mi>c</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> control the new information stored in the memory state <inline-formula>
<mml:math id="M96">
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> at a certain time <inline-formula>
<mml:math id="M97">
<mml:mi>t</mml:mi>
</mml:math>
</inline-formula>. The forget gate <inline-formula>
<mml:math id="M98">
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> controls the disappearance or retention of information from time <inline-formula>
<mml:math id="M99">
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:math>
</inline-formula> in the storage unit, while the output gate <inline-formula>
<mml:math id="M100">
<mml:msub>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> controls which information can be outputted by the storage unit. <xref ref-type="disp-formula" rid="EQ16">Equations 16</xref><xref ref-type="disp-formula" rid="EQ17"/><xref ref-type="disp-formula" rid="EQ18"/><xref ref-type="disp-formula" rid="EQ19"/><xref ref-type="disp-formula" rid="EQ20">&#x2013;</xref><xref ref-type="disp-formula" rid="EQ21">21</xref> succinctly describe the operations performed by an LSTM (Long Short-Term Memory) unit.</p>
<p>Herein, <inline-formula>
<mml:math id="M101">
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> represents the input at a certain moment, <inline-formula>
<mml:math id="M102">
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msub>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M103">
<mml:msub>
<mml:mi>U</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msub>
</mml:math>
</inline-formula> represent weight matrices, <inline-formula>
<mml:math id="M104">
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msub>
</mml:math>
</inline-formula> denotes the bias vector, <inline-formula>
<mml:math id="M105">
<mml:mi>&#x03C3;</mml:mi>
</mml:math>
</inline-formula> is the sigmoid function, and the operator <inline-formula>
<mml:math id="M106">
<mml:mo>&#x2299;</mml:mo>
</mml:math>
</inline-formula> represents element-wise multiplication. Finally, the hidden state unit <inline-formula>
<mml:math id="M107">
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula>, which forms part of the memory cell&#x2019;s output, is calculated as shown in <xref ref-type="disp-formula" rid="EQ21">Equation 21</xref>.</p>
<p>It is noteworthy that if multiple LSTM (Long Short-Term Memory) layers are stacked together, the memory state <inline-formula>
<mml:math id="M108">
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> and hidden state <inline-formula>
<mml:math id="M109">
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> of each LSTM (Long Short-Term Memory) layer will serve as inputs to the next LSTM (Long Short-Term Memory) layer.</p>
<p>In this paper, the main hyperparameters for the LSTM (Long Short-Term Memory) method are set as follows: the number of iterations is 150, the number of hidden units is 400, the initial learning rate is 0.001, and the optimizer used is ADAM.</p>
<p>GRU (Gated Recurrent Unit): The GRU (Gated Recurrent Unit) is also a type of recurrent neural network. Like LSTM (Long Short-Term Memory), it was developed to address issues related to long-term memory and gradients in backpropagation. Compared to LSTM (Long Short-Term Memory), using GRU (Gated Recurrent Unit) can achieve comparable results and is easier to train, significantly enhancing training efficiency. Therefore, GRU (Gated Recurrent Unit) is often preferred, especially in scenarios with limited computational power or when there is a need to conserve computational resources.</p>
<p>GRU (Gated Recurrent Unit) has only two gating units: a reset gate and an update gate, as shown in <xref ref-type="disp-formula" rid="EQ8">Equations 8</xref><xref ref-type="disp-formula" rid="EQ9"/><xref ref-type="disp-formula" rid="EQ10">&#x2013;</xref><xref ref-type="disp-formula" rid="EQ11">11</xref>, where <inline-formula>
<mml:math id="M110">
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> represents the input at a given time, <inline-formula>
<mml:math id="M111">
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mo>&#x2217;</mml:mo>
</mml:msub>
</mml:math>
</inline-formula> represents a weight matrix, <inline-formula>
<mml:math id="M112">
<mml:mi>&#x03C3;</mml:mi>
</mml:math>
</inline-formula> denotes the sigmoid function, <inline-formula>
<mml:math id="M113">
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the state of the update gate, and <inline-formula>
<mml:math id="M114">
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the reset gate. The function of the update gate is similar to the input and forget gates in LSTM (Long Short-Term Memory), where <inline-formula>
<mml:math id="M115">
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> acts like the input gate, and <inline-formula>
<mml:math id="M116">
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> functions as the forget gate. Given that GRU (Gated Recurrent Unit) uses fewer gating units to accomplish tasks similar to those of LSTM (Long Short-Term Memory), GRU (Gated Recurrent Unit) is typically considered in situations where computational capacity is limited.</p>
<p>In this paper, the hyperparameters for the GRU (Gated Recurrent Unit) method are set as follows: the maximum number of training epochs is 150, the batch size is 12, the number of hidden units is 400, the initial learning rate is 0.001, and the optimizer used is ADAM.</p>
<p>CNN-LSTM: CNN-LSTM is an advanced neural network architecture that combines Convolutional Neural Networks (CNNs) and LSTMs (Long Short-Term Memory networks) to harness the strengths of both in processing sequential data. This hybrid model is particularly effective for tasks where the input data involves both spatial and temporal dimensions, making it popular in areas such as video analysis, natural language processing, and time series forecasting.</p>
<p>Crucially, to adapt the time series data for the CNN-LSTM architecture, we employ lag features transformation. This involves creating new datasets where each feature corresponds to the original data shifted by values within a specified lag range, effectively capturing temporal dependencies across multiple time steps. These transformed datasets are then organized into matrices, with each column representing a different lagged version of the data, making it suitable for sequential processing by the model.</p>
<p>The LSTM (Long Short-Term Memory) component is the same as the LSTM (Long Short-Term Memory) method introduced above. For the CNN component, the data is initially processed through a sequence folding layer, transforming the sequential input into a format amenable to convolutional operations. This step is pivotal for extracting spatial features from the lagged inputs, which are then unfolded and flattened to preserve the temporal sequence structure, allowing the subsequent LSTM (Long Short-Term Memory) layers to learn long-term dependencies from these extracted features effectively. By meticulously mapping our datasets through these preparatory stages, we ensure that the CNN-LSTM architecture leverages both spatial and temporal dimensions of the data, thereby enhancing the model&#x2019;s forecasting accuracy.</p>
<p>In this paper, the hyperparameters for the CNN-LSTM method are set as follows: the maximum number of training epochs is 150, the batch size is 12, the lag is 8, the number of hidden units [LSTM (Long Short-Term Memory) component] is 150, the initial learning rate is 0.001, and the optimizer used is ADAM.</p>
</sec>
<sec id="sec18">
<label>2.4.3</label>
<title>Evaluation indicators</title>
<p>Average RMSE and average MAE are used as evaluation metrics to measure the magnitude of error in the prediction results:</p>
<p>The average RMSE is calculated by sequentially calculating the RMSE for each of the <inline-formula>
<mml:math id="M117">
<mml:mi>N</mml:mi>
</mml:math>
</inline-formula> countries in the prediction result of the sequence prediction step <inline-formula>
<mml:math id="M118">
<mml:mi>H</mml:mi>
</mml:math>
</inline-formula>, and then calculating the average value. The specific mathematical description is given in <xref ref-type="disp-formula" rid="EQ22">Equation 22</xref>:</p>
<disp-formula id="EQ22">
<label>(22)</label>
<mml:math id="M119">
<mml:mo stretchy="true">{</mml:mo>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mi>R</mml:mi>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msqrt>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mstyle displaystyle="true">
<mml:mo stretchy="true">&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>H</mml:mi>
</mml:msubsup>
<mml:msup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi mathvariant="italic">pred</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mi>H</mml:mi>
</mml:mfrac>
</mml:msqrt>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mi>R</mml:mi>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mstyle displaystyle="true">
<mml:mo stretchy="true">&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
<mml:mi>R</mml:mi>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:mfrac>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula><p>The average MAE is calculated by sequentially calculating the MAE for each of the N countries in the prediction result of the sequence prediction step <inline-formula>
<mml:math id="M120">
<mml:mi>H</mml:mi>
</mml:math>
</inline-formula>, and then calculating the average value, which is mathematically described in <xref ref-type="disp-formula" rid="EQ23">Equation 23</xref>:</p>
<disp-formula id="EQ23">
<label>(23)</label>
<mml:math id="M121">
<mml:mo stretchy="true">{</mml:mo>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mstyle displaystyle="true">
<mml:mo stretchy="true">&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>H</mml:mi>
</mml:msubsup>
<mml:mo stretchy="true">|</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi mathvariant="italic">pred</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="true">|</mml:mo>
</mml:mrow>
<mml:mi>H</mml:mi>
</mml:mfrac>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mstyle displaystyle="true">
<mml:mo stretchy="true">&#x2211;</mml:mo>
</mml:mstyle>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:mfrac>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula>
</sec>
</sec>
</sec>
<sec sec-type="results" id="sec19">
<label>3</label>
<title>Results</title>
<p>Predictions were made using GRGNN and the seven baseline methods [LSTM (Long Short-Term Memory), GRU (Gated Recurrent Unit), CNN-LSTM, ARIMA, polynomial function fitting, Gaussian function fitting, and WMA] on the COVID-19 active case datasets of 42 countries in Europe and 38 countries in Africa, and on the varicella dataset of Hungary&#x2019;s 20 regions, respectively. The last 2&#x2009;weeks (14&#x2009;days), 3&#x2009;weeks (21&#x2009;days), 4&#x2009;weeks (28&#x2009;days), 5&#x2009;weeks (35&#x2009;days), and 6&#x2009;weeks (42&#x2009;days) of data were taken as the test set in the prediction, and after dividing the test set, all the data prior to the test set data were divided into the training set and validation set in the ratio of 10:1.</p>
<p>The prediction results of each method for each dataset at different step sizes are shown in <xref ref-type="table" rid="tab1">Tables 1</xref>&#x2013;<xref ref-type="table" rid="tab5">5</xref>.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Prediction results for each prediction method for each dataset for 2&#x2009;weeks (14&#x2009;days).</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th/>
<th align="center" valign="top" colspan="2">African dataset</th>
<th align="center" valign="top" colspan="2">European dataset</th>
<th align="center" valign="top" colspan="2">Hungarian dataset</th>
</tr>
<tr>
<th/>
<th align="center" valign="top">ARMSE</th>
<th align="center" valign="top">AMAE</th>
<th align="center" valign="top">ARMSE</th>
<th align="center" valign="top">AMAE</th>
<th align="center" valign="top">ARMSE</th>
<th align="center" valign="top">AMAE</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">GRGNN</td>
<td align="center" valign="middle">683.27</td>
<td align="center" valign="middle">621.38</td>
<td align="center" valign="middle">54568.57</td>
<td align="center" valign="middle">49345.78</td>
<td align="center" valign="middle">28.82</td>
<td align="center" valign="middle">23.64</td>
</tr>
<tr>
<td align="left" valign="middle">LSTM</td>
<td align="center" valign="middle">1288.20</td>
<td align="center" valign="middle">1071.58</td>
<td align="center" valign="middle">78093.59</td>
<td align="center" valign="middle">64940.05</td>
<td align="center" valign="middle">29.69</td>
<td align="center" valign="middle">24.57</td>
</tr>
<tr>
<td align="left" valign="middle">CNN-LSTM</td>
<td align="center" valign="middle">812.45</td>
<td align="center" valign="middle">790.14</td>
<td align="center" valign="middle">38421.52</td>
<td align="center" valign="middle">31634.68</td>
<td align="center" valign="middle">32.85</td>
<td align="center" valign="middle">26.29</td>
</tr>
<tr>
<td align="left" valign="middle">GRU</td>
<td align="center" valign="middle">1115.73</td>
<td align="center" valign="middle">907.52</td>
<td align="center" valign="middle">56406.04</td>
<td align="center" valign="middle">47197.14</td>
<td align="center" valign="middle">32.21</td>
<td align="center" valign="middle">27.66</td>
</tr>
<tr>
<td align="left" valign="middle">ARIMA</td>
<td align="center" valign="middle">783.04</td>
<td align="center" valign="middle">657.50</td>
<td align="center" valign="middle">40086.60</td>
<td align="center" valign="middle">42310.69</td>
<td align="center" valign="middle">29.61</td>
<td align="center" valign="middle">23.83</td>
</tr>
<tr>
<td align="left" valign="middle">Poly</td>
<td align="center" valign="middle">4620.15</td>
<td align="center" valign="middle">4480.89</td>
<td align="center" valign="middle">301141.17</td>
<td align="center" valign="middle">298245.73</td>
<td align="center" valign="middle">44.11</td>
<td align="center" valign="middle">36.52</td>
</tr>
<tr>
<td align="left" valign="middle">Gauss</td>
<td align="center" valign="middle">2289.51</td>
<td align="center" valign="middle">2214.87</td>
<td align="center" valign="middle">109168.62</td>
<td align="center" valign="middle">103422.19</td>
<td align="center" valign="middle">41.55</td>
<td align="center" valign="middle">34.48</td>
</tr>
<tr>
<td align="left" valign="middle">WMA</td>
<td align="center" valign="middle">820.68</td>
<td align="center" valign="middle">691.10</td>
<td align="center" valign="middle">70424.89</td>
<td align="center" valign="middle">62469.22</td>
<td align="center" valign="middle">35.13</td>
<td align="center" valign="middle">29.24</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Prediction results for each prediction method for each dataset for 3&#x2009;weeks (21&#x2009;days).</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th/>
<th align="center" valign="top" colspan="2">African dataset</th>
<th align="center" valign="top" colspan="2">European dataset</th>
<th align="center" valign="top" colspan="2">Hungarian dataset</th>
</tr>
<tr>
<th/>
<th align="center" valign="top">ARMSE</th>
<th align="center" valign="top">AMAE</th>
<th align="center" valign="top">ARMSE</th>
<th align="center" valign="top">AMAE</th>
<th align="center" valign="top">ARMSE</th>
<th align="center" valign="top">AMAE</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">GRGNN</td>
<td align="center" valign="middle">836.26</td>
<td align="center" valign="middle">770.61</td>
<td align="center" valign="middle">75623.18</td>
<td align="center" valign="middle">83044.94</td>
<td align="center" valign="middle">31.47</td>
<td align="center" valign="middle">28.47</td>
</tr>
<tr>
<td align="left" valign="middle">LSTM</td>
<td align="center" valign="middle">1375.33</td>
<td align="center" valign="middle">1116.70</td>
<td align="center" valign="middle">113619.62</td>
<td align="center" valign="middle">135365.11</td>
<td align="center" valign="middle">33.62</td>
<td align="center" valign="middle">28.75</td>
</tr>
<tr>
<td align="left" valign="middle">CNN-LSTM</td>
<td align="center" valign="middle">915.06</td>
<td align="center" valign="middle">892.35</td>
<td align="center" valign="middle">48978.62</td>
<td align="center" valign="middle">55363.46</td>
<td align="center" valign="middle">35.65</td>
<td align="center" valign="middle">31.46</td>
</tr>
<tr>
<td align="left" valign="middle">GRU</td>
<td align="center" valign="middle">1608.06</td>
<td align="center" valign="middle">1260.72</td>
<td align="center" valign="middle">115653.55</td>
<td align="center" valign="middle">144957.77</td>
<td align="center" valign="middle">34.41</td>
<td align="center" valign="middle">28.90</td>
</tr>
<tr>
<td align="left" valign="middle">ARIMA</td>
<td align="center" valign="middle">997.03</td>
<td align="center" valign="middle">848.51</td>
<td align="center" valign="middle">68989.77</td>
<td align="center" valign="middle">82938.58</td>
<td align="center" valign="middle">35.22</td>
<td align="center" valign="middle">29.77</td>
</tr>
<tr>
<td align="left" valign="middle">Poly</td>
<td align="center" valign="middle">5428.18</td>
<td align="center" valign="middle">5195.21</td>
<td align="center" valign="middle">409270.15</td>
<td align="center" valign="middle">401718.39</td>
<td align="center" valign="middle">29.61</td>
<td align="center" valign="middle">28.93</td>
</tr>
<tr>
<td align="left" valign="middle">Gauss</td>
<td align="center" valign="middle">2641.67</td>
<td align="center" valign="middle">2531.15</td>
<td align="center" valign="middle">188754.28</td>
<td align="center" valign="middle">181754.93</td>
<td align="center" valign="middle">36.31</td>
<td align="center" valign="middle">28.60</td>
</tr>
<tr>
<td align="left" valign="middle">WMA</td>
<td align="center" valign="middle">1007.55</td>
<td align="center" valign="middle">831.20</td>
<td align="center" valign="middle">119667.10</td>
<td align="center" valign="middle">104058.45</td>
<td align="center" valign="middle">29.70</td>
<td align="center" valign="middle">24.21</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption>
<p>Prediction results for each prediction method for each dataset for 4&#x2009;weeks (28&#x2009;days).</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th/>
<th align="center" valign="top" colspan="2">African dataset</th>
<th align="center" valign="top" colspan="2">European dataset</th>
<th align="center" valign="top" colspan="2">Hungarian dataset</th>
</tr>
<tr>
<th/>
<th align="center" valign="top">ARMSE</th>
<th align="center" valign="top">AMAE</th>
<th align="center" valign="top">ARMSE</th>
<th align="center" valign="top">AMAE</th>
<th align="center" valign="top">ARMSE</th>
<th align="center" valign="top">AMAE</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">GRGNN</td>
<td align="center" valign="middle">748.42</td>
<td align="center" valign="middle">858.05</td>
<td align="center" valign="middle">111743.17</td>
<td align="center" valign="middle">123580.61</td>
<td align="center" valign="middle">27.48</td>
<td align="center" valign="middle">21.75</td>
</tr>
<tr>
<td align="left" valign="middle">LSTM</td>
<td align="center" valign="middle">2296.05</td>
<td align="center" valign="middle">2775.97</td>
<td align="center" valign="middle">125159.58</td>
<td align="center" valign="middle">151888.46</td>
<td align="center" valign="middle">28.02</td>
<td align="center" valign="middle">22.36</td>
</tr>
<tr>
<td align="left" valign="middle">CNN-LSTM</td>
<td align="center" valign="middle">882.52</td>
<td align="center" valign="middle">921.98</td>
<td align="center" valign="middle">88773.80</td>
<td align="center" valign="middle">97859.95</td>
<td align="center" valign="middle">29.10</td>
<td align="center" valign="middle">22.21</td>
</tr>
<tr>
<td align="left" valign="middle">GRU</td>
<td align="center" valign="middle">1718.08</td>
<td align="center" valign="middle">2188.22</td>
<td align="center" valign="middle">188863.87</td>
<td align="center" valign="middle">161955.56</td>
<td align="center" valign="middle">28.88</td>
<td align="center" valign="middle">23.04</td>
</tr>
<tr>
<td align="left" valign="middle">ARIMA</td>
<td align="center" valign="middle">921.32</td>
<td align="center" valign="middle">1082.60</td>
<td align="center" valign="middle">136034.82</td>
<td align="center" valign="middle">112387.49</td>
<td align="center" valign="middle">27.55</td>
<td align="center" valign="middle">20.98</td>
</tr>
<tr>
<td align="left" valign="middle">Poly</td>
<td align="center" valign="middle">6628.94</td>
<td align="center" valign="middle">6351.37</td>
<td align="center" valign="middle">534460.56</td>
<td align="center" valign="middle">520915.42</td>
<td align="center" valign="middle">35.01</td>
<td align="center" valign="middle">26.59</td>
</tr>
<tr>
<td align="left" valign="middle">Gauss</td>
<td align="center" valign="middle">3254.86</td>
<td align="center" valign="middle">3078.51</td>
<td align="center" valign="middle">214257.05</td>
<td align="center" valign="middle">194298.74</td>
<td align="center" valign="middle">33.07</td>
<td align="center" valign="middle">25.51</td>
</tr>
<tr>
<td align="left" valign="middle">WMA</td>
<td align="center" valign="middle">1437.04</td>
<td align="center" valign="middle">1228.77</td>
<td align="center" valign="middle">152546.44</td>
<td align="center" valign="middle">132098.87</td>
<td align="center" valign="middle">28.13</td>
<td align="center" valign="middle">22.02</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap position="float" id="tab4">
<label>Table 4</label>
<caption>
<p>Prediction results for each prediction method for each dataset for 5&#x2009;weeks (35&#x2009;days).</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th/>
<th align="center" valign="top" colspan="2">African dataset</th>
<th align="center" valign="top" colspan="2">European dataset</th>
<th align="center" valign="top" colspan="2">Hungarian dataset</th>
</tr>
<tr>
<th/>
<th align="center" valign="top">ARMSE</th>
<th align="center" valign="top">AMAE</th>
<th align="center" valign="top">ARMSE</th>
<th align="center" valign="top">AMAE</th>
<th align="center" valign="top">ARMSE</th>
<th align="center" valign="top">AMAE</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">GRGNN</td>
<td align="center" valign="middle">820.70</td>
<td align="center" valign="middle">1004.61</td>
<td align="center" valign="middle">120230.14</td>
<td align="center" valign="middle">127749.64</td>
<td align="center" valign="middle">27.27</td>
<td align="center" valign="middle">21.45</td>
</tr>
<tr>
<td align="left" valign="middle">LSTM</td>
<td align="center" valign="middle">2507.72</td>
<td align="center" valign="middle">3072.21</td>
<td align="center" valign="middle">255698.55</td>
<td align="center" valign="middle">219314.79</td>
<td align="center" valign="middle">27.40</td>
<td align="center" valign="middle">21.55</td>
</tr>
<tr>
<td align="left" valign="middle">CNN-LSTM</td>
<td align="center" valign="middle">1536.70</td>
<td align="center" valign="middle">1593.65</td>
<td align="center" valign="middle">128824.08</td>
<td align="center" valign="middle">111020.42</td>
<td align="center" valign="middle">28.14</td>
<td align="center" valign="middle">21.91</td>
</tr>
<tr>
<td align="left" valign="middle">GRU</td>
<td align="center" valign="middle">2234.65</td>
<td align="center" valign="middle">2667.76</td>
<td align="center" valign="middle">250900.80</td>
<td align="center" valign="middle">213550.93</td>
<td align="center" valign="middle">28.26</td>
<td align="center" valign="middle">22.00</td>
</tr>
<tr>
<td align="left" valign="middle">ARIMA</td>
<td align="center" valign="middle">1537.67</td>
<td align="center" valign="middle">1731.40</td>
<td align="center" valign="middle">150250.91</td>
<td align="center" valign="middle">125333.12</td>
<td align="center" valign="middle">29.96</td>
<td align="center" valign="middle">22.37</td>
</tr>
<tr>
<td align="left" valign="middle">Poly</td>
<td align="center" valign="middle">8436.73</td>
<td align="center" valign="middle">8093.56</td>
<td align="center" valign="middle">652543.68</td>
<td align="center" valign="middle">625536.10</td>
<td align="center" valign="middle">34.23</td>
<td align="center" valign="middle">26.48</td>
</tr>
<tr>
<td align="left" valign="middle">Gauss</td>
<td align="center" valign="middle">4526.36</td>
<td align="center" valign="middle">4227.41</td>
<td align="center" valign="middle">212263.48</td>
<td align="center" valign="middle">193738.61</td>
<td align="center" valign="middle">32.04</td>
<td align="center" valign="middle">25.00</td>
</tr>
<tr>
<td align="left" valign="middle">WMA</td>
<td align="center" valign="middle">2525.29</td>
<td align="center" valign="middle">2301.08</td>
<td align="center" valign="middle">238699.26</td>
<td align="center" valign="middle">209711.46</td>
<td align="center" valign="middle">30.51</td>
<td align="center" valign="middle">22.38</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap position="float" id="tab5">
<label>Table 5</label>
<caption>
<p>Prediction results for each prediction method for each dataset for 6&#x2009;weeks (42&#x2009;days).</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th/>
<th align="center" valign="top" colspan="2">African dataset</th>
<th align="center" valign="top" colspan="2">European dataset</th>
<th align="center" valign="top" colspan="2">Hungarian dataset</th>
</tr>
<tr>
<th/>
<th align="center" valign="top">ARMSE</th>
<th align="center" valign="top">AMAE</th>
<th align="center" valign="top">ARMSE</th>
<th align="center" valign="top">AMAE</th>
<th align="center" valign="top">ARMSE</th>
<th align="center" valign="top">AMAE</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">GRGNN</td>
<td align="center" valign="middle">1545.62</td>
<td align="center" valign="middle">1763.28</td>
<td align="center" valign="middle">124665.83</td>
<td align="center" valign="middle">133453.75</td>
<td align="center" valign="middle">25.51</td>
<td align="center" valign="middle">19.17</td>
</tr>
<tr>
<td align="left" valign="middle">LSTM</td>
<td align="center" valign="middle">3418.20</td>
<td align="center" valign="middle">4090.81</td>
<td align="center" valign="middle">308407.33</td>
<td align="center" valign="middle">367230.08</td>
<td align="center" valign="middle">27.58</td>
<td align="center" valign="middle">22.07</td>
</tr>
<tr>
<td align="left" valign="middle">CNN-LSTM</td>
<td align="center" valign="middle">1657.79</td>
<td align="center" valign="middle">1810.84</td>
<td align="center" valign="middle">124829.94</td>
<td align="center" valign="middle">153435.48</td>
<td align="center" valign="middle">26.18</td>
<td align="center" valign="middle">20.08</td>
</tr>
<tr>
<td align="left" valign="middle">GRU</td>
<td align="center" valign="middle">4648.19</td>
<td align="center" valign="middle">5709.85</td>
<td align="center" valign="middle">232157.67</td>
<td align="center" valign="middle">269820.41</td>
<td align="center" valign="middle">25.72</td>
<td align="center" valign="middle">20.48</td>
</tr>
<tr>
<td align="left" valign="middle">ARIMA</td>
<td align="center" valign="middle">2673.66</td>
<td align="center" valign="middle">3035.86</td>
<td align="center" valign="middle">188922.10</td>
<td align="center" valign="middle">229932.70</td>
<td align="center" valign="middle">27.45</td>
<td align="center" valign="middle">20.27</td>
</tr>
<tr>
<td align="left" valign="middle">Poly</td>
<td align="center" valign="middle">10843.29</td>
<td align="center" valign="middle">10305.87</td>
<td align="center" valign="middle">739501.24</td>
<td align="center" valign="middle">697691.22</td>
<td align="center" valign="middle">33.02</td>
<td align="center" valign="middle">24.93</td>
</tr>
<tr>
<td align="left" valign="middle">Gauss</td>
<td align="center" valign="middle">4735.57</td>
<td align="center" valign="middle">4382.75</td>
<td align="center" valign="middle">251950.31</td>
<td align="center" valign="middle">218247.07</td>
<td align="center" valign="middle">30.33</td>
<td align="center" valign="middle">23.09</td>
</tr>
<tr>
<td align="left" valign="middle">WMA</td>
<td align="center" valign="middle">3435.84</td>
<td align="center" valign="middle">3093.05</td>
<td align="center" valign="middle">426603.38</td>
<td align="center" valign="middle">363924.52</td>
<td align="center" valign="middle">27.98</td>
<td align="center" valign="middle">20.14</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As can be seen from <xref ref-type="table" rid="tab1">Table 1</xref>, with a prediction step of 2&#x2009;weeks (14&#x2009;days), GRGNN achieves optimal results for both the African and Hungarian datasets, and slightly underperforms the CNN-LSTM method and the ARIMA method for the European dataset. The LSTM (Long Short-Term Memory) method and the GRU (Gated Recurrent Unit) method underperform in all datasets. The CNN-LSTM method performs best in the prediction of the European dataset, while it does not perform as well as GRGNN and ARIMA on the African dataset, and performs even worse on the Hungarian dataset. The prediction accuracy of the ARIMA method is in the middle of the range of the eight methods. The WMA method can achieve predictions with an accuracy approximately equal to that of ARIMA. Conversely, the Gaussian function method and the polynomial function method produce predictions significantly deviating from the real data, obtaining the lowest accuracies among all eight methods across all three datasets.</p>
<p>As can be seen from <xref ref-type="table" rid="tab2">Table 2</xref>, the comparison of the overall prediction results when extending the prediction step to 3&#x2009;weeks (21&#x2009;days) is not much different from that of the prediction step of 2&#x2009;weeks. The GRGNN method still achieves the best results in the prediction of both the African and Hungarian datasets, and is slightly less accurate in the prediction of the European dataset than the CNN-LSTM and the ARIMA methods. The prediction errors of the LSTM (Long Short-Term Memory) method and the GRU (Gated Recurrent Unit) method in the African and European datasets are the worst two among the neural network methods. The CNN-LSTM method still performs the best in the prediction of the European dataset. The ARIMA method does not achieve the optimal prediction accuracy but outperforms LSTM (Long Short-Term Memory) and GRU (Gated Recurrent Unit) in the African and European datasets, and outperforms CNN-LSTM in the Hungarian dataset in terms of prediction error. The WMA method still yields slightly inferior results compared to ARIMA and marginally better outcomes than the LSTM (Long Short-Term Memory) method. However, the Gaussian function method and the polynomial function method continue to exhibit the poorest two results.</p>
<p>As can be seen from <xref ref-type="table" rid="tab3">Table 3</xref>, with a prediction step of 4&#x2009;weeks (28&#x2009;days), GRGNN still maintains the optimal prediction in the prediction of the African and Hungarian datasets, and the prediction results in the European dataset are only slightly inferior to those of the CNN-LSTM method. The prediction errors of the LSTM (Long Short-Term Memory) method and the GRU (Gated Recurrent Unit) method are still poor in the African and European datasets. The CNN-LSTM method still performs the best in the prediction of the European dataset, but has poor prediction in the Hungarian dataset. The prediction accuracy of the ARIMA method is still in the middle of the range of the eight prediction methods. The performance of the WMA method is slightly inferior to the ARIMA method but slightly superior to the GRU (Gated Recurrent Unit) and LSTM (Long Short-Term Memory) methods. However, the Gaussian method and the polynomial method remain the least effective, exhibiting significant errors in their prediction results.</p>
<p>As can be seen from <xref ref-type="table" rid="tab4">Table 4</xref>, when the prediction step size is set to 5&#x2009;weeks (35&#x2009;days), the ranking of the prediction results of each method is not much different from that of the case with a step size of 4&#x2009;weeks. It is worth noting that the main change occurs in the prediction results for the European data, where the ARMSE of GRGNN falls below that of CNN-LSTM to become the smallest among the prediction methods. The performance of the WMA method deteriorates rapidly, reaching a point where it only outperforms two other methods. The Gaussian function method and the polynomial function method still remain the poorest performers, with their accuracy indices worsening even further as the prediction steps increase.</p>
<p>As can be seen from <xref ref-type="table" rid="tab5">Table 5</xref>, when the prediction step size is 6&#x2009;weeks (42&#x2009;days), the averages of the prediction errors of GRGNN on the European dataset fall below those of the CNN-LSTM method to become the smallest among the results of each prediction method, so that GRGNN achieves the highest prediction accuracy on every dataset among all eight prediction methods. The prediction error of WMA only slightly exceeds that of LSTM (Long Short-Term Memory) and GRU (Gated Recurrent Unit), placing its results ahead of both LSTM (Long Short-Term Memory) and GRU (Gated Recurrent Unit). However, it falls short compared to GRGNN, CNN-LSTM, and ARIMA methods. The polynomial method and Gaussian function method persist as the least effective, exhibiting the highest ARMSE and AMAE values.</p>
<p>The average indicators of the prediction results of each method in each dataset are plotted at different step sizes, as shown in <xref ref-type="fig" rid="fig5">Figure 5</xref>.</p>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption>
<p>The overview plots of the evaluation indicators of the datasets. <bold>(A)</bold> The overview plot of ARMSE of the 38 African countries&#x2019; COVID-19 dataset. <bold>(B)</bold> The overview plot of AMAE of the 38 African countries&#x2019; COVID-19 dataset. <bold>(C)</bold> The overview plot of ARMSE of the 42 European countries&#x2019; COVID-19 dataset. <bold>(D)</bold> The overview plot of AMAE of the 42 European countries&#x2019; COVID-19 dataset. <bold>(E)</bold> The overview plot of ARMSE of the 20 Hungarian regions&#x2019; Chickenpox dataset. <bold>(F)</bold> The overview plot of AMAE of the 20 Hungarian regions&#x2019; Chickenpox dataset.</p>
</caption>
<graphic xlink:href="fpubh-12-1397260-g005.tif"/>
</fig>
<p>To enhance the clarity and simplicity of conveying the prediction results, we have selected 5 time series from each dataset, focusing on a prediction step set to 6&#x2009;weeks (42&#x2009;days) for visualization. Specifically, we depict the time series data of 5 countries from the 38 African countries&#x2019; COVID-19 dataset in <xref ref-type="fig" rid="fig6">Figure 6</xref>, and the time series of 5 countries from the 42 European countries&#x2019; COVID-19 dataset in <xref ref-type="fig" rid="fig7">Figure 7</xref>, and illustrate the time series of 5 regions from the 20 Hungarian regions&#x2019; chickenpox dataset in <xref ref-type="fig" rid="fig8">Figure 8</xref>. Through these figures, it becomes evident that GRGNN generally captures and mirrors the trends observed in the majority of the time series from the original real-world data.</p>
<fig position="float" id="fig6">
<label>Figure 6</label>
<caption>
<p>The plots of original data and prediction result for countries from the 38 African countries&#x2019; COVID-19 dataset of GRGNN. <bold>(A)</bold> The plot of original data and prediction result for the total cases of the 38 African countries&#x2019; COVID-19 dataset of GRGNN. <bold>(B)</bold> The plot of original data and prediction result for Country1 from the 38 African countries&#x2019; COVID-19 dataset of GRGNN. <bold>(C)</bold> The plot of original data and prediction result for Country2 from the 38 African countries&#x2019; COVID-19 dataset of GRGNN. <bold>(D)</bold> The plot of original data and prediction result for Country3 from the 38 African countries&#x2019; COVID-19 dataset of GRGNN. <bold>(E)</bold> The plot of original data and prediction result for Country4 from the 38 African countries&#x2019; COVID-19 dataset of GRGNN. <bold>(F)</bold> The plot of original data and prediction result for Country5 from the 38 African countries&#x2019; COVID-19 dataset of GRGNN.</p>
</caption>
<graphic xlink:href="fpubh-12-1397260-g006.tif"/>
</fig>
<fig position="float" id="fig7">
<label>Figure 7</label>
<caption>
<p>The plots of original data and prediction result for countries from the 42 European countries&#x2019; COVID-19 dataset of GRGNN. <bold>(A)</bold> The plot of original data and prediction result for the total cases of the 42 European countries&#x2019; COVID-19 dataset of GRGNN. <bold>(B)</bold> The plot of original data and prediction result for Country1 from the 42 European countries&#x2019; COVID-19 dataset of GRGNN. <bold>(C)</bold> The plot of original data and prediction result for Country2 from the 42 European countries&#x2019; COVID-19 dataset of GRGNN. <bold>(D)</bold> The plot of original data and prediction result for Country3 from the 42 European countries&#x2019; COVID-19 dataset of GRGNN. <bold>(E)</bold> The plot of original data and prediction result for Country4 from the 42 European countries&#x2019; COVID-19 dataset of GRGNN. <bold>(F)</bold> The plot of original data and prediction result for Country5 from the 42 European countries&#x2019; COVID-19 dataset of GRGNN.</p>
</caption>
<graphic xlink:href="fpubh-12-1397260-g007.tif"/>
</fig>
<fig position="float" id="fig8">
<label>Figure 8</label>
<caption>
<p>The plots of original data and prediction result for regions from the 20 Hungarian regions&#x2019; Chickenpox dataset of GRGNN. <bold>(A)</bold> The plot of original data and prediction result for the total cases of the 20 Hungarian regions&#x2019; Chickenpox dataset of GRGNN. <bold>(B)</bold> The plot of original data and prediction result for Region1 from the 20 Hungarian regions&#x2019; Chickenpox dataset of GRGNN. <bold>(C)</bold> The plot of original data and prediction result for Region2 from the 20 Hungarian regions&#x2019; Chickenpox dataset of GRGNN. <bold>(D)</bold> The plot of original data and prediction result for Region3 from the 20 Hungarian regions&#x2019; Chickenpox dataset of GRGNN. <bold>(E)</bold> The plot of original data and prediction result for Region4 from the 20 Hungarian regions&#x2019; Chickenpox dataset of GRGNN. <bold>(F)</bold> The plot of original data and prediction result for Region5 from the 20 Hungarian regions&#x2019; Chickenpox dataset of GRGNN.</p>
</caption>
<graphic xlink:href="fpubh-12-1397260-g008.tif"/>
</fig>
</sec>
<sec sec-type="discussion" id="sec20">
<label>4</label>
<title>Discussion</title>
<p>Observing <xref ref-type="table" rid="tab1">Tables 1</xref>&#x2013;<xref ref-type="table" rid="tab5">5</xref>, it can be found that for the prediction results of the data of the 38 African countries&#x2019; COVID-19 dataset and the 20 Hungarian regions&#x2019; chickenpox dataset, GRGNN is able to achieve better prediction results compared with other prediction methods at different prediction steps, and the average RMSE and average MAE of its prediction results are the smallest among the prediction methods at different steps, which indicates that GRGNN is able to capture and learn the features in the data better than the three neural network methods and statistical methods in the baseline methods, and make accurate predictions.</p>
<p>Observing <xref ref-type="fig" rid="fig6">Figures 6</xref>, <xref ref-type="fig" rid="fig8">8</xref>, it becomes apparent that for the African dataset and the Hungarian dataset, the prediction results of GRGNN consistently align with the developmental trend of the original time series, albeit with varying degrees of error. This observation suggests that GRGNN, to a certain extent, can predict the developmental trends within the datasets.</p>
<p>The prediction errors at different step lengths are compared on each dataset, as shown in <xref ref-type="fig" rid="fig5">Figure 5</xref>, and it can be found that the prediction errors for the African data generally increase with the extension of the prediction step length, while the errors of the GRGNN method increase relatively less than those of the other methods. This indicates that GRGNN, compared with the three neural network methods and the statistical methods in the baseline methods, captures and learns the relationships and features among the temporal nodes of the time series more adequately. This also indicates that GRGNN learns the data in three dimensions: time domain, frequency domain and spectral domain, whereas the seven comparative forecasting methods only learn and capture the data in the time domain. The results demonstrate that this allows GRGNN to explore more features in the data, better grasp the general trend of the data, and thus achieve more accurate medium-term and long-term predictions for the 38 African countries&#x2019; COVID-19 dataset and the 20 Hungarian regions&#x2019; chickenpox dataset.</p>
<p>For the 20 Hungarian regions&#x2019; chickenpox dataset, it should be separately stated that since the data in this dataset are collected weekly, the number of actual predictions obtained at the same prediction step size is smaller than for the other two datasets. Therefore, as shown in <xref ref-type="fig" rid="fig5">Figures 5E</xref>,<xref ref-type="fig" rid="fig5">F</xref>, when the prediction step length is extended from 2&#x2009;weeks to 3&#x2009;weeks, each prediction method shows an increase in prediction error, whereas the error of each prediction method except the ARIMA method shows a decreasing trend when the step length is extended from 4&#x2009;weeks to 6&#x2009;weeks. Meanwhile, GRGNN was able to achieve better results than the other seven comparison methods in both average RMSE and average MAE. This indicates that GRGNN and the neural network prediction methods in the baseline methods can capture the overall trend characteristics of the data, which in turn shows that the prediction accuracy improves when the prediction step length is extended to a certain length. Compared with the seven comparative methods, GRGNN achieves more accurate prediction results, which indicates that GRGNN is more adequate than the other seven methods for capturing and learning the general trend features of the data.</p>
<p>Finally, GRGNN does not always make the most accurate prediction. As can be seen from <xref ref-type="fig" rid="fig5">Figures 5C</xref>,<xref ref-type="fig" rid="fig5">D</xref>, for the prediction experiments of the 42 European countries, the errors of each prediction method are much larger than the errors of the prediction results for the African data, and the indicators of each prediction result under the same hyper-parameters mostly reach the order of 10,000 or even 100,000 counts. In this case the CNN-LSTM method has the best prediction results in the experiments with the prediction step lengths of 14, 21, and 28&#x2009;days, and its indicators are the smallest values among the eight prediction methods, but these two metrics of CNN-LSTM become larger with the increase of the prediction step. When the prediction step is extended to 35&#x2009;days, the AMAE of CNN-LSTM is still the smallest among the eight methods, but its ARMSE becomes sub-optimal, and the optimal value is obtained from the prediction results of GRGNN. When the prediction step size is increased to 42&#x2009;days, the prediction result of GRGNN becomes optimal in both indicators. The prediction results of each prediction method in the experiment are not satisfactory in the European dataset, which may be caused by the inadequacy of the type of data collected and the insufficient amount of data collected for this phenomenon. Data inapplicability is an insurmountable problem for data-driven methods, and if the applicability of the prediction methods to the data cannot be assessed, this will greatly limit the application prospects of the prediction methods. Therefore, there is a need to discuss the applicability of GRGNN to different data:</p>
<p>The heatmap of the weight matrix (<inline-formula>
<mml:math id="M122">
<mml:mi>W</mml:mi>
</mml:math>
</inline-formula>) for each dataset is plotted in <xref ref-type="fig" rid="fig9">Figure 9</xref>, where each block represents the correlation between the time series marked on the x-axis and the y-axis; the lighter the color of the block, the more closely related the time series are. It can be observed that the accuracy of GRGNN is linked to the correlation among time series in the datasets. In cases such as the African and Hungarian datasets in this research, where the correlation between time series is relatively close, GRGNN exhibits accurate predictions and the ability to forecast the developmental trend of the time series. However, when facing datasets like the European dataset in this research, where the correlation among time series is less pronounced, GRGNN struggles to achieve a more accurate prediction compared to other neural network methods.</p>
<fig position="float" id="fig9">
<label>Figure 9</label>
<caption>
<p>The heat maps of the weight matrices of the datasets. <bold>(A)</bold> The heat map of the weight matrix of the 38 African countries&#x2019; COVID-19 dataset. <bold>(B)</bold> The heat map of the weight matrix of the 42 European countries&#x2019; COVID-19 dataset. <bold>(C)</bold> The heat map of the weight matrix of the 20 Hungarian regions&#x2019; Chickenpox dataset.</p>
</caption>
<graphic xlink:href="fpubh-12-1397260-g009.tif"/>
</fig>
<p>For the weight matrix <inline-formula>
<mml:math id="M123">
<mml:mi>W</mml:mi>
</mml:math>
</inline-formula> obtained after preprocessing each dataset, we calculated the average of the sum of the weights of each node over the other nodes, as shown in <xref ref-type="table" rid="tab6">Table 6</xref>, and found that the closer this average value is to 1, the better the prediction results that GRGNN yields for the dataset.</p>
<table-wrap position="float" id="tab6">
<label>Table 6</label>
<caption>
<p>The average node sum weights of each dataset.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th/>
<th align="center" valign="top">African dataset</th>
<th align="center" valign="top">European dataset</th>
<th align="center" valign="top">Hungarian dataset</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">average node sum weights</td>
<td align="center" valign="top">1.10</td>
<td align="center" valign="top">0.78</td>
<td align="center" valign="top">0.96</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Therefore, we hypothesize that if the average value of the sum of the weights of each node in the weight matrix over the other nodes converges to 1, then the dataset will yield better prediction results by GRGNN. As a matter of fact, some studies construct the graph using SoftMax and other methods so that the average value of the sum of the weights from each node to the other nodes in the weight matrix converges to 1 (<xref ref-type="bibr" rid="ref40">40</xref>). However, this hypothesis is based only on the observation of the phenomenon shown in the experimental results, and mathematical proofs and verification through additional experiments still need to be supplemented.</p>
<p>The main innovation of this study is to enable the network to analyze datasets in multiple dimensions, namely the time, spectral, and frequency domains, by introducing a GRU (Gated Recurrent Unit) layer into the GNN (Graph Neural Network). This gives the neural network used in this study the following advantages: firstly, the multiple-input multiple-output temporal prediction of multiple time series variables is more efficient than the single-input single-output prediction of a single time series variable; secondly, owing to the introduction of the GRU layer, it yields more accurate predictions; and thirdly, as a data-driven method, it does not require human <italic>a priori</italic> knowledge as a basis, which makes it easy to transfer to other data processing tasks.</p>
</sec>
<sec sec-type="conclusions" id="sec21">
<label>5</label>
<title>Conclusion</title>
<p>In this paper, we attempt to introduce gated recurrent units into graph neural networks, enabling them to capture and learn features from data in three dimensions, namely the spatial, frequency, and time domains. This approach produces notable results in epidemic data prediction, which is a typical multivariate time series prediction problem. Compared with classical prediction methods, graph neural networks, as a multiple-input multiple-output method, can quickly and easily construct graphs for multiple time series and realize effective prediction in a data-driven manner. In terms of prediction accuracy, when the predicted multivariate correlation reaches a certain level (specifically, the phenomenon observed in this study is that the closer the average of the sum of the connection weights from each node to the other nodes is to 1, the better the prediction results obtained by GRGNN for the dataset), the graph neural network with the introduction of gated recurrent units can achieve more accurate predictions in medium-term or long-term forecasting.</p>
</sec>
<sec sec-type="data-availability" id="sec22">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material; further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec sec-type="author-contributions" id="sec23">
<title>Author contributions</title>
<p>X-dL: Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. B-hH: Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. Z-jX: Writing &#x2013; original draft. NF: Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. X-pD: Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing.</p>
</sec>
</body>
<back>
<sec sec-type="funding-information" id="sec25">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. This work was supported by the National Natural Science Foundation of China (82341035) and the Grant (2019SKLID603) from the State Key Laboratory for Infectious Disease Prevention and Control (currently named the National Key-Laboratory of Intelligent Tracking and Forecasting for Infectious Disease), China CDC.</p>
</sec>
<ack>
<p>The authors gratefully acknowledge Tao Hong, Mingkuan Feng, and Yi Yang for their assistance with data collection and for inspiring the idea of this study. The authors also sincerely acknowledge Olasehinde Toba Stephen for helping to improve the manuscript from a language perspective.</p>
</ack>
<sec sec-type="COI-statement" id="sec26">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="sec100" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn id="fn0001">
<p>
<sup>1</sup>
<ext-link xlink:href="https://www.worldometers.info/coronavirus" ext-link-type="uri">https://www.worldometers.info/coronavirus</ext-link>
</p>
</fn>
<fn id="fn0002">
<p>
<sup>2</sup>
<ext-link xlink:href="https://www.kaggle.com/datasets/die9origephit/chickenpox-cases-hungary" ext-link-type="uri">https://www.kaggle.com/datasets/die9origephit/chickenpox-cases-hungary</ext-link>
</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="ref1">
<label>1.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>Y</given-names></name> <name><surname>Yu</surname> <given-names>R</given-names></name> <name><surname>Shahabi</surname> <given-names>C</given-names></name> <name><surname>Liu</surname> <given-names>Y</given-names></name></person-group>. <article-title>Diffusion convolutional recurrent neural network: data-driven traffic forecasting</article-title>. <source>arXiv preprint arXiv</source>. (<year>2017</year>) <volume>1707</volume>:<fpage>01926</fpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1707.01926</pub-id></citation>
</ref>
<ref id="ref2">
<label>2.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>B</given-names></name> <name><surname>Yin</surname> <given-names>H</given-names></name> <name><surname>Zhu</surname> <given-names>Z</given-names></name></person-group>. <article-title>Spatio-temporal graph convolutional networks: A deep learning framework for traffic forecasting</article-title>. <source>arXiv preprint arXiv</source>. (<year>2017</year>) <volume>1709</volume>:<fpage>04875</fpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1709.04875</pub-id></citation>
</ref>
<ref id="ref3">
<label>3.</label>
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>S</given-names></name> <name><surname>Zhang</surname> <given-names>Z</given-names></name> <name><surname>Zhou</surname> <given-names>J</given-names></name> <name><surname>Wang</surname> <given-names>Y</given-names></name> <name><surname>Sun</surname> <given-names>W</given-names></name> <name><surname>Zhong</surname> <given-names>X</given-names></name> <etal/></person-group>. Financial risk analysis for SMEs with graph-based supply chain mining. In <italic>Proceedings of the Twenty-Ninth International Conference on International Joint Conferences on Artificial Intelligence</italic> (<year>2021</year>) (pp. 4661&#x2013;4667).</citation>
</ref>
<ref id="ref4">
<label>4.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Khodayar</surname> <given-names>M</given-names></name> <name><surname>Wang</surname> <given-names>J</given-names></name></person-group>. <article-title>Spatio-temporal graph deep neural network for short-term wind speed forecasting</article-title>. <source>IEEE Transactions on Sustain Energy</source>. (<year>2018</year>) <volume>10</volume>:<fpage>670</fpage>&#x2013;<lpage>81</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TSTE.2018.2844102</pub-id></citation>
</ref>
<ref id="ref5">
<label>5.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>Q</given-names></name> <name><surname>Zheng</surname> <given-names>H</given-names></name> <name><surname>Guo</surname> <given-names>X</given-names></name> <name><surname>Liu</surname> <given-names>G</given-names></name></person-group>. <article-title>Promoting wind energy for sustainable development by precise wind speed prediction based on graph neural networks</article-title>. <source>Renew Energy</source>. (<year>2022</year>) <volume>199</volume>:<fpage>977</fpage>&#x2013;<lpage>92</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.renene.2022.09.036</pub-id></citation>
</ref>
<ref id="ref6">
<label>6.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>J</given-names></name> <name><surname>Zhang</surname> <given-names>S</given-names></name> <name><surname>Xiao</surname> <given-names>Y</given-names></name> <name><surname>Song</surname> <given-names>R</given-names></name></person-group>. <article-title>A review on graph neural network methods in financial applications</article-title>. <source>arXiv preprint arXiv</source>. (<year>2021</year>) <volume>2111</volume>:<fpage>15367</fpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2111.15367</pub-id></citation>
</ref>
<ref id="ref7">
<label>7.</label>
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>W</given-names></name> <name><surname>Chen</surname> <given-names>L</given-names></name> <name><surname>Xie</surname> <given-names>Y</given-names></name> <name><surname>Cao</surname> <given-names>W</given-names></name> <name><surname>Gao</surname> <given-names>Y</given-names></name> <name><surname>Feng</surname> <given-names>X</given-names></name></person-group>. Multi-range attentive bicomponent graph convolutional network for traffic forecasting. In <italic>Proceedings of the AAAI conference on artificial intelligence</italic> (<year>2020</year>) (Vol. 34, pp. 3529&#x2013;3536).</citation>
</ref>
<ref id="ref8">
<label>8.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>X</given-names></name> <name><surname>Liu</surname> <given-names>S</given-names></name> <name><surname>Zhao</surname> <given-names>J</given-names></name> <name><surname>Wu</surname> <given-names>H</given-names></name> <name><surname>Xian</surname> <given-names>J</given-names></name> <name><surname>Montewka</surname> <given-names>J</given-names></name></person-group>. <article-title>Autonomous port management based AGV path planning and optimization via an ensemble reinforcement learning framework</article-title>. <source>Ocean Coast Manag</source>. (<year>2024</year>) <volume>251</volume>:<fpage>107087</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ocecoaman.2024.107087</pub-id></citation>
</ref>
<ref id="ref9">
<label>9.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pinto</surname> <given-names>R</given-names></name> <name><surname>Valentim</surname> <given-names>R</given-names></name> <name><surname>da Silva</surname> <given-names>LF</given-names></name> <name><surname>de Souza</surname> <given-names>GF</given-names></name> <name><surname>de Moura Santos</surname> <given-names>TG</given-names></name> <name><surname>de Oliveira</surname> <given-names>CA</given-names></name> <etal/></person-group>. <article-title>Use of interrupted time series analysis in understanding the course of the congenital syphilis epidemic in Brazil</article-title>. <source>Lancet Regional Health&#x2013;Americas</source>. (<year>2022</year>) <volume>7</volume>:<fpage>100163</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.lana.2021.100163</pub-id></citation>
</ref>
<ref id="ref10">
<label>10.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cori</surname> <given-names>A</given-names></name> <name><surname>Ferguson</surname> <given-names>NM</given-names></name> <name><surname>Fraser</surname> <given-names>C</given-names></name> <name><surname>Cauchemez</surname> <given-names>S</given-names></name></person-group>. <article-title>A new framework and software to estimate time-varying reproduction numbers during epidemics</article-title>. <source>Am J Epidemiol</source>. (<year>2013</year>) <volume>178</volume>:<fpage>1505</fpage>&#x2013;<lpage>12</lpage>. doi: <pub-id pub-id-type="doi">10.1093/aje/kwt133</pub-id>, PMID: <pub-id pub-id-type="pmid">24043437</pub-id></citation>
</ref>
<ref id="ref11">
<label>11.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Du</surname> <given-names>Z</given-names></name> <name><surname>Xu</surname> <given-names>X</given-names></name> <name><surname>Wu</surname> <given-names>Y</given-names></name> <name><surname>Wang</surname> <given-names>L</given-names></name> <name><surname>Cowling</surname> <given-names>BJ</given-names></name> <name><surname>Meyers</surname> <given-names>LA</given-names></name></person-group>. <article-title>Serial interval of COVID-19 among publicly reported confirmed cases</article-title>. <source>Emerg Infect Dis</source>. (<year>2020</year>) <volume>26</volume>:<fpage>1341</fpage>&#x2013;<lpage>3</lpage>. doi: <pub-id pub-id-type="doi">10.3201/eid2606.200357</pub-id>, PMID: <pub-id pub-id-type="pmid">32191173</pub-id></citation>
</ref>
<ref id="ref12">
<label>12.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ioannidis</surname> <given-names>JP</given-names></name> <name><surname>Cripps</surname> <given-names>S</given-names></name> <name><surname>Tanner</surname> <given-names>MA</given-names></name></person-group>. <article-title>Forecasting for COVID-19 has failed</article-title>. <source>Int J Forecast</source>. (<year>2022</year>) <volume>38</volume>:<fpage>423</fpage>&#x2013;<lpage>38</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ijforecast.2020.08.004</pub-id>, PMID: <pub-id pub-id-type="pmid">32863495</pub-id></citation>
</ref>
<ref id="ref13">
<label>13.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dairi</surname> <given-names>A</given-names></name> <name><surname>Harrou</surname> <given-names>F</given-names></name> <name><surname>Zeroual</surname> <given-names>A</given-names></name> <name><surname>Hittawe</surname> <given-names>MM</given-names></name> <name><surname>Sun</surname> <given-names>Y</given-names></name></person-group>. <article-title>Comparative study of machine learning methods for COVID-19 transmission forecasting</article-title>. <source>J Biomed Inform</source>. (<year>2021</year>) <volume>118</volume>:<fpage>103791</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jbi.2021.103791</pub-id>, PMID: <pub-id pub-id-type="pmid">33915272</pub-id></citation>
</ref>
<ref id="ref14">
<label>14.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sanchez-Gendriz</surname> <given-names>I</given-names></name> <name><surname>de Souza</surname> <given-names>GF</given-names></name> <name><surname>de Andrade</surname> <given-names>IG</given-names></name> <name><surname>Neto</surname> <given-names>AD</given-names></name> <name><surname>de Medeiros</surname> <given-names>TA</given-names></name> <name><surname>Barros</surname> <given-names>DM</given-names></name> <etal/></person-group>. <article-title>Data-driven computational intelligence applied to dengue outbreak forecasting: a case study at the scale of the city of Natal, RN-Brazil</article-title>. <source>Sci Rep</source>. (<year>2022</year>) <volume>12</volume>:<fpage>6550</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-022-10512-5</pub-id>, PMID: <pub-id pub-id-type="pmid">35449179</pub-id></citation>
</ref>
<ref id="ref15">
<label>15.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>GP</given-names></name>
</person-group>. <article-title>Time series forecasting using a hybrid ARIMA and neural network model</article-title>. <source>Neurocomputing</source>. (<year>2003</year>) <volume>50</volume>:<fpage>159</fpage>&#x2013;<lpage>75</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0925-2312(01)00702-0</pub-id></citation>
</ref>
<ref id="ref16">
<label>16.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zivot</surname> <given-names>E</given-names></name> <name><surname>Wang</surname> <given-names>J</given-names></name></person-group>. <article-title>Vector autoregressive models for multivariate time series</article-title>. <source>Modeling financial time series with S-PLUS&#x00AE;</source>. New York, NY: Springer (<year>2006</year>). doi: <pub-id pub-id-type="doi">10.1007/978-0-387-21763-5_11</pub-id></citation>
</ref>
<ref id="ref17">
<label>17.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>H</given-names></name> <name><surname>Jiang</surname> <given-names>Z</given-names></name> <name><surname>Lu</surname> <given-names>H</given-names></name></person-group>. <article-title>A hybrid wind speed forecasting system based on a &#x2018;decomposition and ensemble&#x2019; strategy and fuzzy time series</article-title>. <source>Energies</source>. (<year>2017</year>) <volume>10</volume>:<fpage>1422</fpage>. doi: <pub-id pub-id-type="doi">10.3390/en10091422</pub-id></citation>
</ref>
<ref id="ref18">
<label>18.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bai</surname> <given-names>S</given-names></name> <name><surname>Kolter</surname> <given-names>JZ</given-names></name> <name><surname>Koltun</surname> <given-names>V</given-names></name></person-group>. <article-title>An empirical evaluation of generic convolutional and recurrent networks for sequence modeling</article-title>. <source>arXiv preprint arXiv</source>. (<year>2018</year>) <volume>1803</volume>:<fpage>01271</fpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1803.01271</pub-id></citation>
</ref>
<ref id="ref19">
<label>19.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>R</given-names></name> <name><surname>Zheng</surname> <given-names>S</given-names></name> <name><surname>Anandkumar</surname> <given-names>A</given-names></name> <name><surname>Yue</surname> <given-names>Y</given-names></name></person-group>. <article-title>Long-term forecasting using higher order tensor RNNs</article-title>. <source>arXiv preprint arXiv</source>. (<year>2017</year>) <volume>1711</volume>:<fpage>00073</fpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1711.00073</pub-id></citation>
</ref>
<ref id="ref20">
<label>20.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shi</surname> <given-names>X</given-names></name> <name><surname>Chen</surname> <given-names>Z</given-names></name> <name><surname>Wang</surname> <given-names>H</given-names></name> <name><surname>Yeung</surname> <given-names>DY</given-names></name> <name><surname>Wong</surname> <given-names>WK</given-names></name> <name><surname>Woo</surname> <given-names>WC</given-names></name></person-group>. <article-title>Convolutional LSTM network: a machine learning approach for precipitation nowcasting</article-title>. <source>Adv Neural Inf Proces Syst</source>. <publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer</publisher-name>. (<year>2015</year>) <volume>28</volume>:<fpage>802</fpage>&#x2013;<lpage>810</lpage>. doi: <pub-id pub-id-type="doi">10.5555/2969239.2969329</pub-id></citation>
</ref>
<ref id="ref21">
<label>21.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Qin</surname> <given-names>Y</given-names></name> <name><surname>Song</surname> <given-names>D</given-names></name> <name><surname>Chen</surname> <given-names>H</given-names></name> <name><surname>Cheng</surname> <given-names>W</given-names></name> <name><surname>Jiang</surname> <given-names>G</given-names></name> <name><surname>Cottrell</surname> <given-names>G</given-names></name></person-group>. <article-title>A dual-stage attention-based recurrent neural network for time series prediction</article-title>. <source>arXiv preprint arXiv</source>. (<year>2017</year>) <volume>1704</volume>:<fpage>02971</fpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1704.02971</pub-id></citation>
</ref>
<ref id="ref22">
<label>22.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Graves</surname> <given-names>A</given-names></name>
</person-group>. <article-title>Long short-term memory</article-title>. <source>Supervised sequence labelling with recurrent neural networks</source>. <publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2012</year>).</citation>
</ref>
<ref id="ref23">
<label>23.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cho</surname> <given-names>K</given-names></name> <name><surname>Van Merri&#x00EB;nboer</surname> <given-names>B</given-names></name> <name><surname>Gulcehre</surname> <given-names>C</given-names></name> <name><surname>Bahdanau</surname> <given-names>D</given-names></name> <name><surname>Bougares</surname> <given-names>F</given-names></name> <name><surname>Schwenk</surname> <given-names>H</given-names></name> <etal/></person-group>. <article-title>Learning phrase representations using RNN encoder-decoder for statistical machine translation</article-title>. <source>arXiv preprint arXiv</source>. (<year>2014</year>) <volume>1406</volume>:<fpage>1078</fpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1406.1078</pub-id></citation>
</ref>
<ref id="ref24">
<label>24.</label>
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Dauphin</surname> <given-names>YN</given-names></name> <name><surname>Fan</surname> <given-names>A</given-names></name> <name><surname>Auli</surname> <given-names>M</given-names></name> <name><surname>Grangier</surname> <given-names>D</given-names></name></person-group>. Language modeling with gated convolutional networks. In <italic>International conference on machine learning.</italic> (<year>2017</year>) (pp. 933&#x2013;941). PMLR.</citation>
</ref>
<ref id="ref25">
<label>25.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lou</surname> <given-names>HR</given-names></name> <name><surname>Wang</surname> <given-names>X</given-names></name> <name><surname>Gao</surname> <given-names>Y</given-names></name> <name><surname>Zeng</surname> <given-names>Q</given-names></name></person-group>. <article-title>Comparison of ARIMA model, DNN model and LSTM model in predicting disease burden of occupational pneumoconiosis in Tianjin, China</article-title>. <source>BMC Public Health</source>. (<year>2022</year>) <volume>22</volume>:<fpage>2167</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12889-022-14642-3</pub-id>, PMID: <pub-id pub-id-type="pmid">36434563</pub-id></citation>
</ref>
<ref id="ref26">
<label>26.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>HF</given-names></name> <name><surname>Rao</surname> <given-names>N</given-names></name> <name><surname>Dhillon</surname> <given-names>IS</given-names></name></person-group>. <article-title>Temporal regularized matrix factorization for high-dimensional time series prediction</article-title>. <source>Adv Neural Inf Proces Syst</source>. (<year>2016</year>) <volume>29</volume>:<fpage>847</fpage>&#x2013;<lpage>855</lpage>. doi: <pub-id pub-id-type="doi">10.5555/3157096.3157191</pub-id></citation>
</ref>
<ref id="ref27">
<label>27.</label>
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>L</given-names></name> <name><surname>Aggarwal</surname> <given-names>C</given-names></name> <name><surname>Qi</surname> <given-names>GJ</given-names></name></person-group>. Stock price prediction via discovering multi-frequency trading patterns. In <italic>Proceedings of the 23rd ACM SIGKDD international conference on knowledge discovery and data mining</italic> (<year>2017</year>) (pp. 2141&#x2013;2149).</citation>
</ref>
<ref id="ref28">
<label>28.</label>
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Ren</surname> <given-names>H</given-names></name> <name><surname>Xu</surname> <given-names>B</given-names></name> <name><surname>Wang</surname> <given-names>Y</given-names></name> <name><surname>Yi</surname> <given-names>C</given-names></name> <name><surname>Huang</surname> <given-names>C</given-names></name> <name><surname>Kou</surname> <given-names>X</given-names></name> <etal/></person-group>. Time-series anomaly detection service at microsoft. In <italic>Proceedings of the 25th ACM SIGKDD international conference on knowledge discovery &#x0026; data mining</italic> (<year>2019</year>) (pp. 3009&#x2013;3017).</citation>
</ref>
<ref id="ref29">
<label>29.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kipf</surname> <given-names>TN</given-names></name> <name><surname>Welling</surname> <given-names>M</given-names></name></person-group>. <article-title>Semi-supervised classification with graph convolutional networks</article-title>. <source>arXiv preprint arXiv</source>. (<year>2016</year>) <volume>1609</volume>:<fpage>02907</fpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1609.02907</pub-id></citation>
</ref>
<ref id="ref30">
<label>30.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Krizhevsky</surname> <given-names>A</given-names></name> <name><surname>Sutskever</surname> <given-names>I</given-names></name> <name><surname>Hinton</surname> <given-names>GE</given-names></name></person-group>. <article-title>ImageNet classification with deep convolutional neural networks</article-title>. <source>Communications of the ACM</source>. (<year>2017</year>) <volume>60</volume>:<fpage>84</fpage>&#x2013;<lpage>90</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3065386</pub-id></citation>
</ref>
<ref id="ref31">
<label>31.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>D</given-names></name> <name><surname>Li</surname> <given-names>S</given-names></name> <name><surname>Peng</surname> <given-names>Z</given-names></name> <name><surname>Wang</surname> <given-names>P</given-names></name> <name><surname>Wang</surname> <given-names>J</given-names></name> <name><surname>Yang</surname> <given-names>H</given-names></name></person-group>. <article-title>MF-CNN: traffic flow prediction using convolutional neural network and multi-features fusion</article-title>. <source>IEICE Trans Inf Syst</source>. (<year>2019</year>) <volume>E102.D</volume>:<fpage>1526</fpage>&#x2013;<lpage>36</lpage>. doi: <pub-id pub-id-type="doi">10.1587/transinf.2018EDP7330</pub-id></citation>
</ref>
<ref id="ref32">
<label>32.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname> <given-names>H</given-names></name> <name><surname>Wu</surname> <given-names>Z</given-names></name> <name><surname>Wang</surname> <given-names>S</given-names></name> <name><surname>Wang</surname> <given-names>Y</given-names></name> <name><surname>Ma</surname> <given-names>X</given-names></name></person-group>. <article-title>Spatiotemporal recurrent convolutional networks for traffic prediction in transportation networks</article-title>. <source>Sensors</source>. (<year>2017</year>) <volume>17</volume>:<fpage>1501</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s17071501</pub-id>, PMID: <pub-id pub-id-type="pmid">28672867</pub-id></citation>
</ref>
<ref id="ref33">
<label>33.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lv</surname> <given-names>Z</given-names></name> <name><surname>Xu</surname> <given-names>J</given-names></name> <name><surname>Zheng</surname> <given-names>K</given-names></name> <name><surname>Yin</surname> <given-names>H</given-names></name> <name><surname>Zhao</surname> <given-names>P</given-names></name> <name><surname>Zhou</surname> <given-names>X</given-names></name></person-group>. <article-title>Lc-rnn: a deep learning model for traffic speed prediction</article-title>. <source>IJCAI</source>. (<year>2018</year>) <volume>2018</volume>:<fpage>27</fpage>. doi: <pub-id pub-id-type="doi">10.24963/ijcai.2018/482</pub-id></citation>
</ref>
<ref id="ref34">
<label>34.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>X</given-names></name> <name><surname>Dai</surname> <given-names>Z</given-names></name> <name><surname>He</surname> <given-names>Z</given-names></name> <name><surname>Ma</surname> <given-names>J</given-names></name> <name><surname>Wang</surname> <given-names>Y</given-names></name> <name><surname>Wang</surname> <given-names>Y</given-names></name></person-group>. <article-title>Learning traffic as images: a deep convolutional neural network for large-scale transportation network speed prediction</article-title>. <source>Sensors</source>. (<year>2017</year>) <volume>17</volume>:<fpage>818</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s17040818</pub-id>, PMID: <pub-id pub-id-type="pmid">28394270</pub-id></citation>
</ref>
<ref id="ref35">
<label>35.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>Z</given-names></name> <name><surname>Pan</surname> <given-names>S</given-names></name> <name><surname>Long</surname> <given-names>G</given-names></name> <name><surname>Jiang</surname> <given-names>J</given-names></name> <name><surname>Zhang</surname> <given-names>C</given-names></name></person-group>. <article-title>Graph WaveNet for deep spatial-temporal graph modeling</article-title>. <source>arXiv preprint</source>. (<year>2019</year>) arXiv:1906.00121. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1906.00121</pub-id></citation>
</ref>
<ref id="ref36">
<label>36.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>L</given-names></name> <name><surname>Mao</surname> <given-names>F</given-names></name> <name><surname>Zhang</surname> <given-names>K</given-names></name> <name><surname>Li</surname> <given-names>Z</given-names></name></person-group>. <article-title>Spatial-temporal convolutional transformer network for multivariate time series forecasting</article-title>. <source>Sensors</source>. (<year>2022</year>) <volume>22</volume>:<fpage>841</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s22030841</pub-id>, PMID: <pub-id pub-id-type="pmid">35161585</pub-id></citation>
</ref>
<ref id="ref37">
<label>37.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>D</given-names></name> <name><surname>Chen</surname> <given-names>W</given-names></name> <name><surname>Fang</surname> <given-names>J</given-names></name> <name><surname>Liu</surname> <given-names>J</given-names></name> <name><surname>Yang</surname> <given-names>J</given-names></name> <name><surname>Zhang</surname> <given-names>K</given-names></name></person-group>. <article-title>GRU-AGCN model for the content prediction of gases in power transformer oil</article-title>. <source>Front Energy Res</source>. (<year>2023</year>) <volume>11</volume>:<fpage>1135330</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fenrg.2023.1135330</pub-id></citation>
</ref>
<ref id="ref38">
<label>38.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>J</given-names></name> <name><surname>Zheng</surname> <given-names>Y</given-names></name> <name><surname>Qi</surname> <given-names>D</given-names></name> <name><surname>Li</surname> <given-names>R</given-names></name> <name><surname>Yi</surname> <given-names>X</given-names></name> <name><surname>Li</surname> <given-names>T</given-names></name></person-group>. <article-title>Predicting citywide crowd flows using deep spatio-temporal residual networks</article-title>. <source>Artif Intell</source>. (<year>2018</year>) <volume>259</volume>:<fpage>147</fpage>&#x2013;<lpage>66</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.artint.2018.03.002</pub-id></citation>
</ref>
<ref id="ref39">
<label>39.</label>
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Song</surname> <given-names>C</given-names></name> <name><surname>Lin</surname> <given-names>Y</given-names></name> <name><surname>Guo</surname> <given-names>S</given-names></name> <name><surname>Wan</surname> <given-names>H</given-names></name></person-group>. Spatial-temporal synchronous graph convolutional networks: a new framework for spatial-temporal network data forecasting. In <italic>Proceedings of the AAAI conference on artificial intelligence</italic> (<year>2020</year>) (Vol. 34, pp. 914&#x2013;921).</citation>
</ref>
<ref id="ref40">
<label>40.</label>
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Guo</surname> <given-names>S</given-names></name> <name><surname>Lin</surname> <given-names>Y</given-names></name> <name><surname>Feng</surname> <given-names>N</given-names></name> <name><surname>Song</surname> <given-names>C</given-names></name> <name><surname>Wan</surname> <given-names>H</given-names></name></person-group>. Attention based spatial-temporal graph convolutional networks for traffic flow forecasting. In <italic>Proceedings of the AAAI conference on artificial intelligence</italic> (<year>2019</year>) (Vol. 33, pp. 922&#x2013;929).</citation>
</ref>
<ref id="ref41">
<label>41.</label>
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>Z</given-names></name> <name><surname>Pan</surname> <given-names>S</given-names></name> <name><surname>Long</surname> <given-names>G</given-names></name> <name><surname>Jiang</surname> <given-names>J</given-names></name> <name><surname>Chang</surname> <given-names>X</given-names></name> <name><surname>Zhang</surname> <given-names>C</given-names></name></person-group>. Connecting the dots: multivariate time series forecasting with graph neural networks. In <italic>Proceedings of the 26th ACM SIGKDD international conference on knowledge discovery &#x0026; data mining</italic> (<year>2020</year>) (pp. 753&#x2013;763).</citation>
</ref>
<ref id="ref42">
<label>42.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cao</surname> <given-names>D</given-names></name> <name><surname>Wang</surname> <given-names>Y</given-names></name> <name><surname>Duan</surname> <given-names>J</given-names></name> <name><surname>Zhang</surname> <given-names>C</given-names></name> <name><surname>Zhu</surname> <given-names>X</given-names></name> <name><surname>Huang</surname> <given-names>C</given-names></name> <etal/></person-group>. <article-title>Spectral temporal graph neural network for multivariate time-series forecasting</article-title>. <source>Adv Neural Inf Proces Syst</source>. (<year>2020</year>) <volume>33</volume>:<fpage>17766</fpage>&#x2013;<lpage>78</lpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2103.07719</pub-id></citation>
</ref>
<ref id="ref43">
<label>43.</label>
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Zheng</surname> <given-names>C</given-names></name> <name><surname>Fan</surname> <given-names>X</given-names></name> <name><surname>Wang</surname> <given-names>C</given-names></name> <name><surname>Qi</surname> <given-names>J</given-names></name></person-group>. GMAN: a graph multi-attention network for traffic prediction. In <italic>Proceedings of the AAAI conference on artificial intelligence</italic> (<year>2020</year>) (Vol. 34, pp. 1234&#x2013;1241).</citation>
</ref>
<ref id="ref44">
<label>44.</label>
<citation citation-type="other"><person-group person-group-type="author"><name><surname>Glorot</surname> <given-names>X</given-names></name> <name><surname>Bengio</surname> <given-names>Y</given-names></name></person-group>. Understanding the difficulty of training deep feedforward neural networks. In <italic>Proceedings of the thirteenth international conference on artificial intelligence and statistics</italic> (<year>2010</year>) (pp. 249&#x2013;256). <italic>JMLR Workshop and Conference Proceedings</italic>.</citation>
</ref>
<ref id="ref45">
<label>45.</label>
<citation citation-type="other"><person-group person-group-type="author"><collab id="coll1">COVID - Coronavirus Statistics</collab></person-group>. Worldometer. (<year>2023</year>). Available from: <ext-link xlink:href="https://www.worldometers.info/coronavirus/" ext-link-type="uri">https://www.worldometers.info/coronavirus/</ext-link></citation>
</ref>
<ref id="ref46">
<label>46.</label>
<citation citation-type="other"><person-group person-group-type="author"><collab id="coll2">Kaggle</collab></person-group>: Your Machine Learning and Data Science Community. (<year>2023</year>). Available from: <ext-link xlink:href="https://www.kaggle.com/" ext-link-type="uri">https://www.kaggle.com/</ext-link></citation>
</ref>
<ref id="ref47">
<label>47.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Anggraini</surname> <given-names>P</given-names></name> <name><surname>Amin</surname> <given-names>M</given-names></name> <name><surname>Marpaung</surname> <given-names>N</given-names></name></person-group>. <article-title>Comparison of weighted moving average method with double exponential smoothing in estimating production of oil palm fruit</article-title>. <source>Building of Informatics, Technology and Science (BITS)</source>. (<year>2022</year>) <volume>4</volume>:<fpage>705</fpage>&#x2013;<lpage>22</lpage>. doi: <pub-id pub-id-type="doi">10.47065/bits.v4i2.2066</pub-id></citation>
</ref>
<ref id="ref48">
<label>48.</label>
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>XD</given-names></name> <name><surname>Wang</surname> <given-names>W</given-names></name> <name><surname>Yang</surname> <given-names>Y</given-names></name> <name><surname>Hou</surname> <given-names>BH</given-names></name> <name><surname>Olasehinde</surname> <given-names>TS</given-names></name> <name><surname>Feng</surname> <given-names>N</given-names></name> <etal/></person-group>. <article-title>Nesting the SIRV model with NAR, LSTM and statistical methods to fit and predict COVID-19 epidemic trend in Africa</article-title>. <source>BMC Public Health</source>. (<year>2023</year>) <volume>23</volume>:<fpage>138</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12889-023-14992-6</pub-id>, PMID: <pub-id pub-id-type="pmid">36658494</pub-id></citation>
</ref>
</ref-list>
<glossary>
<def-list>
<title>Glossary</title>
<def-item><term>AMAE</term><def><p>Average Mean Absolute Error</p></def></def-item>
<def-item><term>ARIMA</term><def><p>Autoregressive Integrated Moving Average</p></def></def-item>
<def-item><term>ARMSE</term><def><p>Average Root Mean Square Error</p></def></def-item>
<def-item><term>AR</term><def><p>Autoregressive</p></def></def-item>
<def-item><term>CNN</term><def><p>Convolutional Neural Network</p></def></def-item>
<def-item><term>DFT</term><def><p>Discrete Fourier Transform</p></def></def-item>
<def-item><term>DNN</term><def><p>Deep Neural Network</p></def></def-item>
<def-item><term>GFT</term><def><p>Graph Fourier Transform</p></def></def-item>
<def-item><term>GLU</term><def><p>Gated Linear Unit</p></def></def-item>
<def-item><term>GNN</term><def><p>Graph Neural Network</p></def></def-item>
<def-item><term>GRU</term><def><p>Gated Recurrent Unit</p></def></def-item>
<def-item><term>HA</term><def><p>Historical Average</p></def></def-item>
<def-item><term>IDFT</term><def><p>Inverse Discrete Fourier Transform</p></def></def-item>
<def-item><term>IGFT</term><def><p>Inverse Graph Fourier Transform</p></def></def-item>
<def-item><term>LSTM</term><def><p>Long Short-Term Memory</p></def></def-item>
<def-item><term>MAE</term><def><p>Mean Absolute Error</p></def></def-item>
<def-item><term>RMSE</term><def><p>Root Mean Square Error</p></def></def-item>
<def-item><term>RNN</term><def><p>Recurrent Neural Network</p></def></def-item>
<def-item><term>SMA</term><def><p>Simple Moving Average</p></def></def-item>
<def-item><term>VAR</term><def><p>Vector Autoregressive Model</p></def></def-item>
<def-item><term>WMA</term><def><p>Weighted Moving Average</p></def></def-item>
</def-list>
</glossary>
</back>
</article>