<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Astron. Space Sci.</journal-id>
<journal-title>Frontiers in Astronomy and Space Sciences</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Astron. Space Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-987X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1082737</article-id>
<article-id pub-id-type="doi">10.3389/fspas.2023.1082737</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Astronomy and Space Sciences</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A machine learning-based model for the next 3-day geomagnetic index (Kp) forecast</article-title>
<alt-title alt-title-type="left-running-head">Wang et&#xa0;al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fspas.2023.1082737">10.3389/fspas.2023.1082737</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Jingjing</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1893438/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Luo</surname>
<given-names>Bingxian</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Siqing</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Shi</surname>
<given-names>Liqin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>State Key Laboratory of Space Weather</institution>, <institution>National Space Science Center</institution>, <institution>Chinese Academy of Sciences</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Key Laboratory of Science and Technology on Environmental Space Situation Awareness</institution>, <institution>Chinese Academy of Sciences</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>University of Chinese Academy of Sciences</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1667254/overview">Hua-Liang Wei</ext-link>, The University of Sheffield, United Kingdom</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1599656/overview">Sampad Kumar Panda</ext-link>, K L University, India</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2118547/overview">Ezequiel Echer</ext-link>, National Institute of Space Research (INPE), Brazil</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Jingjing Wang, <email>wangjingjing@nssc.ac.cn</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Space Physics, a section of the journal Frontiers in Astronomy and Space Sciences</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>15</day>
<month>02</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>10</volume>
<elocation-id>1082737</elocation-id>
<history>
<date date-type="received">
<day>28</day>
<month>10</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>31</day>
<month>01</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Wang, Luo, Liu and Shi.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Wang, Luo, Liu and Shi</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>The 3-day Kp forecast product is important and necessary for space weather forecasts. There is some essential information that can be obtained from the 3-day Kp forecast product, such as the start time of the geomagnetic storm, the maximum storm level, and the storm duration. In this study, we aimed to predict the next 3-day Kp index based on the previous Kp time series and SDO/AIA 193&#xa0;&#xc5; images. We prepared datasets from May 2010 to December 2019 for training and datasets from January 2020 to October 2022 for testing. The similarity parameters of the previous and current geomagnetic conditions between the samples are calculated and analyzed. We assumed that the paired samples with high-similarity parameters of the previous and current geomagnetic conditions would also have high-similarity parameters of the next 3-day geomagnetic conditions. Based on the assumption, we selected the three best similarity parameters through the feature selection process and adopted the scalable tree boosting system (XGBoost) to develop a prediction model. It took the similarity parameters of the previous and current geomagnetic conditions as input and provided the best match sample from the training subset as a forecast. For the next 3-day non-storm (maximum Kp <inline-formula id="inf1">
<mml:math id="m1">
<mml:mo>&#x3c;</mml:mo>
</mml:math>
</inline-formula> 5) prediction period, our model reached an F1-score of 0.96. For the next 3-day storm (maximum Kp &#x2265; 5) prediction period, our model reached an F1-score of 0.82, a recall of 0.70, and a precision of 0.98.</p>
</abstract>
<kwd-group>
<kwd>geomagnetic index</kwd>
<kwd>geomagnetic storm</kwd>
<kwd>space weather</kwd>
<kwd>machine learning</kwd>
<kwd>geomagnetic activity</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>A geomagnetic storm is the consequence of solar wind disturbances originating from the Sun and impacting geospace (<xref ref-type="bibr" rid="B9">Gonzalez&#xa0;et&#xa0;al., 1994</xref>; <xref ref-type="bibr" rid="B19">Perreault and Akasofu, 1978</xref>). The solar and interplanetary sources of the geomagnetic storms are related to coronal mass ejections (CMEs; <xref ref-type="bibr" rid="B11">Gosling&#xa0;et&#xa0;al., 1991</xref>; <xref ref-type="bibr" rid="B28">Webb&#xa0;et&#xa0;al., 2000</xref>; <xref ref-type="bibr" rid="B6">Chen, 2011</xref>; <xref ref-type="bibr" rid="B29">Webb and Howard, 2012</xref>) and coronal hole high-speed streams (<xref ref-type="bibr" rid="B26">Tsurutani&#xa0;et&#xa0;al., 1995</xref>; <xref ref-type="bibr" rid="B10">Gonzalez&#xa0;et&#xa0;al., 1999</xref>; <xref ref-type="bibr" rid="B25">Tsurutani&#xa0;et&#xa0;al., 2006</xref>; <xref ref-type="bibr" rid="B31">Zhang&#xa0;et&#xa0;al., 2007</xref>). Geomagnetic storms can last from hours to days and sometimes lead to space weather effects, for example, the sudden enhancement of the electric currents in the magnetosphere and ionosphere, the severe changes of the relativistic electron fluxes in the Van Allen radiation belts, and the density enhancement in the upper atmosphere (<xref ref-type="bibr" rid="B22">Ritter&#xa0;et&#xa0;al., 2010</xref>; <xref ref-type="bibr" rid="B14">Mansilla, 2011</xref>; <xref ref-type="bibr" rid="B30">Xiong&#xa0;et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B32">Zhang&#xa0;et&#xa0;al., 2019</xref>).</p>
<p>Magnetic activity indices were designed to describe variations in the geomagnetic field, including the Kp index, ap index, and Ap index (<xref ref-type="bibr" rid="B16">Mayaud, 1980</xref>; <xref ref-type="bibr" rid="B17">Menvielle and Berthelier, 1991</xref>; <xref ref-type="bibr" rid="B2">Bartels, 2013a</xref>; <xref ref-type="bibr" rid="B3">Bartels, 2013b</xref>; <xref ref-type="bibr" rid="B32">Zhang&#xa0;et&#xa0;al., 2019</xref>). The planetary 3-hour-range Kp index ranging from 0 to 9 is calculated from 13 geomagnetic observatories between 44&#xb0; and 60&#xb0; northern or southern geomagnetic latitude. The 3-hour-range ap index is the equivalent range of the Kp index. The 1-day-range Ap index is calculated from the eight 3-hour ap index values of each day. Furthermore, Kp is a good representative for geomagnetic activity and is used to classify the geomagnetic conditions into categories (for example, minor, moderate, and major storms) in the Space Weather Prediction Center (SWPC) facilitated at the National Oceanic and Atmospheric Administration (NOAA) and the Space Environment Prediction Center (SEPC) facilitated at the National Space Science Center, Chinese Academy of Sciences.</p>
<p>The Kp index and Ap index are also important inputs for physics-based geospace models, such as magnetosphere and plasmasphere models, and thermosphere and ionosphere models (<xref ref-type="bibr" rid="B15">Matzka&#xa0;et&#xa0;al., 2021</xref>). Compared with the 1-day-range Ap index, the 3-hour-range Kp index can provide more refined information on the geomagnetic activity, such as the start time of the geomagnetic storm, the maximum storm level, and the duration of the storm. Therefore, Kp index prediction is very important and necessary for space weather forecasts.</p>
<p>There are many models developed to predict geomagnetic activity on multiple time scales of hours to years based on statistical and machine learning methods (<xref ref-type="bibr" rid="B8">Feynman and Gu, 1986</xref>; <xref ref-type="bibr" rid="B1">Bala and Reiff, 2012</xref>; <xref ref-type="bibr" rid="B27">Wang&#xa0;et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B13">Luo&#xa0;et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B24">Tan&#xa0;et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B23">Shprits&#xa0;et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B33">Zhelavskaya&#xa0;et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B5">Chakraborty and Morley, 2020</xref>). However, only a few have been applied to the operational space weather forecast. At present, SWPC routinely provides products of a 45-day Ap index forecast<xref ref-type="fn" rid="fn1">
<sup>1</sup>
</xref>, 27-day Ap index and the largest Kp index forecast<xref ref-type="fn" rid="fn2">
<sup>2</sup>
</xref>, and 3-day Kp index forecast<xref ref-type="fn" rid="fn3">
<sup>3</sup>
</xref>. SEPC routinely provides products of a 27-day Ap index forecast<xref ref-type="fn" rid="fn4">
<sup>4</sup>
</xref> and a 3-h Kp index forecast with a lead time of less than 3&#xa0;h<xref ref-type="fn" rid="fn5">
<sup>5</sup>
</xref>. So far, there is no public algorithm designed for a 3-day Kp index forecast, and the conventional product of a 3-day Kp index forecast is only provided by SWPC.</p>
<p>The 3-day Kp index forecast is an essential and basic product in space weather forecasts. It contains 24 points in a 3-day period and describes the geomagnetic conditions with a 3-hour-range resolution. Considering the increasing demand for space weather forecasts, it is still important and necessary for the space weather community to develop prediction models that can provide the 3-day Kp index forecast product.</p>
<p>In this study, we aim to develop a model for the next 3-day Kp index time-series prediction. Here, we introduce the data preparation in <xref ref-type="sec" rid="s2">Section&#xa0;2</xref>, then develop a classification model based on machine-learning algorithms, and conduct prediction error analysis in <xref ref-type="sec" rid="s3">Section&#xa0;3</xref>. The conclusion and discussion are presented in <xref ref-type="sec" rid="s4">Section&#xa0;4</xref>.</p>
</sec>
<sec id="s2">
<title>2 Data and methodology</title>
<sec id="s2-1">
<title>2.1 Data preparation</title>
<p>In this study, we used two kinds of data from May 2010 to October 2022, including the 3-hour Kp index from the National Oceanic and Atmospheric Administration (NOAA) and the 193&#xc5; wavelength images measured at the SDO/AIA (<xref ref-type="bibr" rid="B18">O&#x2019;Dwyer&#xa0;et&#xa0;al., 2010</xref>; <xref ref-type="bibr" rid="B12">Lemen&#xa0;et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B20">Pesnell&#xa0;et&#xa0;al., 2012</xref>). They were divided into the training subset (including data from May 2010 to December 2019) and the testing subset (including data from January 2020 to October 2022). A prediction model was trained by the training subset and tested by the independent testing subset.</p>
<p>It should be mentioned that we only focus on the background solar wind (including the coronal hole high-speed streams and co-rotating interaction regions) in this study. Therefore, we removed the time periods when the geomagnetic conditions were affected by interplanetary coronal mass ejections (ICMEs), according to the near-Earth ICMEs list (<xref ref-type="bibr" rid="B4">Cane and Richardson, 2003</xref>; <xref ref-type="bibr" rid="B21">Richardson and Cane, 2010</xref>). For each day, we selected two samples according to the timepoints at 00:00 UTC and 12:00 UTC. Thus, we obtained 6,042 samples from the training subset and 1,853 samples from the testing subset.</p>
<p>As shown in the flow chart in <xref ref-type="fig" rid="F1">Figure&#xa0;1</xref>, we prepared the 54-day Kp index time series before the current timepoint (<italic>T</italic>) and the AIA 193&#xc5; image measured at <italic>T</italic> as inputs, and we took the 3-day Kp index time series after <italic>T</italic> as outputs. <xref ref-type="fig" rid="F2">Figure&#xa0;2</xref> shows how to divide the Kp time series into the previous 54-day Kp input and the next 3-day Kp output for the current <italic>T</italic> sample. Taking the sample at 00:00 UTC on 22 Oct 2019 as an example, we took the Kp time series (432 points) from 00:00 UTC on August 29 to 00:00 UTC on October 22 as the input and the Kp time series (24 points) from 00:00 UTC on October 22 to 00:00 UTC on October 25 as the output. <xref ref-type="fig" rid="F3">Figure&#xa0;3</xref> shows how to prepare the AIA 193&#xc5; image measured at <italic>T</italic> as input for the current <italic>T</italic> sample. For the sample at 00:00 UTC on 22 Oct 2019, the observed image is shown in the top left panel. Then, a slice covering the [<italic>S</italic>40, <italic>N</italic>40] and [<italic>E</italic>40, <italic>W</italic>40] areas was cut from the observed image and shown in the top right panel. A median value was calculated based on the previous monthly slices. After all values above the median value in the slice at current <italic>T</italic> were replaced by 0, we obtained a slice as shown in the bottom left panel referring to the properties of coronal holes. The slice in the bottom left panel was then resized into a smaller size of 32 &#xd7; 32 pixels and used as one of the inputs.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Flowchart of this study.</p>
</caption>
<graphic xlink:href="fspas-10-1082737-g001.tif"/>
</fig>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Illustration of preparing the 3-h Kp index time series for the current T at 00:00 UTC on 22 Oct 2019. The 3-h Kp index is represented by a blue (red) bar if it is related to the non-storm (geomagnetic storm) period. The black vertical line refers to the current time point T. The left side of the line refers to the previous 54-day Kp index time series (one of the model inputs). The right side of the line refers to the next 3-day Kp index time series (the model outputs).</p>
</caption>
<graphic xlink:href="fspas-10-1082737-g002.tif"/>
</fig>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Illustration of preparing the AIA 193&#xc5; images. The upper left shows the 193&#xc5; images at 00:00:04 UTC on 22 Oct 2019 measured using SDO/AIA. The upper right shows the cut slice covering the [<italic>S</italic>40, <italic>N</italic>40] and [<italic>E</italic>40, <italic>W</italic>40] areas. After all values above the median in the slice are replaced by 0, the pre-processed slice referring to the coronal holes is shown in the bottom left.</p>
</caption>
<graphic xlink:href="fspas-10-1082737-g003.tif"/>
</fig>
<p>To compare the difference between the training and testing subsets, we calculated the maximum value of the 3-day Kp output and took it as a representative for each sample. Then, we drew the sample distribution histogram in <xref ref-type="fig" rid="F4">Figure&#xa0;4</xref>. The blue and red bars represent the training and testing subsets, respectively. It was found that the maximum Kp of the samples in the two subsets had similar distribution properties. There were 1,614 (26.7%) and 390 (21.0%) storm samples (maximum Kp &#x2265; 5) in the training and testing subsets, respectively.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Sample distribution histogram at the training and testing subsets. The <italic>x</italic>-axis represents the maximum value of the 3-day Kp output for each sample. The <italic>y</italic>-axis represents the sample number.</p>
</caption>
<graphic xlink:href="fspas-10-1082737-g004.tif"/>
</fig>
</sec>
<sec id="s2-2">
<title>2.2 Similarity calculation and labeling of sample pairs</title>
<p>Two samples at timepoints <italic>T</italic>
<sub>
<italic>i</italic>
</sub> and <italic>T</italic>
<sub>
<italic>j</italic>
</sub> from the training subset were called a pair (where the timepoint <italic>T</italic>
<sub>
<italic>i</italic>
</sub> was more than 3 days farther from <italic>T</italic>
<sub>
<italic>j</italic>
</sub>). For each sample at timepoint <italic>T</italic>
<sub>
<italic>i</italic>
</sub>, we selected more than 5,000 other samples from the training subset to form sample pairs with it. All the samples that were within a 2-month interval of the current timepoint were excluded. As a result, we obtained more than 25 million sample pairs from the training subset.</p>
<p>We assume that if the inputs of two samples in a pair have high similarity (that is, their previous and current geomagnetic conditions are similar), their geomagnetic conditions in the next 3 days should be similar too. In this case, the two samples in the pair are similar to each other so that one sample can be a proper representative of the other one. In this study, the similarity parameters of the pair inputs were calculated and used to develop a model for the 3-day Kp forecast based on this assumption.</p>
<p>
<xref ref-type="fig" rid="F1">Figure&#xa0;1</xref> shows that based on the 54-day Kp time-series inputs, we derived 11 similarity parameters for each pair. The first two parameters were the mean absolute error (<italic>MAE</italic>
<sub>
<italic>Kp</italic>
</sub>) and the root mean square error (<italic>RMSE</italic>
<sub>
<italic>Kp</italic>
</sub>). They were calculated by the following formulas, where <italic>x</italic> and <italic>y</italic> represent the inputs of a pair at timepoints <italic>T</italic>
<sub>
<italic>i</italic>
</sub> and <italic>T</italic>
<sub>
<italic>j</italic>
</sub>, respectively:<disp-formula id="e1">
<mml:math id="m2">
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>E</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:mfenced open="&#x7c;" close="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:math>
<label>(1)</label>
</disp-formula>
<disp-formula id="e2">
<mml:math id="m3">
<mml:mi>R</mml:mi>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>E</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msqrt>
<mml:mo>.</mml:mo>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>The next two parameters were the maximum absolute error (<italic>MaxDiff</italic>
<sub>
<italic>Kp</italic>
</sub>) and the sum of the absolute error for the storm (Kp &#x2265; 5) period (<italic>SDiff</italic>
<sub>
<italic>Kp</italic>
</sub>). Also, four spatial distances were calculated, namely, the Euclidean distance (<italic>D</italic>&#xa0;(<italic>Euclidean</italic>)<sub>
<italic>Kp</italic>
</sub>), cosine distance (<italic>D</italic>&#xa0;(<italic>Cosine</italic>)<sub>
<italic>Kp</italic>
</sub>), correlation (<italic>D</italic>&#xa0;(<italic>Correlation</italic>)<sub>
<italic>Kp</italic>
</sub>), and Hamming distance (<italic>D</italic>&#xa0;(<italic>Hamming</italic>)<sub>
<italic>Kp</italic>
</sub>), by the following formulas and were taken as similarity parameters. The SciPy package<xref ref-type="fn" rid="fn6">
<sup>6</sup>
</xref> was used to calculate these spatial distances.<disp-formula id="e3">
<mml:math id="m4">
<mml:mi>D</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msqrt>
<mml:mo>,</mml:mo>
</mml:math>
<label>(3)</label>
</disp-formula>
<disp-formula id="e4">
<mml:math id="m5">
<mml:mi>D</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x22c5;</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:math>
<label>(4)</label>
</disp-formula>
<disp-formula id="e5">
<mml:math id="m6">
<mml:mi>D</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x22c5;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;">
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;">
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>.</mml:mo>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
<p>We also derived three features (<italic>Feature</italic>
<sub>
<italic>KPCA(linear)</italic>
</sub>, <italic>Feature</italic>
<sub>
<italic>KPCA(rbf)</italic>
</sub>, <italic>Feature</italic>
<sub>
<italic>KPCA(cosine)</italic>
</sub>) from each Kp time-series input using the kernel principal component analysis (KPCA) algorithm with the &#x201c;linear,&#x201d; &#x201c;rbf,&#x201d; and &#x201c;cosine&#x201d; kernels using the Scikit-learn package<xref ref-type="fn" rid="fn7">
<sup>7</sup>
</xref>. Through the KPCA algorithm, a Kp time-series input can be represented by a number (the feature). So, for each pair, we obtained two numbers (<italic>Feature</italic>
<sub>
<italic>x</italic>, <italic>KPCA</italic>(<italic>kernel</italic>)</sub> and <italic>Feature</italic>
<sub>
<italic>y</italic>, <italic>KPCA</italic>(<italic>kernel</italic>)</sub>) representing the Kp time-series pair inputs (<italic>x</italic> and <italic>y</italic>). Then, we calculated the absolute error of the two numbers and took it as the similarity parameter for the pair. In this way, we obtain three similarity parameters, namely, <italic>F</italic>&#xa0;(<italic>linear</italic>)<sub>
<italic>Kp</italic>
</sub>, <italic>F</italic>&#xa0;(<italic>rbf</italic>)<sub>
<italic>Kp</italic>
</sub>, and <italic>F</italic>&#xa0;(<italic>cosine</italic>)<sub>
<italic>Kp</italic>
</sub>, by the following formula:<disp-formula id="e6">
<mml:math id="m7">
<mml:mtable class="align" columnalign="left">
<mml:mtr>
<mml:mtd columnalign="right">
<mml:mspace width="-1em"/>
<mml:mspace width="-1em"/>
<mml:mi>F</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mo>&#x3d;</mml:mo>
<mml:mo>&#x7c;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>K</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>A</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="right"/>
<mml:mtd columnalign="left">
<mml:mspace width="-.2em"/>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>K</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>A</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msub>
<mml:mo>&#x7c;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>f</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<p>
<xref ref-type="fig" rid="F1">Figure&#xa0;1</xref> shows that based on the coronal holes image inputs, we derived another seven similarity parameters for each pair. The first two parameters were the mean absolute error (<italic>MAE</italic>
<sub>
<italic>CH</italic>
</sub>) and root mean square error (<italic>RMSE</italic>
<sub>
<italic>CH</italic>
</sub>), and were calculated by <xref ref-type="disp-formula" rid="e1">Formulas&#xa0;1</xref> and <xref ref-type="disp-formula" rid="e2">2</xref>. The next two were widely used image similarity parameters, namely, the structural similarity index (<italic>SSIM</italic>
<sub>
<italic>CH</italic>
</sub>) and peak signal-to-noise ratio (<italic>PSNR</italic>
<sub>
<italic>CH</italic>
</sub>), which were calculated using the Scikit-image package<xref ref-type="fn" rid="fn8">
<sup>8</sup>
</xref>. Then, in a similar way, we calculated the absolute error of the KPCA features derived by the image pair inputs using <xref ref-type="disp-formula" rid="e6">Formula&#xa0;6</xref> and took them as the similarity parameters, such as <italic>F</italic>&#xa0;(<italic>linear</italic>)<sub>
<italic>CH</italic>
</sub>, <italic>F</italic>&#xa0;(<italic>rbf</italic>)<sub>
<italic>CH</italic>
</sub>, and <italic>F</italic>&#xa0;(<italic>cosine</italic>)<sub>
<italic>CH</italic>
</sub>.</p>
<p>Then, we obtained 18 similarity parameters from the pair inputs in the training subsets. Except for the correlation (<italic>D</italic>&#xa0;(<italic>Correlation</italic>)<sub>
<italic>Kp</italic>
</sub>), structural similarity index (<italic>SSIM</italic>
<sub>
<italic>CH</italic>
</sub>), and peak signal-to-noise ratio (<italic>PSNR</italic>
<sub>
<italic>CH</italic>
</sub>), the smaller the other 15 parameters, the higher the similarity of the pair inputs. The opposite holds for the correlation (<italic>D</italic>&#xa0;(<italic>Correlation</italic>)<sub>
<italic>Kp</italic>
</sub>), structural similarity index (<italic>SSIM</italic>
<sub>
<italic>CH</italic>
</sub>), and peak signal-to-noise ratio (<italic>PSNR</italic>
<sub>
<italic>CH</italic>
</sub>). Then, we standardized the 18 similarity parameters independently by computing the relevant statistics on the pairs in the training subsets using the Scikit-learn package.</p>
<p>As we established the pair inputs and calculated the similarity parameters, the pair outputs (3-day Kp time series) should be analyzed and labeled as a number (1 or 0). Here, we adopted a batch labeling method according to the following principles:<list list-type="simple">
<list-item>
<p>1) For each sample at timepoint <italic>T</italic>
<sub>
<italic>i</italic>
</sub>, if the next 3-day geomagnetic conditions reached the storm level (maximum value of Kp &#x2265; 5), it is a storm sample. Otherwise, it is a non-storm sample.</p>
</list-item>
<list-item>
<p>2) For each sample at timepoint <italic>T</italic>
<sub>
<italic>i</italic>
</sub>, we picked up more than 5,000 other samples and formed pairs with them (denoted by <italic>T</italic>
<sub>
<italic>i</italic>
</sub> pairs). If a pair contains one storm sample and one non-storm sample, we considered the pair as a bad match and labeled the pair output as 0.</p>
</list-item>
<list-item>
<p>3) For each sample at timepoint <italic>T</italic>
<sub>
<italic>i</italic>
</sub>, we selected the top 100 pairs with the least mean absolute errors between their outputs (the next 3-day Kp time series) from the remaining <italic>T</italic>
<sub>
<italic>i</italic>
</sub> pairs. We considered those 100 pairs as good matches and labeled their pair outputs as 1. The remaining pairs were labeled as 0.</p>
</list-item>
</list>
</p>
<p>Finally, we prepared our dataset into a standard form consisting of inputs (18 similarity parameters) and output (binary labels) that is suitable for a binary classification task.</p>
</sec>
<sec id="s2-3">
<title>2.3 Feature selection of similarity parameters</title>
<p>Feature selection is a widely used approach in the machine learning community to select the best features from the dataset in which the model can perform better with less training time. In this study, we selected the best features by comparing the Pearson&#x2019;s correlation of the 18 similarity parameters with the binary labels of the pairs using the SelectKBest from the Scikit-learn package. The correlations were standardized to scores ranging from 0 to 1, as shown in <xref ref-type="fig" rid="F5">Figure&#xa0;5</xref>.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Scores of similarity of input pairs at the training dataset by feature selection.</p>
</caption>
<graphic xlink:href="fspas-10-1082737-g005.tif"/>
</fig>
<p>It was found that there were three best features that had a significantly higher score than others. The top three best features were the Euclidean distance of the 54-day Kp time series (<italic>D</italic>&#xa0;(<italic>Euclidean</italic>)<sub>
<italic>Kp</italic>
</sub>), the Hamming distance of the 54-day Kp time series (<italic>D</italic>&#xa0;(<italic>Hamming</italic>)<sub>
<italic>Kp</italic>
</sub>), and the differences between the KPCA features of the current coronal hole images for each pair (<italic>F</italic>&#xa0;(<italic>linear</italic>)<sub>
<italic>CH</italic>
</sub>). They were selected from the dataset and used for model development, as shown in the flowchart in <xref ref-type="fig" rid="F1">Figure&#xa0;1</xref>.</p>
</sec>
</sec>
<sec id="s3">
<title>3 Model development and result analysis</title>
<sec id="s3-1">
<title>3.1 Development of a classification model for sample pairs</title>
<p>After the three best features had been selected by feature selection, we determined to develop a classification model for pairs in the training subset to predict whether a pair of two samples is a good match or not. If the pair is a good match, their geomagnetic conditions are similar to each other so that one sample can be considered as a forecast of the other.</p>
<p>The scalable tree boosting system (XGBoost) is a widely used machine learning algorithm for binary classification tasks. It implements gradient boosting which performs additive optimization in functional space and incorporates a regularized model to prevent over-fitting (<xref ref-type="bibr" rid="B7">Chen and Guestrin, 2016</xref>).</p>
<p>We applied the XGBoost algorithm to develop a classification model using the Scikit-learn package<xref ref-type="fn" rid="fn9">
<sup>9</sup>
</xref> to predict whether a pair of two samples is a good match or not. If the pair is a good match, their geomagnetic conditions are similar to each other so that one sample can be considered as a forecast of the other. Considering the &#x201c;f1-weighted&#x201d; (it is a balanced score of a standard binary classification task) as metrics, the best hyper-parameters of the XGBoost model are <italic>learning</italic>_<italic>rate</italic> &#x3d; 0.01 and <italic>max</italic>_<italic>depth</italic> &#x3d; 20.</p>
<p>As shown in <xref ref-type="fig" rid="F1">Figure&#xa0;1</xref>, to predict the 3-day Kp time series at <italic>T</italic>
<sub>
<italic>i</italic>
</sub>, we give a forecast of the 3-day Kp time series following the steps:<list list-type="simple">
<list-item>
<p>1) We pick up a sample at <italic>T</italic>
<sub>
<italic>j</italic>
</sub> to establish a pair and calculate the similarity parameters of the pair.</p>
</list-item>
<list-item>
<p>2) For the pair at <italic>T</italic>
<sub>
<italic>i</italic>
</sub> and <italic>T</italic>
<sub>
<italic>j</italic>
</sub>, the three similarity parameters are fed into the classification model. If the model predicts 1, the pair is a good match, whereas if the model predicts 0, it is not a good match.</p>
</list-item>
<list-item>
<p>3) If we find a good match, we take the 3-day Kp time series after <italic>T</italic>
<sub>
<italic>j</italic>
</sub> as forecast at <italic>T</italic>
<sub>
<italic>i</italic>
</sub>. Otherwise, we repeat the aforementioned steps.</p>
</list-item>
</list>
</p>
<p>We developed a prediction model using the training subset and applied the model on the testing subset from January 2020 to October 2022 and evaluated its performance. Both the geomagnetic storm prediction (maximum Kp &#x2265; 5) and non-storm (maximum Kp <inline-formula id="inf2">
<mml:math id="m8">
<mml:mo>&#x3c;</mml:mo>
</mml:math>
</inline-formula> 5) prediction are important in space weather forecasts. Thus, we will evaluate the model for the geomagnetic storm prediction (maximum Kp &#x2265; 5) as well as the non-storm (maximum Kp <inline-formula id="inf3">
<mml:math id="m9">
<mml:mo>&#x3c;</mml:mo>
</mml:math>
</inline-formula> 5) prediction tasks.</p>
</sec>
<sec id="s3-2">
<title>3.2 Evaluation metrics and model performance</title>
<p>For a binary classification task like the next 3-day geomagnetic storm (maximum Kp &#x2265; 5) prediction, the confusion matrix is shown in <xref ref-type="table" rid="T1">Table&#xa0;1</xref>. The model is evaluated by its ability to predict the next 3-day geomagnetic storm, the first day (day 1) geomagnetic storm, the second day (day 2) geomagnetic storm, and the third day (day 3) geomagnetic storm. We also evaluate the model&#x2019;s ability to predict the next 3-day non-storm conditions, the first day (day 1) of non-storm conditions, the second day (day 2) of non-storm conditions, and the third day (day 3) of non-storm conditions.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Confusion matrix for binary classification of the geomagnetic storm.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center"/>
<th colspan="2" align="center">Actual geomagnetic storm (observation)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Predicted geomagnetic storm (forecasts)</td>
<td align="center">True positive (TP)<break/>hit case</td>
<td align="center">False positive (FP)<break/>false alarm case</td>
</tr>
<tr>
<td align="left"/>
<td align="center">False negative (FN)<break/>missed case</td>
<td align="center">True negative (TN)<break/>correct non-storm case</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>There are three metrics for binary classification used in the study, including precision, recall, and F1-score. They are calculated by the following formulas:<disp-formula id="e7">
<mml:math id="m10">
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:math>
<label>(7)</label>
</disp-formula>
<disp-formula id="e8">
<mml:math id="m11">
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:math>
<label>(8)</label>
</disp-formula>
<disp-formula id="e9">
<mml:math id="m12">
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:math>
<label>(9)</label>
</disp-formula>where the true positive (TP), false positive (FP), false negative (FN), and true negative (TN) are calculated following the confusion matrix, as shown in <xref ref-type="table" rid="T1">Table&#xa0;1</xref> for storm prediction and <xref ref-type="table" rid="T2">Table&#xa0;2</xref> for non-storm prediction. For example, for storm prediction, the higher the recall, the more storm samples have been correctly predicted. The higher the precision, the fewer false alarms have been made. An F1-score is a balance metric for recall and precision. A larger F1-score shows a better ability to classify a sample into the correct category.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Confusion matrix for the binary classification of non-storm.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center"/>
<th colspan="2" align="center">Actual non-storm (observation)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="2" align="center">Predicted non-storm (forecasts)</td>
<td rowspan="2" align="center">True positive (TP)<break/>hit case</td>
<td align="center">False positive (FP)</td>
</tr>
<tr>
<td align="center">false alarm case</td>
</tr>
<tr>
<td rowspan="2" align="left"/>
<td rowspan="2" align="center">False negative (FN)<break/>missed case</td>
<td align="center">True negative (TN)</td>
</tr>
<tr>
<td align="center">correct storm case</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The three metrics, recall, precision, and F1-score for the geomagnetic storm prediction task (considering the storm samples as positive samples) were listed as &#x201c;For storm category (maximum Kp &#x2265; 5),&#x201d; as shown in <xref ref-type="table" rid="T3">Table&#xa0;3</xref>. The metrics for the non-storm prediction task (considering the non-storm samples as positive samples) were listed as &#x201c;For non-storm category (maximum Kp <inline-formula id="inf4">
<mml:math id="m13">
<mml:mo>&#x3c;</mml:mo>
</mml:math>
</inline-formula> 5),&#x201d; as shown in <xref ref-type="table" rid="T3">Table&#xa0;3</xref>. We found that</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Evaluation metrics for the best 3-day Kp index prediction model.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Metric</th>
<th align="center">3-day<sup>
<italic>a</italic>
</sup>
</th>
<th align="center">Day 1<sup>
<italic>b</italic>
</sup>
</th>
<th align="center">Day 2<sup>
<italic>c</italic>
</sup>
</th>
<th align="center">Day 3<sup>
<italic>d</italic>
</sup>
</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left"/>
<td colspan="4" align="center">For all samples in the test dataset</td>
</tr>
<tr>
<td align="center">Mean error (ME) average</td>
<td align="center">0.03</td>
<td align="char" char=".">0.06</td>
<td align="char" char=".">0.01</td>
<td align="char" char=".">0.01</td>
</tr>
<tr>
<td align="center">ME standard deviation</td>
<td align="center">0.65</td>
<td align="char" char=".">0.92</td>
<td align="char" char=".">0.88</td>
<td align="char" char=".">0.87</td>
</tr>
<tr>
<td align="center">Mean absolute error (MAE) average</td>
<td align="center">1.06</td>
<td align="char" char=".">1.04</td>
<td align="char" char=".">1.08</td>
<td align="char" char=".">1.05</td>
</tr>
<tr>
<td align="center">MAE standard deviation</td>
<td align="center">0.32</td>
<td align="char" char=".">0.47</td>
<td align="char" char=".">0.43</td>
<td align="char" char=".">0.44</td>
</tr>
<tr>
<td align="center">Root mean square error (RMSE) average</td>
<td align="center">1.31</td>
<td align="char" char=".">1.24</td>
<td align="char" char=".">1.31</td>
<td align="char" char=".">1.27</td>
</tr>
<tr>
<td align="center">RMSE standard deviation</td>
<td align="center">0.37</td>
<td align="char" char=".">0.50</td>
<td align="char" char=".">0.45</td>
<td align="char" char=".">0.47</td>
</tr>
<tr>
<td align="center">
<italic>MaxDiff</italic>
<sup>
<italic>e</italic>
</sup> average</td>
<td align="center">2.87</td>
<td align="char" char=".">2.15</td>
<td align="char" char=".">2.42</td>
<td align="char" char=".">2.26</td>
</tr>
<tr>
<td align="center">
<italic>MaxDiff</italic> standard deviation</td>
<td align="center">0.78</td>
<td align="char" char=".">0.82</td>
<td align="char" char=".">0.77</td>
<td align="char" char=".">0.80</td>
</tr>
<tr>
<td align="center">
<italic>SDiff</italic>
<sup>
<italic>f</italic>
</sup> average</td>
<td align="center">2.29</td>
<td align="char" char=".">1.00</td>
<td align="char" char=".">1.22</td>
<td align="char" char=".">1.22</td>
</tr>
<tr>
<td align="center">
<italic>SDiff</italic> standard deviation</td>
<td align="center">7.56</td>
<td align="char" char=".">3.72</td>
<td align="char" char=".">4.44</td>
<td align="char" char=".">3.77</td>
</tr>
<tr>
<td align="left"/>
<td colspan="4" align="center">For non-storm category (Kp <inline-formula id="inf5">
<mml:math id="m14">
<mml:mo>&#x3c;</mml:mo>
</mml:math>
</inline-formula> 5)<sup>
<italic>g</italic>
</sup>
</td>
</tr>
<tr>
<td align="center">&#xa0;&#xa0;Precision</td>
<td align="center">0.93</td>
<td align="char" char=".">0.92</td>
<td align="char" char=".">0.92</td>
<td align="char" char=".">0.95</td>
</tr>
<tr>
<td align="center">&#xa0;&#xa0;Recall</td>
<td align="center">0.99</td>
<td align="char" char=".">0.99</td>
<td align="char" char=".">0.97</td>
<td align="char" char=".">0.92</td>
</tr>
<tr>
<td align="center">&#xa0;&#xa0;F1-score</td>
<td align="center">0.96</td>
<td align="char" char=".">0.95</td>
<td align="char" char=".">0.95</td>
<td align="char" char=".">0.93</td>
</tr>
<tr>
<td align="left"/>
<td colspan="4" align="center">For storm category (Kp &#x2265; 5)<sup>
<italic>h</italic>
</sup>
</td>
</tr>
<tr>
<td align="center">&#xa0;&#xa0;Precision</td>
<td align="center">0.98</td>
<td align="char" char=".">0.53</td>
<td align="char" char=".">0.45</td>
<td align="char" char=".">0.37</td>
</tr>
<tr>
<td align="center">&#xa0;&#xa0;Recall</td>
<td align="center">0.70</td>
<td align="char" char=".">0.17</td>
<td align="char" char=".">0.19</td>
<td align="char" char=".">0.48</td>
</tr>
<tr>
<td align="center">&#xa0;&#xa0;F1-score</td>
<td align="center">0.82</td>
<td align="char" char=".">0.25</td>
<td align="char" char=".">0.27</td>
<td align="char" char=".">0.41</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>
<sup>
<italic>a</italic>
</sup>Metrics based on the 3-day Kp index observations and forecasts.</p>
</fn>
<fn>
<p>
<sup>
<italic>b</italic>
</sup>Metrics of the first day (day 1) based on the 3-day Kp index observations and forecasts.</p>
</fn>
<fn>
<p>
<sup>
<italic>c</italic>
</sup>Metrics of the second day (day 2) based on the 3-day Kp index observations and forecasts.</p>
</fn>
<fn>
<p>
<sup>
<italic>d</italic>
</sup>Metrics of the third day (day 3) based on the 3-day Kp index observations and forecasts.</p>
</fn>
<fn>
<p>
<sup>
<italic>e</italic>
</sup>
<italic>MaxDiff</italic> is the maximum absolute error of the Kp index observation and forecast.</p>
</fn>
<fn>
<p>
<sup>
<italic>f</italic>
</sup>
<italic>SDiff</italic> is the sum of the absolute error of the Kp index observation and forecast for the storm period (Kp &#x2265; 5).</p>
</fn>
<fn>
<p>
<sup>
<italic>g</italic>
</sup>Corresponding to the confusion matrix shown in <xref ref-type="table" rid="T2">Table&#xa0;2</xref>.</p>
</fn>
<fn>
<p>
<sup>
<italic>h</italic>
</sup>Corresponding to the confusion matrix shown in <xref ref-type="table" rid="T1">Table&#xa0;1</xref>.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>1) Our model reaches an F1-score of 0.96, a recall of 0.99, and a precision of 0.93 for the next 3-day period of non-storm category prediction and an F1-score of 0.82, a recall of 0.70, and a precision of 0.98 for the next 3-day period of storm category prediction.</p>
<p>2) However, both for the non-storm category and storm category, the three metrics (F1-score, recall, and precision) by our model are lower at the first day, second day, and third day period prediction than that in the next 3-day period prediction. It indicates that it is difficult to accurately predict the storm periods in a shorter time period (less than 3 days).</p>
<p>We also conducted an error analysis of the model forecasts with the observations in the testing dataset. For each sample (containing 24 points of the 3-hour Kp values) in the testing subset, we compared the forecasts with the observations by four statistics, including the mean absolute error (<italic>MAE</italic>), the root mean square error (<italic>RMSE</italic>), the maximum of the absolute error (<italic>MaxDiff</italic>), and the sum of the absolute error for the storm (Kp &#x2265; 5) period (<italic>SDiff</italic>). For 1853 samples in the testing subset, we calculated the average and standard deviation of the four statistics by the following formulas:<disp-formula id="e10">
<mml:math id="m15">
<mml:mi>A</mml:mi>
<mml:mi>v</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:math>
<label>(10)</label>
</disp-formula>
<disp-formula id="e11">
<mml:math id="m16">
<mml:mi>S</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>v</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msqrt>
<mml:mo>.</mml:mo>
</mml:math>
<label>(11)</label>
</disp-formula>
</p>
<p>The eight statistics of the 3-day period, namely, day 1, day 2, and day 3 predictions, for all samples in the testing subset by our model are shown in <xref ref-type="table" rid="T3">Table&#xa0;3</xref>. It was found that<list list-type="simple">
<list-item>
<p>1) The average and standard deviation of the mean error for the next 3-day Kp prediction is 0.03 and 0.65, respectively. The average and standard deviation of the mean absolute error for the next 3-day Kp prediction is 1.06 and 0.32, respectively.</p>
</list-item>
<list-item>
<p>2) The average and standard deviation of <italic>MaxDiff</italic> (the maximum absolute error of the 3-day Kp time-series) is 2.87 and 0.78, respectively.</p>
</list-item>
</list>
</p>
<p>Moreover, we compared our model results with the daily product of the 3-day Kp index forecasts provided by SWPC from 30 Nov 2020 to 27 Oct 2022. After we had removed the time-periods when the geomagnetic conditions are affected by interplanetary coronal mass ejections (ICMEs), there were 50 storm samples and 548 non-storm samples. Evaluation metrics for SWPC&#x2019;s 3-day Kp index forecast products and results obtained by our model are shown in <xref ref-type="table" rid="T4">Table&#xa0;4</xref>. It was found that<list list-type="simple">
<list-item>
<p>1) For the next 3-day prediction, our model provides a positive mean error average, while SWPC provides a negative mean error average. It indicates that statistically, our results are usually higher than the observations and SWPC&#x2019;s products are usually lower than the observations.</p>
</list-item>
<list-item>
<p>2) For the next 3-day prediction, our model provides a mean absolute error average of 1.13. It is slightly higher than the mean absolute error average of 1.03 calculated from SWPC&#x2019;s product.</p>
</list-item>
<list-item>
<p>3) Our model performs better than SWPC&#x2019;s product for the three metrics (recall, precision, and F1-score) in the 3-day prediction. However, compared with SWPC&#x2019;s results, our model had less recall and higher precision for the storm category in the first-day, second-day, and third-day predictions.</p>
</list-item>
</list>
</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Evaluation metrics for SWPC&#x2019;s 3-day Kp index forecast products and our model.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Metric</th>
<th colspan="4" align="center">SWPC&#x2019;s daily result</th>
<th colspan="4" align="center">Our model result</th>
</tr>
<tr>
<th align="left"/>
<th align="center">3-day</th>
<th align="center">Day 1</th>
<th align="center">Day 2</th>
<th align="center">Day 3</th>
<th align="center">3-day</th>
<th align="center">Day 1</th>
<th align="center">Day 2</th>
<th align="center">Day 3</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left"/>
<td colspan="8" align="center">For samples in the same test dataset</td>
</tr>
<tr>
<td align="center">ME average</td>
<td align="center">-0.22</td>
<td align="char" char=".">-0.26</td>
<td align="char" char=".">-0.22</td>
<td align="char" char=".">-0.17</td>
<td align="char" char=".">0.12</td>
<td align="char" char=".">0.18</td>
<td align="char" char=".">0.09</td>
<td align="char" char=".">0.09</td>
</tr>
<tr>
<td align="center">ME standard deviation</td>
<td align="center">0.60</td>
<td align="char" char=".">0.80</td>
<td align="char" char=".">0.87</td>
<td align="char" char=".">0.86</td>
<td align="char" char=".">0.67</td>
<td align="char" char=".">0.97</td>
<td align="char" char=".">0.93</td>
<td align="char" char=".">0.91</td>
</tr>
<tr>
<td align="center">Mean absolute error (MAE) average</td>
<td align="center">1.03</td>
<td align="char" char=".">1.01</td>
<td align="char" char=".">1.04</td>
<td align="char" char=".">1.03</td>
<td align="char" char=".">1.13</td>
<td align="char" char=".">1.11</td>
<td align="char" char=".">1.15</td>
<td align="char" char=".">1.32</td>
</tr>
<tr>
<td align="center">MAE standard deviation</td>
<td align="center">0.29</td>
<td align="char" char=".">0.42</td>
<td align="char" char=".">0.45</td>
<td align="char" char=".">0.43</td>
<td align="char" char=".">0.34</td>
<td align="char" char=".">0.51</td>
<td align="char" char=".">0.45</td>
<td align="char" char=".">0.47</td>
</tr>
<tr>
<td align="center">RMSE average</td>
<td align="center">1.26</td>
<td align="char" char=".">1.21</td>
<td align="char" char=".">1.24</td>
<td align="char" char=".">1.21</td>
<td align="char" char=".">1.39</td>
<td align="char" char=".">1.32</td>
<td align="char" char=".">1.39</td>
<td align="char" char=".">1.36</td>
</tr>
<tr>
<td align="center">RMSE standard deviation</td>
<td align="center">0.34</td>
<td align="char" char=".">0.46</td>
<td align="char" char=".">0.48</td>
<td align="char" char=".">0.46</td>
<td align="char" char=".">0.39</td>
<td align="char" char=".">0.54</td>
<td align="char" char=".">0.48</td>
<td align="char" char=".">0.51</td>
</tr>
<tr>
<td align="center">
<italic>MaxDiff</italic> average</td>
<td align="center">2.68</td>
<td align="char" char=".">2.11</td>
<td align="char" char=".">2.15</td>
<td align="char" char=".">2.10</td>
<td align="char" char=".">3.03</td>
<td align="char" char=".">2.30</td>
<td align="char" char=".">2.52</td>
<td align="char" char=".">2.38</td>
</tr>
<tr>
<td align="center">
<italic>MaxDiff</italic> standard deviation</td>
<td align="center">0.75</td>
<td align="char" char=".">0.76</td>
<td align="char" char=".">0.79</td>
<td align="char" char=".">0.73</td>
<td align="char" char=".">0.84</td>
<td align="char" char=".">0.88</td>
<td align="char" char=".">0.80</td>
<td align="char" char=".">0.88</td>
</tr>
<tr>
<td align="center">
<italic>SDiff</italic> average</td>
<td align="center">3.69</td>
<td align="char" char=".">1.48</td>
<td align="char" char=".">1.48</td>
<td align="char" char=".">1.16</td>
<td align="char" char=".">3.12</td>
<td align="char" char=".">1.38</td>
<td align="char" char=".">1.65</td>
<td align="char" char=".">1.62</td>
</tr>
<tr>
<td align="center">
<italic>SDiff</italic> standard deviation</td>
<td align="center">7.17</td>
<td align="char" char=".">3.82</td>
<td align="char" char=".">3.95</td>
<td align="char" char=".">3.31</td>
<td align="char" char=".">8.60</td>
<td align="char" char=".">4.46</td>
<td align="char" char=".">4.81</td>
<td align="char" char=".">4.02</td>
</tr>
<tr>
<td align="left"/>
<td colspan="8" align="center">For non-storm category (Kp <inline-formula id="inf6">
<mml:math id="m17">
<mml:mo>&#x3c;</mml:mo>
</mml:math>
</inline-formula> 5)</td>
</tr>
<tr>
<td align="center">Precision</td>
<td align="center">0.79</td>
<td align="char" char=".">0.92</td>
<td align="char" char=".">0.90</td>
<td align="char" char=".">0.90</td>
<td align="char" char=".">0.89</td>
<td align="char" char=".">0.89</td>
<td align="char" char=".">0.90</td>
<td align="char" char=".">0.93</td>
</tr>
<tr>
<td align="center">Recall</td>
<td align="center">0.89</td>
<td align="char" char=".">0.93</td>
<td align="char" char=".">0.94</td>
<td align="char" char=".">0.97</td>
<td align="char" char=".">0.99</td>
<td align="char" char=".">0.98</td>
<td align="char" char=".">0.97</td>
<td align="char" char=".">0.88</td>
</tr>
<tr>
<td align="center">F1-score</td>
<td align="center">0.84</td>
<td align="char" char=".">0.92</td>
<td align="char" char=".">0.92</td>
<td align="char" char=".">0.93</td>
<td align="char" char=".">0.94</td>
<td align="char" char=".">0.93</td>
<td align="char" char=".">0.93</td>
<td align="char" char=".">0.90</td>
</tr>
<tr>
<td align="left"/>
<td colspan="8" align="center">For storm category (Kp &#x2265; 5)</td>
</tr>
<tr>
<td align="center">Precision</td>
<td align="center">0.60</td>
<td align="char" char=".">0.43</td>
<td align="char" char=".">0.46</td>
<td align="char" char=".">0.50</td>
<td align="char" char=".">0.98</td>
<td align="char" char=".">0.50</td>
<td align="char" char=".">0.53</td>
<td align="char" char=".">0.37</td>
</tr>
<tr>
<td align="center">Recall</td>
<td align="center">0.42</td>
<td align="char" char=".">0.41</td>
<td align="char" char=".">0.32</td>
<td align="char" char=".">0.24</td>
<td align="char" char=".">0.71</td>
<td align="char" char=".">0.15</td>
<td align="char" char=".">0.26</td>
<td align="char" char=".">0.50</td>
</tr>
<tr>
<td align="center">F1-score</td>
<td align="center">0.49</td>
<td align="char" char=".">0.42</td>
<td align="char" char=".">0.38</td>
<td align="char" char=".">0.33</td>
<td align="char" char=".">0.82</td>
<td align="char" char=".">0.23</td>
<td align="char" char=".">0.34</td>
<td align="char" char=".">0.43</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s4">
<title>4 Conclusion and discussion</title>
<p>In this study, we aimed to develop a model for the next 3-day Kp index time-series prediction based on the previous 54-day Kp time series and SDO/AIA 193&#xa0;&#xc5; images. We prepared a dataset (6,042 samples) from May 2010 to December 2019 for training, and a dataset (1853 samples) from January 2020 to October 2022 for testing.</p>
<p>The similarity parameters of the previous and current geomagnetic conditions between the samples are calculated and analyzed, as well as the similarity parameters of the next 3-day geomagnetic conditions. We assumed that the paired samples with high similarity for the previous and current geomagnetic conditions would also have high similarity for the next 3-day geomagnetic conditions. Based on the assumption, we first selected the three best similarity parameters through the feature selection process and then adopted the XGBoost algorithm to develop a prediction model for the next 3-day Kp forecast. The model took the best three similarity parameters of the previous and current geomagnetic conditions as input and provided the best match sample from the training subset as a forecast for the next 3-day Kp time-series.</p>
<p>A prediction error analysis by our model was conducted. For the non-storm prediction, our model reached an F1-score of 0.96 for the next 3-day period and an F1-score over 0.92 for the first day, second day, and third day period. For the storm prediction, it reached an F1-score of 0.82, a recall of 0.70, and a precision of 0.98 for the next 3-day period.</p>
<p>We also compared our model results with the daily product of the 3-day Kp index forecasts provided by SWPC from 30 Nov 2020 to 27 Oct 2022. Statistically, our results were usually higher than the observations and SWPC&#x2019;s products were usually lower than the observations. Compared with SWPC&#x2019;s products for the next 3-day prediction, our model reached higher metrics (recall, precision, and F1-score). However, our model showed a higher mean error average in the next 3-day prediction, and lower recall and higher precision for the storm category in the first-day, second-day, and third-day predictions.</p>
<p>This study established a prediction model that can be used to provide the 3-day Kp forecast product, which is important and necessary for space weather forecasts. There is some essential information that can be obtained from the 3-day Kp forecast product, such as the start time of the geomagnetic storm, the maximum storm level, and the duration of the storm. Therefore, it is a more refined product than the 3-day Ap forecast product. So far, the 3-day Kp forecast product is routinely provided by the Space Weather Prediction Center facilitated by the National Oceanic and Atmospheric Administration. Considering the increasing demand for space weather forecasts, more prediction models that can provide essential products should be developed.</p>
<p>However, the current model has limitations in accurately predicting the storm periods in a shorter time period (less than 3 days), which leads to lower evaluation metrics (recall, precision, and F1-score) in the first-day, second-day, and third-day predictions. In the future, we would like to improve the 3-day Kp forecast model by deriving more relevant similarity parameters of the geomagnetic conditions and adopting a more complex machine learning algorithm such as a convolutional neural network or long short-term memory network.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/Supplementary Material; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>JW, BL, SL, and LS met the authorship criteria and agreed to be accountable for the content of the work.</p>
</sec>
<sec id="s7">
<title>Funding</title>
<p>JW was supported by the National Science Foundation of China (Grant No. 42074224), the Youth Innovation Promotion Association CAS, the Key Research Program of the Chinese Academy of Sciences (Grant No. ZDRE-KT-2021-3), and the Pandeng Program of National Space Science Center of the Chinese Academy of Sciences.</p>
</sec>
<ack>
<p>The authors would like to thank the NOAA and the National Centers for Environmental Information, which provided the geomagnetic indices. The authors also thank the SDO/AIA team members who contributed to the SDO mission.</p>
</ack>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors, and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn id="fn1">
<label>1</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://www.swpc.noaa.gov/products/usaf-45-day-ap-and-f107cm-flux-forecast">https://www.swpc.noaa.gov/products/usaf-45-day-ap-and-f107cm-flux-forecast</ext-link>
</p>
</fn>
<fn id="fn2">
<label>2</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://www.swpc.noaa.gov/products/27-day-outlook-107-cm-radio-flux-and-geomagnetic-indices">https://www.swpc.noaa.gov/products/27-day-outlook-107-cm-radio-flux-and-geomagnetic-indices</ext-link>
</p>
</fn>
<fn id="fn3">
<label>3</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://www.swpc.noaa.gov/products/3-day-geomagnetic-forecast">https://www.swpc.noaa.gov/products/3-day-geomagnetic-forecast</ext-link>
</p>
</fn>
<fn id="fn4">
<label>4</label>
<p>
<ext-link ext-link-type="uri" xlink:href="http://www.sepc.ac.cn/eng/ApForecast.php">http://www.sepc.ac.cn/eng/ApForecast.php</ext-link>
</p>
</fn>
<fn id="fn5">
<label>5</label>
<p>
<ext-link ext-link-type="uri" xlink:href="http://www.sepc.ac.cn/eng/Kp3HPred.php">http://www.sepc.ac.cn/eng/Kp3HPred.php</ext-link>
</p>
</fn>
<fn id="fn6">
<label>6</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://docs.scipy.org/doc/scipy/index.html">https://docs.scipy.org/doc/scipy/index.html</ext-link>
</p>
</fn>
<fn id="fn7">
<label>7</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://scikit-learn.org">https://scikit-learn.org</ext-link>
</p>
</fn>
<fn id="fn8">
<label>8</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://scikit-image.org/docs/stable/api/skimage.html">https://scikit-image.org/docs/stable/api/skimage.html</ext-link>
</p>
</fn>
<fn id="fn9">
<label>9</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://scikit-learn.org">https://scikit-learn.org</ext-link>
</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bala</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Reiff</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Improvements in short-term forecasting of geomagnetic activity</article-title>. <source>Space weather.</source> <volume>10</volume>, <fpage>S06001</fpage>. <pub-id pub-id-type="doi">10.1029/2012SW000779</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bartels</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2013a</year>). <article-title>II &#x2013; The technique of scaling indices k and q of geomagnetic activity</article-title>. <source>Geology</source> <volume>2013</volume>. <pub-id pub-id-type="doi">10.1016/B978-1-4832-1304-0.50006-3</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bartels</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2013b</year>). <article-title>The geomagnetic measures for the time-variations of solar corpuscular radiation, described for use in correlation studies in other geophysical fields</article-title>. <source>Ann. Intern. Geophys.</source> <volume>4</volume>, <fpage>227</fpage>. <pub-id pub-id-type="doi">10.1016/B978-1-4832-1304-0.50007-5</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cane</surname>
<given-names>H. V.</given-names>
</name>
<name>
<surname>Richardson</surname>
<given-names>I. G.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Interplanetary coronal mass ejections in the near-Earth solar wind during 1996-2002</article-title>. <source>J. Geophys. Res. (Space Phys.)</source> <volume>108</volume>, <fpage>1156</fpage>. <pub-id pub-id-type="doi">10.1029/2002JA009817</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chakraborty</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Morley</surname>
<given-names>S. K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Probabilistic prediction of geomagnetic storms and the K<sub>p</sub> index</article-title>. <source>J. Space Weather Space Clim.</source> <volume>10</volume>, <fpage>36</fpage>. <pub-id pub-id-type="doi">10.1051/swsc/2020037</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>P. F.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Coronal mass ejections: Models and their observational basis</article-title>. <source>Living Rev. Sol. Phys.</source> <volume>8</volume>, <fpage>1</fpage>. <pub-id pub-id-type="doi">10.12942/lrsp-2011-1</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Guestrin</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>XGBoost: A scalable tree boosting system</article-title>. <comment>arXiv e-prints, arXiv:1603.02754</comment>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Feynman</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gu</surname>
<given-names>X. Y.</given-names>
</name>
</person-group> (<year>1986</year>). <article-title>Prediction of geomagnetic activity on time scales of one to ten years</article-title>. <source>Rev. Geophys.</source> <volume>24</volume>, <fpage>650</fpage>&#x2013;<lpage>666</lpage>. <pub-id pub-id-type="doi">10.1029/RG024i003p00650</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gonzalez</surname>
<given-names>W. D.</given-names>
</name>
<name>
<surname>Joselyn</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Kamide</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Kroehl</surname>
<given-names>H. W.</given-names>
</name>
<name>
<surname>Rostoker</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Tsurutani</surname>
<given-names>B. T.</given-names>
</name>
<etal/>
</person-group> (<year>1994</year>). <article-title>What is a geomagnetic storm?</article-title> <source>J. Geophys. Res.</source> <volume>99</volume>, <fpage>5771</fpage>&#x2013;<lpage>5792</lpage>. <pub-id pub-id-type="doi">10.1029/93ja02867</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gonzalez</surname>
<given-names>W. D.</given-names>
</name>
<name>
<surname>Tsurutani</surname>
<given-names>B. T.</given-names>
</name>
<name>
<surname>Cl&#xfa;a de Gonzalez</surname>
<given-names>A. L.</given-names>
</name>
</person-group> (<year>1999</year>). <article-title>Interplanetary origin of geomagnetic storms</article-title>. <source>Space Sci. Rev.</source> <volume>88</volume>, <fpage>529</fpage>&#x2013;<lpage>562</lpage>. <pub-id pub-id-type="doi">10.1023/a:1005160129098</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gosling</surname>
<given-names>J. T.</given-names>
</name>
<name>
<surname>McComas</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Phillips</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Bame</surname>
<given-names>S. J.</given-names>
</name>
</person-group> (<year>1991</year>). <article-title>Geomagnetic activity associated with Earth passage of interplanetary shock disturbances and coronal mass ejections</article-title>. <source>J. Geophys. Res.</source> <volume>96</volume>, <fpage>7831</fpage>&#x2013;<lpage>7839</lpage>. <pub-id pub-id-type="doi">10.1029/91ja00316</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lemen</surname>
<given-names>J. R.</given-names>
</name>
<name>
<surname>Title</surname>
<given-names>A. M.</given-names>
</name>
<name>
<surname>Akin</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Boerner</surname>
<given-names>P. F.</given-names>
</name>
<name>
<surname>Chou</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Drake</surname>
<given-names>J. F.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>The atmospheric imaging assembly (AIA) on the solar dynamics observatory (SDO)</article-title>. <source>Sol. Phys.</source> <volume>275</volume>, <fpage>17</fpage>&#x2013;<lpage>40</lpage>. <pub-id pub-id-type="doi">10.1007/s11207-011-9776-8</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Luo</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Gong</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Two empirical models for short-term forecast of Kp</article-title>. <source>Space weather.</source> <volume>15</volume>, <fpage>503</fpage>&#x2013;<lpage>516</lpage>. <pub-id pub-id-type="doi">10.1002/2016SW001585</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mansilla</surname>
<given-names>G. A.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Some effects in the upper atmosphere during geomagnetic storms</article-title>. <source>Adv. Space Res.</source> <volume>47</volume>, <fpage>930</fpage>&#x2013;<lpage>937</lpage>. <pub-id pub-id-type="doi">10.1016/j.asr.2010.11.017</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Matzka</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Stolle</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Yamazaki</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bronkalla</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Morschhauser</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>The geomagnetic kp index and derived indices of geomagnetic activity</article-title>. <source>Space weather.</source> <volume>19</volume>, <fpage>e2020SW002641</fpage>. <pub-id pub-id-type="doi">10.1029/2020SW002641</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mayaud</surname>
<given-names>P. N.</given-names>
</name>
</person-group> (<year>1980</year>). <article-title>Derivation, meaning, and use of geomagnetic indices</article-title>. <source>Wash. D.C. Am. Geophys. Union Geophys. Monogr. Ser.</source> <volume>22</volume>, <fpage>607</fpage>. <pub-id pub-id-type="doi">10.1029/GM022.22.607M</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Menvielle</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Berthelier</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>1991</year>). <article-title>The k-derived planetary indices: Description and availability</article-title>. <source>Rev. Geophys.</source> <volume>29</volume>, <fpage>415</fpage>&#x2013;<lpage>432</lpage>. <pub-id pub-id-type="doi">10.1029/91RG00994</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>O&#x2019;Dwyer</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Del Zanna</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Mason</surname>
<given-names>H. E.</given-names>
</name>
<name>
<surname>Weber</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Tripathi</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>SDO/AIA response to coronal hole, quiet Sun, active region, and flare plasma</article-title>. <source>Astronomy Astrophysics</source> <volume>521</volume>, <fpage>A21</fpage>. <pub-id pub-id-type="doi">10.1051/0004-6361/201014872</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Perreault</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Akasofu</surname>
<given-names>S. I.</given-names>
</name>
</person-group> (<year>1978</year>). <article-title>A study of geomagnetic storms</article-title>. <source>Geophys. J.</source> <volume>54</volume>, <fpage>547</fpage>&#x2013;<lpage>573</lpage>. <pub-id pub-id-type="doi">10.1111/j.1365-246x.1978.tb05494.x</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pesnell</surname>
<given-names>W. D.</given-names>
</name>
<name>
<surname>Thompson</surname>
<given-names>B. J.</given-names>
</name>
<name>
<surname>Chamberlin</surname>
<given-names>P. C.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>The solar dynamics observatory (SDO)</article-title>. <source>Sol. Phys.</source> <volume>275</volume>, <fpage>3</fpage>&#x2013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1007/s11207-011-9841-3</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Richardson</surname>
<given-names>I. G.</given-names>
</name>
<name>
<surname>Cane</surname>
<given-names>H. V.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Near-Earth interplanetary coronal mass ejections during solar cycle 23 (1996 - 2009): Catalog and summary of properties</article-title>. <source>Sol. Phys.</source> <volume>264</volume>, <fpage>189</fpage>&#x2013;<lpage>237</lpage>. <pub-id pub-id-type="doi">10.1007/s11207-010-9568-6</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ritter</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>L&#xfc;hr</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Doornbos</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Substorm-related thermospheric density and wind disturbances derived from CHAMP observations</article-title>. <source>Ann. Geophys.</source> <volume>28</volume>, <fpage>1207</fpage>&#x2013;<lpage>1220</lpage>. <pub-id pub-id-type="doi">10.5194/angeo-28-1207-2010</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shprits</surname>
<given-names>Y. Y.</given-names>
</name>
<name>
<surname>Vasile</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Zhelavskaya</surname>
<given-names>I. S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Nowcasting and predicting the kp index using historical values and real-time observations</article-title>. <source>Space weather.</source> <volume>17</volume>, <fpage>1219</fpage>&#x2013;<lpage>1229</lpage>. <pub-id pub-id-type="doi">10.1029/2018SW002141</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Geomagnetic index kp forecasting with LSTM</article-title>. <source>Space weather.</source> <volume>16</volume>, <fpage>406</fpage>&#x2013;<lpage>416</lpage>. <pub-id pub-id-type="doi">10.1002/2017SW001764</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tsurutani</surname>
<given-names>B. T.</given-names>
</name>
<name>
<surname>Gonzalez</surname>
<given-names>W. D.</given-names>
</name>
<name>
<surname>Gonzalez</surname>
<given-names>A. L. C.</given-names>
</name>
<name>
<surname>Guarnieri</surname>
<given-names>F. L.</given-names>
</name>
<name>
<surname>Gopalswamy</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Grande</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2006</year>). <article-title>Corotating solar wind streams and recurrent geomagnetic activity: A review</article-title>. <source>J. Geophys. Res. (Space Phys.)</source> <volume>111</volume>, <fpage>A07S01</fpage>. <pub-id pub-id-type="doi">10.1029/2005JA011273</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tsurutani</surname>
<given-names>B. T.</given-names>
</name>
<name>
<surname>Gonzalez</surname>
<given-names>W. D.</given-names>
</name>
<name>
<surname>Gonzalez</surname>
<given-names>A. L. C.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Arballo</surname>
<given-names>J. K.</given-names>
</name>
<name>
<surname>Okada</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>1995</year>). <article-title>Interplanetary origin of geomagnetic activity in the declining phase of the solar cycle</article-title>. <source>J. Geophys. Res.</source> <volume>100</volume>, <fpage>21717</fpage>&#x2013;<lpage>21733</lpage>. <pub-id pub-id-type="doi">10.1029/95ja01476</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Miao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Statistical analysis and verification of 3-hourly geomagnetic activity probability predictions</article-title>. <source>Space weather.</source> <volume>13</volume>, <fpage>831</fpage>&#x2013;<lpage>852</lpage>. <pub-id pub-id-type="doi">10.1002/2015SW001251</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Webb</surname>
<given-names>D. F.</given-names>
</name>
<name>
<surname>Cliver</surname>
<given-names>E. W.</given-names>
</name>
<name>
<surname>Crooker</surname>
<given-names>N. U.</given-names>
</name>
<name>
<surname>Cry</surname>
<given-names>O. C. S.</given-names>
</name>
<name>
<surname>Thompson</surname>
<given-names>B. J.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Relationship of halo coronal mass ejections, magnetic clouds, and magnetic storms</article-title>. <source>J. Geophys. Res.</source> <volume>105</volume>, <fpage>7491</fpage>&#x2013;<lpage>7508</lpage>. <pub-id pub-id-type="doi">10.1029/1999ja000275</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Webb</surname>
<given-names>D. F.</given-names>
</name>
<name>
<surname>Howard</surname>
<given-names>T. A.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Coronal mass ejections: Observations</article-title>. <source>Living Rev. Sol. Phys.</source> <volume>9</volume>, <fpage>3</fpage>. <pub-id pub-id-type="doi">10.1007/s41116-017-0009-6</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>L&#xfc;hr</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Fejer</surname>
<given-names>B. G.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Global features of the disturbance winds during storm time deduced from CHAMP observations</article-title>. <source>J. Geophys. Res. (Space Phys.)</source> <volume>120</volume>, <fpage>5137</fpage>&#x2013;<lpage>5150</lpage>. <pub-id pub-id-type="doi">10.1002/2015JA021302</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Richardson</surname>
<given-names>I. G.</given-names>
</name>
<name>
<surname>Webb</surname>
<given-names>D. F.</given-names>
</name>
<name>
<surname>Gopalswamy</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Huttunen</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Kasper</surname>
<given-names>J. C.</given-names>
</name>
<etal/>
</person-group> (<year>2007</year>). <article-title>Solar and interplanetary sources of major geomagnetic storms (Dst &#x3c;&#x3d; -100 nT) during 1996-2005</article-title>. <source>J. Geophys. Res. (Space Phys.)</source> <volume>112</volume>, <fpage>A10102</fpage>. <pub-id pub-id-type="doi">10.1029/2007JA012321</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xiong</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>The influence of geomagnetic storm of 7-8 september 2017 on the swarm precise orbit determination</article-title>. <source>J. Geophys. Res. (Space Phys.)</source> <volume>124</volume>, <fpage>6971</fpage>&#x2013;<lpage>6984</lpage>. <pub-id pub-id-type="doi">10.1029/2018JA026316</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhelavskaya</surname>
<given-names>I. S.</given-names>
</name>
<name>
<surname>Vasile</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Shprits</surname>
<given-names>Y. Y.</given-names>
</name>
<name>
<surname>Stolle</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Matzka</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Systematic analysis of machine learning and feature selection techniques for prediction of the kp index</article-title>. <source>Space weather.</source> <volume>17</volume>, <fpage>1461</fpage>&#x2013;<lpage>1486</lpage>. <pub-id pub-id-type="doi">10.1029/2019SW002271</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>