<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mar. Sci.</journal-id>
<journal-title>Frontiers in Marine Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mar. Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-7745</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmars.2025.1540912</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Marine Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Using a seasonal and trend decomposition algorithm to improve machine learning prediction of inflow from the Yellow River, China, into the sea</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Wang</surname>
<given-names>Shuo</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Yang</surname>
<given-names>Ke</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Peng</surname>
<given-names>Hui</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2906189/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Key Laboratory of Marine Environment Science and Ecology, Ministry of Education and College of Environmental Science and Engineering, Ocean University of China</institution>, <addr-line>Qingdao</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Shandong Provincial Key Laboratory of Water Pollution Control and Resource Reuse, School of Environmental Science and Engineering, Shandong University</institution>, <addr-line>Qingdao, Shandong</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Shandong Provincial Key Laboratory of Marine Environment and Geological Engineering, Ocean University of China</institution>, <addr-line>Qingdao</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Haosheng Huang, Louisiana State University, United States</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Gon&#xe7;alo Jesus, National Laboratory for Civil Engineering, Portugal</p>
<p>Yanfeng Li, Beijing Normal University, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Hui Peng, <email xlink:href="mailto:pengh@ouc.edu.cn">pengh@ouc.edu.cn</email>
</p>
</fn>
<fn fn-type="equal" id="fn003">
<p>&#x2020;These authors have contributed equally to this work and share first authorship</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>09</day>
<month>05</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>12</volume>
<elocation-id>1540912</elocation-id>
<history>
<date date-type="received">
<day>12</day>
<month>12</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>14</day>
<month>04</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Wang, Yang and Peng</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Wang, Yang and Peng</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>The Yellow River is the largest inflow into the Bohai Sea, and its inflow changes directly affect the ecological environment and marine health of the Bohai Sea. Therefore, accurate prediction of the inflow of the Yellow River is crucial for maintaining the ecological balance of the Bohai Sea and protecting marine resources. Time decomposition algorithms, combined with machine learning, are effective tools to enhance the capabilities of inflow prediction models. However, future data leakage from decomposition items was ignored in many studies. It is necessary to develop the right method to operate time decomposition to avoid future data leakage. In this study, the inflow from the Yellow River into the sea was predicted based on a machine learning model (light gradient boosting machine, LightGBM) and a time decomposition algorithm (seasonal and trend decomposition using loess, STL), and the future data leakage in different ways of using STL was evaluated. The results showed that the overall performance of the STL&#x2013;LightGBM model was better than that of the LightGBM model. Taking the historical inflow of the previous 8 days as the input, the STL&#x2013;LightGBM achieved an average Nash&#x2013;Sutcliffe efficiency (NSE) of 0.720 for lead times of 1&#x2013;7 days. Even when the forecast period was 7 days, the STL&#x2013;LightGBM (NSE: 0.549 for 7-day lead time) was 0.105 higher than the LightGBM (NSE: 0.444 for 7-day lead time). We found that STL pretreatment of the entire test set overestimated the true performance of STL&#x2013;LightGBM. It is recommended that the STL preprocesses each sample of the test set to avoid future data leakage. The study can provide help for water resources management and offshore environmental management.</p>
</abstract>
<kwd-group>
<kwd>Bohai Sea</kwd>
<kwd>inflow</kwd>
<kwd>LightGBM</kwd>
<kwd>seasonal and trend decomposition using loess</kwd>
<kwd>time series pretreatment</kwd>
</kwd-group>
<contract-sponsor id="cn001">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content>
</contract-sponsor>
<contract-sponsor id="cn002">China Institute of Water Resources and Hydropower Research<named-content content-type="fundref-id">10.13039/501100004614</named-content>
</contract-sponsor>
<counts>
<fig-count count="12"/>
<table-count count="3"/>
<equation-count count="8"/>
<ref-count count="65"/>
<page-count count="17"/>
<word-count count="8337"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Coastal Ocean Processes</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>The Bohai Sea, located in the western Pacific Ocean, is a shallow, semi-enclosed marginal sea and China&#x2019;s only inland sea (<xref ref-type="bibr" rid="B8">Cheng et&#xa0;al., 2023</xref>). The Yellow River is the second longest river in China, accounting for more than 75% of the total freshwater input into the Bohai Sea (<xref ref-type="bibr" rid="B29">Liu et&#xa0;al., 2022</xref>). The Yellow River Estuary (YRE) has the broadest and most complete wetland ecosystem in China&#x2019;s temperate zone (<xref ref-type="bibr" rid="B4">Bai et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B24">Li et&#xa0;al., 2009</xref>). The river&#x2019;s inflow not only influences the ecology of the YRE but also transports a large amount of nutrients to the Bohai Sea, affecting the health of the marine ecological environment (<xref ref-type="bibr" rid="B28">Liu et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B60">Yang F. X. et&#xa0;al., 2024</xref>). Predicting the flow of the Yellow River into the sea can prepare decision-makers and avoid or minimize potential losses and disasters (<xref ref-type="bibr" rid="B26">Liu et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B58">Xu et&#xa0;al., 2016</xref>).</p>
<p>Inflow forecasting models are generally divided into process-driven models and data-driven models (<xref ref-type="bibr" rid="B20">Jiang et&#xa0;al., 2020</xref>, <xref ref-type="bibr" rid="B19">2024</xref>; <xref ref-type="bibr" rid="B22">Kratzert et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B55">Xie et&#xa0;al., 2023</xref>). Data-based inflow prediction methods mostly involve machine learning (<xref ref-type="bibr" rid="B36">Reichstein et&#xa0;al., 2019</xref>). Compared with traditional process-driven models, machine learning methods can accurately capture the nonlinear characteristics between input and output data without understanding the physical mechanism, and accurately predict and analyze the target variables using a simple modeling process (<xref ref-type="bibr" rid="B37">Shen, 2018</xref>; <xref ref-type="bibr" rid="B53">Wu J. H. et&#xa0;al., 2023</xref>). In recent years, many studies have used machine learning methods to establish the relationship between hydrological variables in different watersheds and have achieved satisfactory results (<xref ref-type="bibr" rid="B2">Althoff and Destouni, 2023</xref>; <xref ref-type="bibr" rid="B14">Huang et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B40">Singh et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B46">Wang S, et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B64">Zhi et&#xa0;al., 2021</xref>). However, hydrological time series data are composed of trend, seasonality, periodic motion, and error components, and irregular random motion leads to inherently nonlinear, complex, and non-stationary time series (<xref ref-type="bibr" rid="B3">Apaydin et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B16">Jehanzaib et&#xa0;al., 2023</xref>). The complexity of the inflow process makes it difficult for machine learning models to distinguish and identify these characteristics, which is challenging for the accurate long-term prediction of inflow. 
Therefore, different data preprocessing methods, such as decomposition techniques, are needed to improve the prediction accuracy of the models (<xref ref-type="bibr" rid="B3">Apaydin et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B13">He et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B30">Parisouj et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B65">Zuo et&#xa0;al., 2020</xref>).</p>
<p>The most competitive machine learning models need at least three elements: preprocessing methods, machine learning models, and appropriate training algorithms (<xref ref-type="bibr" rid="B12">He et&#xa0;al., 2021</xref>). Signal processing is a frequently used time series processing method, which can weaken the redundant content of the signal, filter out the mixed noise and interference, and transform the signal into a form that is easy to process, transmit, and analyze for subsequent processing (<xref ref-type="bibr" rid="B65">Zuo et&#xa0;al., 2020</xref>). The commonly used signal processing methods include wavelet analysis, Fourier transform, ensemble empirical mode decomposition (EEMD), variational mode decomposition (VMD), singular spectrum analysis (SSA), and seasonal and trend decomposition using loess (STL). In addition to this signal processing, an ensemble model is another method to improve the accuracy of the modeling. Ensemble models aim to give full play to the advantages of various prediction models by properly combining different prediction models, thus making comprehensive use of all the information (<xref ref-type="bibr" rid="B1">Abbasi et&#xa0;al., 2021</xref>). An ensemble model can effectively make use of the information decomposed by an algorithm and improve the prediction accuracy of the system. Some studies have also confirmed this view, such as artificial neural network (ANN) based on SSA (<xref ref-type="bibr" rid="B3">Apaydin et&#xa0;al., 2021</xref>) and support vector machine (SVM) based on EEMD and VMD (<xref ref-type="bibr" rid="B6">Chen S, et&#xa0;al., 2021</xref>).</p>
<p>Despite the growing popularity of signal processing-based time series forecasts in hydrology and water resources, the correct design and interpretation of these integrated signal processing models have not always been scrutinized, which often leads to invalid prediction designs that cannot be used in real-world scenarios (<xref ref-type="bibr" rid="B11">Du et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B34">Quilty and Adamowski, 2018</xref>). The impact of test data feature leakage (i.e., the &#x201c;future data&#x201d; issue) in ensemble model decomposition algorithms is often overlooked, representing a critical blind spot in current research. Test data feature leakage can lead to premature exposure of target variable information, giving the model an unrealistically advantageous performance during the testing phase. This results in an overestimation of the model&#x2019;s predictive capabilities, ultimately undermining its practical applicability. Therefore, a thorough investigation of this issue is crucial for enhancing the scientific rigor and reliability of machine learning-based hydrological prediction models.</p>
<p>The development of a machine learning model requires training data and test sets. The test set does not participate in the training, and it is mainly used to test the accuracy of the training model. It cannot be used as the basis for the selection of algorithms such as parameter adjustment and feature selection. Before the ensemble model testing of certain decomposition methods, some preprocessing methods must be used to deal with the test set. <xref ref-type="bibr" rid="B13">He et&#xa0;al. (2024)</xref> proposed a seasonal decomposition-based gated recurrent unit (SD-GRU) method for daily inflow prediction. <xref ref-type="bibr" rid="B6">Chen S, et&#xa0;al. (2021)</xref> employed EEMD and VMD for signal decomposition, introducing a hybrid model based on a two-stage decomposition, SVM, and ensemble methods for annual inflow prediction. In addition, the STL decomposition method effectively extracts trend and seasonal components, demonstrating strong adaptability and interpretability in hydrological time series analysis and forecasting (<xref ref-type="bibr" rid="B9">Cleveland and Cleveland, 1990</xref>). Compared to other signal decomposition methods, STL offers significant advantages in handling non-stationarity and enhancing model generalization (<xref ref-type="bibr" rid="B15">Hyndman and Athanasopoulos, 2018</xref>). However, these methods of preprocessing test set data in time series may lead to future data leakage (the decomposed feature contains the information from the target variable). The input features of the test set must not contain information from the target variables, otherwise, the data features will be leaked, and the credibility of the test set will be decreased.</p>
<p>This study thus aimed to develop an ensemble model based on STL decomposition, a machine learning model, and an ensemble method to improve prediction of the Yellow River inflow into the sea under different pre-processing scenarios. This is important for improving the habitat conditions and maintaining the biodiversity of the YRE. Specifically, the study: (1) used autocorrelation analysis and STL to select time-lag features and identify flow time series features, respectively; (2) identified the characteristics of time lags and the influence of lead time on the model by developing inflow forecasting models with different time windows and different lead times; and (3) considered the rigor of the test set by setting different STL pretreatment scenarios to compare the combined effects of STL and member models in different scenarios.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Methodology</title>
<sec id="s2_1">
<label>2.1</label>
<title>Autocorrelation analysis and partial correlation analysis</title>
<p>Autocorrelation analysis (ACF) and partial correlation analysis (PACF) can quantitatively represent the inherent correlation of multi-feature time series, and the methods are usually used to calculate the time dependence on the past (<xref ref-type="bibr" rid="B5">Chen et&#xa0;al., 2020</xref>). ACF can quantitatively measure the correlation between the observations of time <italic>t</italic> and the previous <italic>k</italic> periods, whereas PACF can measure the correlation between two specific and discontinuous periods. The autocorrelation coefficient of ACF can be calculated as follows (<xref ref-type="disp-formula" rid="eq1">Equation 1</xref>):</p>
<disp-formula id="eq1">
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mstyle>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>O</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi>O</mml:mi>
<mml:mo stretchy="true">&#xaf;</mml:mo>
</mml:mover>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi>O</mml:mi>
<mml:mo stretchy="true">&#xaf;</mml:mo>
</mml:mover>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>O</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi>O</mml:mi>
<mml:mo stretchy="true">&#xaf;</mml:mo>
</mml:mover>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:msub>
<mml:mi>O</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the observation at time <italic>t</italic>, <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:msub>
<mml:mi>O</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the observation at time <inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im4">
<mml:mover accent="true">
<mml:mi>O</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:math>
</inline-formula> is the average value of all observed values, and <italic>k</italic> is the lag time (days). The PACF can be calculated as follows (<xref ref-type="disp-formula" rid="eq2">Equations 2</xref>, <xref ref-type="disp-formula" rid="eq3">3</xref>):</p>
<disp-formula id="eq2">
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mstyle>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mstyle>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq3">
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Through the above equations, the correlation of different time delays can be calculated.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Sliding window</title>
<p>The original time series was transformed into input and output marker subseries for better model training. The sliding window (SW) method was used to construct the input and output of training samples based on continuous time series observations (<xref ref-type="bibr" rid="B35">Ramkumar and Jothiprakash, 2024</xref>; <xref ref-type="bibr" rid="B63">Zhang et&#xa0;al., 2019</xref>). The time series training samples generated by the SW method can be represented as follows (<xref ref-type="disp-formula" rid="eq4">Equation 4</xref>):</p>
<disp-formula id="eq4">
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:mi>I</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
<mml:mo>:</mml:mo>
<mml:mo>{</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>}</mml:mo>
<mml:mo>&#x2192;</mml:mo>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
<mml:mo>:</mml:mo>
<mml:mo>{</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>+</mml:mo>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:mi>Q</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo stretchy="false">[</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mn>3</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the complete sequence, Input represents the model inputs from day <inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> to day <inline-formula>
<mml:math display="inline" id="im7">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, and output represents the model output at time <inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>+</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The operational mode of the SW is shown in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>. The size of the SW not only determines the number of training samples generated from the time series but also influences the input subsequence and output sequence associated with each training sample. Compared with a large SW size, a small SW size will provide more training samples, but the samples may not contain enough input information; a larger SW size will result in fewer training samples, and irrelevant interference information will be included in the model input set. Therefore, an appropriate sliding window size should be chosen. No additional features (such as precipitation and temperature) were used in this study, which aimed to predict future inflow from historical flow data and their STL decomposition terms.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>The operation mode of the sliding window and scenario settings. A window size of 6 is used as an example.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1540912-g001.tif"/>
</fig>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Seasonal and trend decomposition using loess</title>
<p>STL is a widely used and robust method for decomposing time series, in which loess (locally weighted regression) is a method for estimating nonlinear relations. The STL decomposition method was proposed by <xref ref-type="bibr" rid="B9">Cleveland and Cleveland (1990)</xref>, and has several advantages: STL can handle any type of seasonality, not just monthly and quarterly data; seasonal items can change over time; and the rate of change can be easily controlled. STL was used to decompose the time series of Yellow River inflow into three items: trend term, seasonal term, and residual term (<xref ref-type="disp-formula" rid="eq5">Equation 5</xref>).</p>
<disp-formula id="eq5">
<label>(5)</label>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where the original time series data, seasonal components, trend components and residual components are expressed as <inline-formula>
<mml:math display="inline" id="im9">
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im10">
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im11">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math display="inline" id="im12">
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, respectively, and <italic>t</italic> ranges from 1 to <italic>N</italic> (sequence length). The key to the STL algorithm is locally weighted regression, which combines the simplicity of traditional linear regression and the flexibility of nonlinear regression to fit a smooth curve to a two-dimensional scatter plot. The process of the decomposition algorithm is shown in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>The process of the STL algorithm.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1540912-g002.tif"/>
</fig>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Light gradient boosting machine</title>
<p>Light gradient boosting machine (LightGBM) was originally developed jointly by Microsoft and Peking University to solve the problems of efficiency and scalability of the gradient boosting decision tree (GBDT) when applied to high-dimensional input characteristics and large amounts of data (<xref ref-type="bibr" rid="B21">Ke et&#xa0;al., 2017</xref>). LightGBM does not use information gain to segment the internal nodes of each tree as traditional GBDT does. LightGBM combines two innovative techniques: gradient-based one-side sampling (GOSS) and exclusive feature bundling (EFB) to segment internal nodes. For the GOSS algorithm, <italic>a</italic> is the proportion of larger gradient samples, and <inline-formula>
<mml:math display="inline" id="im13">
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mo stretchy="false">(</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the proportion of randomly selected smaller gradient samples, and the distribution is divided into data sets <italic>A</italic> and <italic>B</italic>. When calculating the information gain, it is necessary to ensure that discarding some samples with smaller gradients will not affect the model training, thus the coefficient <inline-formula>
<mml:math display="inline" id="im14">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mi>b</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula> will be multiplied by the reserved smaller gradient samples. After the last iteration, the sample data are sorted in descending order of gradient. The final calculated gain is as follows (<xref ref-type="disp-formula" rid="eq6">Equation 6</xref>):</p>
<disp-formula id="eq6">
<label>(6)</label>
<mml:math display="block" id="M6">
<mml:mrow>
<mml:msub>
<mml:mtext>V</mml:mtext>
<mml:mtext>j</mml:mtext>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:mtext>d</mml:mtext>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mtext>n</mml:mtext>
</mml:mfrac>
<mml:mo>[</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mtext>A</mml:mtext>
<mml:mtext>l</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:msub>
<mml:mtext>g</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mstyle>
<mml:mo>+</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mtext>a</mml:mtext>
</mml:mrow>
<mml:mtext>b</mml:mtext>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mtext>B</mml:mtext>
<mml:mtext>l</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:msub>
<mml:mtext>g</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mstyle>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mtext>n</mml:mtext>
<mml:mtext>l</mml:mtext>
<mml:mtext>j</mml:mtext>
</mml:msubsup>
<mml:mo stretchy="false">(</mml:mo>
<mml:mtext>d</mml:mtext>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mfrac>
<mml:mo>+</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mtext>A</mml:mtext>
<mml:mtext>r</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:msub>
<mml:mtext>g</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mstyle>
<mml:mo>+</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mtext>a</mml:mtext>
</mml:mrow>
<mml:mtext>b</mml:mtext>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mtext>B</mml:mtext>
<mml:mtext>r</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:msub>
<mml:mtext>g</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mstyle>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mtext>n</mml:mtext>
<mml:mrow><mml:mtext>r</mml:mtext></mml:mrow>
<mml:mtext>j</mml:mtext>
</mml:msubsup>
<mml:mo stretchy="false">(</mml:mo>
<mml:mtext>d</mml:mtext>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mfrac>
<mml:mo>]</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im15">
<mml:mrow>
<mml:msub>
<mml:mtext>A</mml:mtext>
<mml:mtext>l</mml:mtext>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>{</mml:mo>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>A</mml:mtext>
<mml:mo>:</mml:mo>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mrow>
<mml:mtext>ij</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mtext>d</mml:mtext>
<mml:mo>}</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mtext>A</mml:mtext>
<mml:mtext>r</mml:mtext>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>{</mml:mo>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>A</mml:mtext>
<mml:mo>:</mml:mo>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mrow>
<mml:mtext>ij</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&gt;</mml:mo>
<mml:mtext>d</mml:mtext>
<mml:mo>}</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im16">
<mml:mrow>
<mml:msub>
<mml:mtext>B</mml:mtext>
<mml:mtext>l</mml:mtext>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>{</mml:mo>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>B</mml:mtext>
<mml:mo>:</mml:mo>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mrow>
<mml:mtext>ij</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mtext>d</mml:mtext>
<mml:mo>}</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mtext>B</mml:mtext>
<mml:mtext>r</mml:mtext>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo>{</mml:mo>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mtext>B</mml:mtext>
<mml:mo>:</mml:mo>
<mml:msub>
<mml:mtext>x</mml:mtext>
<mml:mrow>
<mml:mtext>ij</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>&gt;</mml:mo>
<mml:mtext>d</mml:mtext>
<mml:mo>}</mml:mo>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im17">
<mml:mrow>
<mml:msub>
<mml:mtext>g</mml:mtext>
<mml:mtext>i</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the negative gradients of the loss function for the LightGBM outputs in each iteration.</p>
<p>In addition to using GOSS for sampling, LightGBM uses EFB to speed up the training process without losing accuracy. Many applications have high-dimensional and sparse input features that are mutually exclusive (i.e., these features cannot be non-zero at the same time). The EFB algorithm can bundle mutually exclusive features in data sets to form a low-dimensional feature set, which can effectively avoid the calculation of zero-value features. In the algorithm, a table recording the data with non-zero values can be established for each feature. By scanning the data in the table, the time complexity of creating a histogram can be effectively reduced. These two algorithms address the problems of a large number of data samples and a large number of features, respectively. Compared to other tree-based models such as XGBoost and Random Forest, LightGBM offers faster training speed and superior processing capability for large-scale time series data. Previous studies have demonstrated its strong performance in hydrological forecasting, particularly in inflow prediction, where it effectively captures nonlinear relationships and temporal dependencies (<xref ref-type="bibr" rid="B21">Ke et&#xa0;al., 2017</xref>). In the current study, the input and output variables of the LightGBM models were historical flow and future flow, respectively, while the input and output variables of the STL&#x2013;LightGBM models were historical flow together with its STL decomposition terms, and future flow, respectively. A training-testing set division ratio of 6:4 was used, and the data were divided multiple times during model training by the K-Fold (K=5) cross-validation method to alleviate the model&#x2019;s dependence on specific samples. To improve the model performance, hyperparameter tuning was performed using Python&#x2019;s Hyperopt library. 
The Tree-structured Parzen Estimator (TPE) algorithm was used to efficiently search the hyperparameter space for the best combination to enhance the generalization ability and prediction accuracy of the model.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Case study</title>
<sec id="s3_1">
<label>3.1</label>
<title>Study area and data</title>
<p>Here, a case study was conducted for the Lijin hydrological station (<xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>). Lijin Station (37&#xb0;31&#x2032;37.2&#x2033; N, 118&#xb0;18&#x2032;29.52&#x2033; E), located in Dongying, Shandong Province, China, 104 km from the estuary of the Yellow River, is the last hydrological station before the Yellow River enters the Bohai Sea. We collected daily inflow records (data source: <ext-link ext-link-type="uri" xlink:href="http://www.yrcc.gov.cn/">http://www.yrcc.gov.cn/</ext-link>) from January 2009 to December 2021 as the raw experimental dataset. The Yellow River has the largest amount of sediment in the world (<xref ref-type="bibr" rid="B33">Qiu et&#xa0;al., 2024</xref>), and part of the land in Dongying was formed by deposition from the Yellow River (<xref ref-type="bibr" rid="B48">Wang and Sun, 2021</xref>). The Yellow River has historically flooded from time to time. The river outflow also transports many nutrients into the Bohai Sea, affecting the health of the marine ecological environment (<xref ref-type="bibr" rid="B60">Yang F. X. et&#xa0;al., 2024</xref>). Hypoxia often occurs in Bohai Bay, mainly because of the considerable pollution burden (<xref ref-type="bibr" rid="B44">Wang et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B51">Wei et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B52">Wu et&#xa0;al., 2022</xref>). If some flow data can be predicted in advance, decision-makers can be prepared to avoid and reduce some unnecessary losses and disasters. Because the YRE is fed from a wide range of river basins, the temporal and spatial characteristics of the variables affecting the inflow are difficult to identify. This study aims to explore the potential of the STL&#x2013;LightGBM approach by using only inflow time series data, allowing for a clearer evaluation of the STL algorithm and LightGBM model without interference from external variables. This approach helps isolate the model&#x2019;s core mechanisms, reduces complexity, and enhances generalization. 
Therefore, to predict the inflow efficiently and succinctly, no additional features (such as precipitation and temperature) were used in this study. The aim of the study was to predict future inflow through historical inflow data and its terms of STL decomposition.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Map of the YRE and the location of Lijin Station.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1540912-g003.tif"/>
</fig>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Open-source software and performance metrics</title>
<p>This study relied on Python 3.7 open-source libraries, including NumPy, Math, and Pandas. Statsmodels was used to compute the ACF, PACF and STL. The LightGBM and Sklearn packages were used to implement LightGBM and SW. Matplotlib and Seaborn were used to draw figures. The packages were installed using Anaconda on the Windows 10 system. All the experiments were conducted on a workstation equipped with an Intel i5-10600KF CPU, 16 GB of RAM, and an NVIDIA GeForce RTX 3060 (12 GB) GPU.</p>
<p>In this study, the Nash-Sutcliffe efficiency (NSE) and root mean square error (RMSE) were used to evaluate the performance of the LightGBM and STL&#x2013;LightGBM models. These measures were defined by the following formulas (<xref ref-type="disp-formula" rid="eq7">Equations 7</xref>, <xref ref-type="disp-formula" rid="eq8">8</xref>):</p>
<disp-formula id="eq7">
<label>(7)</label>
<mml:math display="block" id="M7">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi>Q</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq8">
<label>(8)</label>
<mml:math display="block" id="M8">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>=</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im18">
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the observed data, <inline-formula>
<mml:math display="inline" id="im19">
<mml:mrow>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the value of prediction, and <inline-formula>
<mml:math display="inline" id="im20">
<mml:mover accent="true">
<mml:mi>Q</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im21">
<mml:mover accent="true">
<mml:mi>P</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:math>
</inline-formula> denote the mean observed and predicted values, respectively.</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Predicting variable selection</title>
<p>The selection of input variables is very important for time series prediction (<xref ref-type="bibr" rid="B41">Tran et&#xa0;al., 2015</xref>). In this study, ACF was used to calculate the time lag of inflow. <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref> shows the autocorrelation and partial autocorrelation functions for various lag numbers at Lijin Station. There was significant autocorrelation in the flow into the sea at different times. The ACF thresholds of &#x2265;0.7, &#x2265;0.6, and &#x2265;0.5 represent different levels of autocorrelation in the time series and are commonly used to assess the temporal dependence of data in time series modeling. The PACF further validates the lag windows determined by the ACF, ensuring that the selected lags effectively capture the most significant historical information in the time series. Considering the PACF and ACF together, we used different time delays (input variables) and different lead times (output variables) to train the machine learning model, as detailed in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>ACF <bold>(A)</bold> and PACF <bold>(B)</bold> of the runoff of Lijin Station. The shaded area in the figures indicates the &#xb1; 95% confidence level.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1540912-g004.tif"/>
</fig>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Sliding window size based on ACF value.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Sliding windows (days)</th>
<th valign="top" align="center">Lead time (days)</th>
<th valign="top" align="center">ACF</th>
<th valign="top" align="center">Model</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">6</td>
<td valign="top" align="center">1-7</td>
<td valign="top" align="center">&#x2265;0.7</td>
<td valign="top" align="center">LightGBM6</td>
</tr>
<tr>
<td valign="top" align="center">8</td>
<td valign="top" align="center">1-7</td>
<td valign="top" align="center">&#x2265;0.6</td>
<td valign="top" align="center">LightGBM8</td>
</tr>
<tr>
<td valign="top" align="center">12</td>
<td valign="top" align="center">1-7</td>
<td valign="top" align="center">&#x2265;0.5</td>
<td valign="top" align="center">LightGBM12</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>STL pre-processing methods</title>
<p>The time-series flow data were divided into a 60% training set and a 40% test set. The effects of different STL application scenarios on the performance of the machine learning model were compared. The following scenarios were set (<xref ref-type="fig" rid="f1">
<bold>Figures&#xa0;1</bold>
</xref>, <xref ref-type="fig" rid="f5">
<bold>5</bold>
</xref>).</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Structure of the technique used in this study.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1540912-g005.tif"/>
</fig>
<p>Scenario 1: The original data set was divided into a training set and a test set, and the training set and test set were processed into input and output subsequences through the time SW. The training set and test set were decomposed by STL. The historical flow (<inline-formula>
<mml:math display="inline" id="im22">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula>
<mml:math display="inline" id="im23">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>) and decomposition terms (trend term, seasonal term, and residual term) were used as input variables of the model, and the future flow (<inline-formula>
<mml:math display="inline" id="im24">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>+</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>) was used as the output variable to train and test the model (abbreviated as S1).</p>
<p>Scenario 2: The original data set was divided into a training set and a test set, and the training set and test set were processed into input and output subsequences through the time sliding window. For the training set and test set, each sample pair was decomposed by STL. Its historical flow (<inline-formula>
<mml:math display="inline" id="im25">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula>
<mml:math display="inline" id="im26">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>) and its decomposition terms were taken as input variables of the model, and its future flow (<inline-formula>
<mml:math display="inline" id="im27">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>+</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>) was used as the output variable to train and test the model (abbreviated as S2). As shown in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>, during the training process, the model can only use historical data from the training set and does not access any information from the test set. In the testing phase, the trained model is used exclusively for prediction without refitting or adjusting parameters based on the test data, thereby preventing information leakage.</p>
<p>Another scenario decomposed the original data set into new data by STL. The data generated by STL were divided into a training set and a test set. The model was trained and tested by using the time SW as the input and output subsequence. The results of this scheme and scenario 1 were similar and will not be repeated in this study.</p>
</sec>
</sec>
<sec id="s4" sec-type="results">
<label>4</label>
<title>Results</title>
<sec id="s4_1" sec-type="results">
<label>4.1</label>
<title>Results of time autocorrelation</title>
<p>For a time series, whether the data series has time autocorrelation or not should be determined (<xref ref-type="bibr" rid="B3">Apaydin et&#xa0;al., 2021</xref>). <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4A</bold>
</xref> shows the autocorrelation diagram of the inflow, wherein the horizontal axis represents the number of delay periods (days), and the vertical axis represents the autocorrelation coefficient. The autocorrelation coefficient remains on one side of the zero axis over a long range of lags, which is a typical characteristic of a series with a monotonic trend. At the same time, there is an obvious fluctuation pattern, which is typical of strong autocorrelation of a time series with periodic variation. The time series is also a non-stationary series that contains trend, seasonal, or periodic components. We used the STL decomposition algorithm to extract the temporal characteristics in preparation for the establishment of the machine learning model.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>STL analysis</title>
<p>In this study, STL was used to improve the prediction potential of the model. <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref> shows the decomposition results using the STL method of the data series of the Yellow River flow data. As shown in <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref>, the seasonal component exhibits a clear and regular annual cycle, with runoff peaks typically occurring between June and September each year. This seasonal pattern is highly consistent with the hydrological cycle of the Yellow River Basin and is closely related to the annual water and sediment regulation measures (<xref ref-type="bibr" rid="B18">Jia and Yi, 2023</xref>; <xref ref-type="bibr" rid="B62">Zhang et&#xa0;al., 2021</xref>). Notably, the seasonal component remains relatively stable across different years and was hardly affected by extreme events, indicating that the STL method demonstrates high accuracy and robustness in extracting seasonality from time series data. From 2015 to 2017, the trend component shows a significant downward trend. This change in trend corresponds to fluctuations in the residual component, suggesting that the fundamental pattern of runoff in the Yellow River may have undergone changes during this period. The residual component captures abnormal variations beyond the trend and seasonality. During extreme runoff events, the residuals exhibit marked deviations, especially from June to September each year, showing sharp fluctuations.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Decomposed daily runoff using the STL method.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1540912-g006.tif"/>
</fig>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Model cross-validation</title>
<p>A cross-validation algorithm, in which the whole dataset was divided into five subsets (folds), was employed to check the accuracy and robustness of the models. The cross-validation process was consistent across different models, and the results were similar. Therefore, the analysis focused solely on the S2&#x2013;LightGBM8 model. <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7A</bold>
</xref> illustrates the 5-fold cross-validation process of the TPE algorithm for searching the optimal structure of the S2&#x2013;LightGBM8 model. The lowest RMSE value was recorded in the first fold. Among different lead times, the RMSE values of L1 were the lowest, indicating the best performance in short-term prediction. The RMSE values of L4 were moderate, while L7 exhibited high RMSE values, suggesting that prediction accuracy decreased as the lead time increased. <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7B</bold>
</xref> shows the variation of RMSE values with iteration under different lead times. After the fifteenth iteration, a significant decrease in RMSE was observed, followed by a stable trend with further iterations. The decline in RMSE values indicated the high efficiency of the TPE algorithm in optimizing parameters and tuning the structure of the S2&#x2013;LightGBM8 model.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Cross-validation results of the S2-LightGBM8 model: <bold>(A)</bold> RMSE by number of folds; <bold>(B)</bold> RMSE variation with iterations. L1, L4, and L7 respectively represent 1-day lead time, 4-day lead time, and 7-day lead time.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1540912-g007.tif"/>
</fig>
</sec>
<sec id="s4_4">
<label>4.4</label>
<title>Model performance under different SW</title>
<p>The prediction performance of the original model (LightGBM) and the STL&#x2013;LightGBM model under different SW (LightGBM6, STL&#x2013;LightGBM6, LightGBM8, STL&#x2013;LightGBM8, LightGBM12, STL&#x2013;LightGBM12) were compared, and the best SW of the model was determined based on the NSE and RMSE. The results showed that the prediction performance of the LightGBM model and STL&#x2013;LightGBM varied with the size of SW (<xref ref-type="table" rid="T2">
<bold>Tables&#xa0;2</bold>
</xref>, <xref ref-type="table" rid="T3">
<bold>3</bold>
</xref>). When the lead time of S1&#x2013;STL&#x2013;LightGBM was 2 days or less, S1&#x2013;STL&#x2013;LightGBM8 performed best. For example, when the lead time was 2 days, the NSE (RMSE) of S1&#x2013;STL&#x2013;LightGBM8 was 0.954 (211.342), which was better than that of the other SW models. When the lead time was 3&#x2013;5 days, S1&#x2013;STL&#x2013;LightGBM6 had the best performance, with the best values of NSE (0.941, 0.908, and 0.845) and RMSE (239.763, 299.538, and 389.868). The overall performance of the model was reflected by the mean value. S1&#x2013;STL&#x2013;LightGBM6 had the highest average NSE (0.869) and the lowest average RMSE (334.883). S2&#x2013;STL&#x2013;LightGBM was similar to S1&#x2013;STL&#x2013;LightGBM. When the SW was 8 days, the performance was slightly better when predicting short-term flow (lead time: 1&#x2013;4 days), but the performance decreased with the increase in the lead time. It was observed that the overall performance of STL&#x2013;LightGBM8 was better than that of the other SW models in S2 (<xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8</bold>
</xref>), and the model was more robust. In addition, according to the results of the autocorrelation analysis (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>), when the prediction factor (input) was closer to the target variable (output), the contribution of the model was greater, and there was a higher autocorrelation. This result was similar to that of <xref ref-type="bibr" rid="B5">Chen et&#xa0;al. (2020)</xref>. The results also showed that the flow of the Yellow River into the sea could be predicted by time autoregressive machine learning based on a single variable.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Performance statistics using LightGBM and S1-STL&#x2013;LightGBM for predicting flow at 1 to 7 days ahead during the testing period.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Metric</th>
<th valign="middle" rowspan="2" align="center">Model</th>
<th valign="middle" colspan="7" align="center">Lead time (days)</th>
</tr>
<tr>
<th valign="middle" align="center">1</th>
<th valign="middle" align="center">2</th>
<th valign="middle" align="center">3</th>
<th valign="middle" align="center">4</th>
<th valign="middle" align="center">5</th>
<th valign="middle" align="center">6</th>
<th valign="middle" align="center">7</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="6" align="center">NSE</td>
<td valign="middle" align="center">LightGBM6</td>
<td valign="middle" align="center">0.931</td>
<td valign="middle" align="center">0.855</td>
<td valign="middle" align="center">0.766</td>
<td valign="middle" align="center">0.685</td>
<td valign="middle" align="center">0.608</td>
<td valign="middle" align="center">0.545</td>
<td valign="middle" align="center">0.460</td>
</tr>
<tr>
<td valign="middle" align="center">STL-LightGBM6</td>
<td valign="middle" align="center">0.966</td>
<td valign="middle" align="center">0.953</td>
<td valign="middle" align="center">0.941</td>
<td valign="middle" align="center">0.908</td>
<td valign="middle" align="center">0.845</td>
<td valign="middle" align="center">0.772</td>
<td valign="middle" align="center">0.696</td>
</tr>
<tr>
<td valign="middle" align="center">LightGBM8</td>
<td valign="middle" align="center">0.930</td>
<td valign="middle" align="center">0.837</td>
<td valign="middle" align="center">0.743</td>
<td valign="middle" align="center">0.675</td>
<td valign="middle" align="center">0.585</td>
<td valign="middle" align="center">0.524</td>
<td valign="middle" align="center">0.444</td>
</tr>
<tr>
<td valign="middle" align="center">STL-LightGBM8</td>
<td valign="middle" align="center">0.967</td>
<td valign="middle" align="center">0.954</td>
<td valign="middle" align="center">0.924</td>
<td valign="middle" align="center">0.894</td>
<td valign="middle" align="center">0.837</td>
<td valign="middle" align="center">0.771</td>
<td valign="middle" align="center">0.717</td>
</tr>
<tr>
<td valign="middle" align="center">LightGBM12</td>
<td valign="middle" align="center">0.922</td>
<td valign="middle" align="center">0.845</td>
<td valign="middle" align="center">0.718</td>
<td valign="middle" align="center">0.665</td>
<td valign="middle" align="center">0.606</td>
<td valign="middle" align="center">0.485</td>
<td valign="middle" align="center">0.422</td>
</tr>
<tr>
<td valign="middle" align="center">STL-LightGBM12</td>
<td valign="middle" align="center">0.963</td>
<td valign="middle" align="center">0.950</td>
<td valign="middle" align="center">0.929</td>
<td valign="middle" align="center">0.896</td>
<td valign="middle" align="center">0.830</td>
<td valign="middle" align="center">0.755</td>
<td valign="middle" align="center">0.660</td>
</tr>
<tr>
<td valign="middle" rowspan="6" align="center">RMSE</td>
<td valign="middle" align="center">LightGBM6</td>
<td valign="middle" align="center">260.377</td>
<td valign="middle" align="center">376.581</td>
<td valign="middle" align="center">478.825</td>
<td valign="middle" align="center">555.302</td>
<td valign="middle" align="center">619.537</td>
<td valign="middle" align="center">667.800</td>
<td valign="middle" align="center">727.755</td>
</tr>
<tr>
<td valign="middle" align="center">STL-LightGBM6</td>
<td valign="middle" align="center">181.516</td>
<td valign="middle" align="center">214.669</td>
<td valign="middle" align="center">239.763</td>
<td valign="middle" align="center">299.538</td>
<td valign="middle" align="center">389.868</td>
<td valign="middle" align="center">472.955</td>
<td valign="middle" align="center">545.873</td>
</tr>
<tr>
<td valign="middle" align="center">LightGBM8</td>
<td valign="middle" align="center">261.633</td>
<td valign="middle" align="center">399.182</td>
<td valign="middle" align="center">502.262</td>
<td valign="middle" align="center">564.332</td>
<td valign="middle" align="center">637.923</td>
<td valign="middle" align="center">683.061</td>
<td valign="middle" align="center">738.251</td>
</tr>
<tr>
<td valign="middle" align="center">STL-LightGBM8</td>
<td valign="middle" align="center">179.635</td>
<td valign="middle" align="center">211.342</td>
<td valign="middle" align="center">273.444</td>
<td valign="middle" align="center">322.232</td>
<td valign="middle" align="center">399.969</td>
<td valign="middle" align="center">473.608</td>
<td valign="middle" align="center">526.772</td>
</tr>
<tr>
<td valign="middle" align="center">LightGBM12</td>
<td valign="middle" align="center">276.437</td>
<td valign="middle" align="center">389.842</td>
<td valign="middle" align="center">525.924</td>
<td valign="middle" align="center">572.863</td>
<td valign="middle" align="center">621.572</td>
<td valign="middle" align="center">710.940</td>
<td valign="middle" align="center">752.975</td>
</tr>
<tr>
<td valign="middle" align="center">STL-LightGBM12</td>
<td valign="middle" align="center">190.956</td>
<td valign="middle" align="center">221.322</td>
<td valign="middle" align="center">264.687</td>
<td valign="middle" align="center">318.871</td>
<td valign="middle" align="center">408.116</td>
<td valign="middle" align="center">490.345</td>
<td valign="middle" align="center">577.543</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Performance statistics using LightGBM and S2&#x2013;STL&#x2013;LightGBM for predicting flow at 1 to 7 days ahead during the testing period.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Metric</th>
<th valign="middle" rowspan="2" align="center">Model</th>
<th valign="middle" colspan="7" align="center">Lead time (days)</th>
</tr>
<tr>
<th valign="middle" align="center">1</th>
<th valign="middle" align="center">2</th>
<th valign="middle" align="center">3</th>
<th valign="middle" align="center">4</th>
<th valign="middle" align="center">5</th>
<th valign="middle" align="center">6</th>
<th valign="middle" align="center">7</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="6" align="center">NSE</td>
<td valign="middle" align="center">LightGBM6</td>
<td valign="middle" align="center">0.931</td>
<td valign="middle" align="center">0.855</td>
<td valign="middle" align="center">0.766</td>
<td valign="middle" align="center">0.685</td>
<td valign="middle" align="center">0.608</td>
<td valign="middle" align="center">0.545</td>
<td valign="middle" align="center">0.460</td>
</tr>
<tr>
<td valign="middle" align="center">STL-LightGBM6</td>
<td valign="middle" align="center">0.898</td>
<td valign="middle" align="center">0.774</td>
<td valign="middle" align="center">0.724</td>
<td valign="middle" align="center">0.693</td>
<td valign="middle" align="center">0.675</td>
<td valign="middle" align="center">0.638</td>
<td valign="middle" align="center">0.554</td>
</tr>
<tr>
<td valign="middle" align="center">LightGBM8</td>
<td valign="middle" align="center">0.930</td>
<td valign="middle" align="center">0.837</td>
<td valign="middle" align="center">0.743</td>
<td valign="middle" align="center">0.675</td>
<td valign="middle" align="center">0.585</td>
<td valign="middle" align="center">0.524</td>
<td valign="middle" align="center">0.444</td>
</tr>
<tr>
<td valign="middle" align="center">STL-LightGBM8</td>
<td valign="middle" align="center">0.906</td>
<td valign="middle" align="center">0.834</td>
<td valign="middle" align="center">0.776</td>
<td valign="middle" align="center">0.710</td>
<td valign="middle" align="center">0.653</td>
<td valign="middle" align="center">0.614</td>
<td valign="middle" align="center">0.549</td>
</tr>
<tr>
<td valign="middle" align="center">LightGBM12</td>
<td valign="middle" align="center">0.922</td>
<td valign="middle" align="center">0.845</td>
<td valign="middle" align="center">0.718</td>
<td valign="middle" align="center">0.665</td>
<td valign="middle" align="center">0.606</td>
<td valign="middle" align="center">0.485</td>
<td valign="middle" align="center">0.422</td>
</tr>
<tr>
<td valign="middle" align="center">STL-LightGBM12</td>
<td valign="middle" align="center">0.868</td>
<td valign="middle" align="center">0.728</td>
<td valign="middle" align="center">0.676</td>
<td valign="middle" align="center">0.584</td>
<td valign="middle" align="center">0.614</td>
<td valign="middle" align="center">0.603</td>
<td valign="middle" align="center">0.586</td>
</tr>
<tr>
<td valign="middle" rowspan="6" align="center">RMSE</td>
<td valign="middle" align="center">LightGBM6</td>
<td valign="middle" align="center">260.377</td>
<td valign="middle" align="center">376.581</td>
<td valign="middle" align="center">478.825</td>
<td valign="middle" align="center">555.302</td>
<td valign="middle" align="center">619.537</td>
<td valign="middle" align="center">667.800</td>
<td valign="middle" align="center">727.755</td>
</tr>
<tr>
<td valign="middle" align="center">STL-LightGBM6</td>
<td valign="middle" align="center">316.467</td>
<td valign="middle" align="center">470.386</td>
<td valign="middle" align="center">519.787</td>
<td valign="middle" align="center">548.182</td>
<td valign="middle" align="center">564.132</td>
<td valign="middle" align="center">596.041</td>
<td valign="middle" align="center">661.442</td>
</tr>
<tr>
<td valign="middle" align="center">LightGBM8</td>
<td valign="middle" align="center">261.633</td>
<td valign="middle" align="center">399.182</td>
<td valign="middle" align="center">502.262</td>
<td valign="middle" align="center">564.332</td>
<td valign="middle" align="center">637.923</td>
<td valign="middle" align="center">683.061</td>
<td valign="middle" align="center">738.251</td>
</tr>
<tr>
<td valign="middle" align="center">STL-LightGBM8</td>
<td valign="middle" align="center">304.129</td>
<td valign="middle" align="center">403.531</td>
<td valign="middle" align="center">469.065</td>
<td valign="middle" align="center">533.411</td>
<td valign="middle" align="center">583.064</td>
<td valign="middle" align="center">615.244</td>
<td valign="middle" align="center">664.881</td>
</tr>
<tr>
<td valign="middle" align="center">LightGBM12</td>
<td valign="middle" align="center">276.437</td>
<td valign="middle" align="center">389.842</td>
<td valign="middle" align="center">525.924</td>
<td valign="middle" align="center">572.863</td>
<td valign="middle" align="center">621.572</td>
<td valign="middle" align="center">710.940</td>
<td valign="middle" align="center">752.975</td>
</tr>
<tr>
<td valign="middle" align="center">STL-LightGBM12</td>
<td valign="middle" align="center">360.064</td>
<td valign="middle" align="center">516.345</td>
<td valign="middle" align="center">564.006</td>
<td valign="middle" align="center">638.719</td>
<td valign="middle" align="center">615.624</td>
<td valign="middle" align="center">624.365</td>
<td valign="middle" align="center">637.180</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Performance of the projected LightGBM and STL&#x2013;LightGBM models. X-axis (6, 8, and 12) represents the size of the sliding window.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1540912-g008.tif"/>
</fig>
</sec>
<sec id="s4_5">
<label>4.5</label>
<title>Comparison of prediction results at different lead times</title>
<p>To compare the effects of different lead times on the prediction performance of models, <xref ref-type="fig" rid="f9">
<bold>Figure&#xa0;9</bold>
</xref> shows the prediction results of the model with a window size of 8 and the lead time of 1, 4, and 7 days, respectively. When the lead time was 1 day (<xref ref-type="fig" rid="f9">
<bold>Figure&#xa0;9A</bold>
</xref>), the predicted values of the three models fitted well with the observed values, and the overall fitting effect was satisfactory. The results showed that the three models were able to use the historical flow for 8 days as an input variable to predict the flow into the sea in the next 1-day period. <xref ref-type="fig" rid="f9">
<bold>Figure&#xa0;9B</bold>
</xref> shows the fit of the predicted value of the model with the real value with a lead time of 4 days. The prediction results were worse than those with a forecast period of 1 day, and the original model (LightGBM) was also the worst of all the models. The prediction with low inflow was better than that with high inflow. Compared with other forecast periods, the prediction effect of the 7-day lead time was the worst, especially the prediction near the peak, where most of the sample forecasts underestimated the observed value (<xref ref-type="fig" rid="f9">
<bold>Figure&#xa0;9C</bold>
</xref>). The results showed that the performance of all the models decreased to varying degrees with an increase in the lead time. In other words, the accuracy of models in predicting the inflow of the Yellow River into the sea in the coming 7 days was lower than that for the next 1-day period.</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Predicted and observed runoff time series with prediction lead times ranging from 1 to 7 days using LightGBM and STL&#x2013;LightGBM during the testing period: <bold>(A)</bold> 1-day lead time; <bold>(B)</bold> 4-day lead time; and <bold>(C)</bold> 7-day lead time.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1540912-g009.tif"/>
</fig>
<p>Taylor diagrams of three models were drawn to measure the changes in model performance with different lead times (<xref ref-type="fig" rid="f10">
<bold>Figure&#xa0;10</bold>
</xref>). Taylor diagrams are often used to evaluate the accuracy of models, and the commonly used accuracy indicators are the correlation coefficient, standard deviation, and RMSE. As shown in <xref ref-type="fig" rid="f10">
<bold>Figure&#xa0;10A</bold>
</xref>, the distance on the diagram of the three models was relatively close, the results were good, and the correlation coefficients all reached more than 0.95. For the predicted value, the standard deviation was close to 1, indicating that the performance was relatively stable. <xref ref-type="fig" rid="f10">
<bold>Figure&#xa0;10A</bold>
</xref> shows that all three models could predict the inflow into the sea in the next 1-day period well. However, with the extension of the lead time, the scatter points of the three models became dispersed, indicating a decline in their predictive performance (<xref ref-type="fig" rid="f10">
<bold>Figures&#xa0;10B, C</bold>
</xref>). Especially when the lead time was 7 days, the correlation coefficient of the original model (LightGBM) was less than 0.75, indicating that the original model could not predict the sea flow on the seventh day.</p>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>Taylor diagram of the model performance: <bold>(A)</bold> 1-day lead time; <bold>(B)</bold> 4-day lead time; and <bold>(C)</bold> 7-day lead time. The scatter points in the Taylor diagram represent the models, the radial lines represent the correlation coefficient, the horizontal and vertical axes represent the standard deviation, and the dotted lines represent the root mean square error.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1540912-g010.tif"/>
</fig>
</sec>
<sec id="s4_6">
<label>4.6</label>
<title>Comparison of prediction performance between STL&#x2013;LightGBM and LightGBM</title>
<p>From the above analysis and discussion, STL technology was observed to have significantly improved the prediction ability of the LightGBM model. In this section, we discuss the improvement of the original model in different scenarios and the reasons for it. We found that the performance of the model decreased with the growth of the lead time, but the decline in the S1&#x2013;STL&#x2013;LightGBM performance was less than that of the original model. From the radar diagram (<xref ref-type="fig" rid="f11">
<bold>Figure&#xa0;11</bold>
</xref>), we can observe that with the extension of the lead time, the improvement of the LightGBM model by S1&#x2013;STL increased gradually. When the lead time reached 7 days, the NSE of S1&#x2013;STL was approximately 0.3 higher than that of the LightGBM model. In addition, we can observe that when the lead time was 4 days and 7 days, the S1&#x2013;STL&#x2013;LightGBM was much better than the original model (<xref ref-type="fig" rid="f8">
<bold>Figures&#xa0;8</bold>
</xref>, <xref ref-type="fig" rid="f9">
<bold>9</bold>
</xref>). This also confirms that the S1&#x2013;STL scheme can effectively improve the long-term prediction ability of the original model.</p>
<fig id="f11" position="float">
<label>Figure&#xa0;11</label>
<caption>
<p>Radar diagram of the model performance for Scenario 1 <bold>(A)</bold> and Scenario 2 <bold>(B)</bold>. The dotted line represents the NSE difference between the STL&#x2013;LightGBM model and the LightGBM model.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1540912-g011.tif"/>
</fig>
<p>The promotion ability of S2&#x2013;STL was, however, different from that of S1&#x2013;STL. Through <xref ref-type="fig" rid="f11">
<bold>Figure&#xa0;11</bold>
</xref>, we can clearly observe that S2&#x2013;STL did not improve the prediction ability of the original model at lead times of up to 3 days; instead, it reduced the accuracy. When the lead time reached 4 days, the prediction ability of S2&#x2013;STL was greater than that of the original model. For example, when the forecast period was 7 days, the NSE of S2&#x2013;STL&#x2013;LightGBM8 was 0.105 higher than that of the LightGBM8 model (<xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>), and the result was satisfactory. From the above results, we can also conclude that both S1&#x2013;STL and S2&#x2013;STL improved the long-term prediction ability of the original model. In other words, STL can help address the problem that the performance of the model decreases as the lead time increases.</p>
</sec>
</sec>
<sec id="s5" sec-type="discussion">
<label>5</label>
<title>Discussion</title>
<sec id="s5_1">
<label>5.1</label>
<title>Effect of different lead times on model performance</title>
<p>In general, the prediction performance of inflow models decreases as the lead time increases, which is also the case in our study. <xref ref-type="fig" rid="f8">
<bold>Figures&#xa0;8</bold>
</xref>, <xref ref-type="fig" rid="f9">
<bold>9</bold>
</xref> clearly show that all models could predict flow for a 1-day lead time. However, with the increase of the lead time, the prediction ability of models declined. This was because when the lead time was short, there was a simple linear relationship between t day and <inline-formula>
<mml:math display="inline" id="im28">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>: <inline-formula>
<mml:math display="inline" id="im29">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> flow into the sea, and thus all the models could predict the 1-day flow (<inline-formula>
<mml:math display="inline" id="im30">
<mml:mi>t</mml:mi>
</mml:math>
</inline-formula> day). Therefore, there was no significant difference in the prediction effect of different models with a 1-day lead time.</p>
<p>However, with the extension of the lead time, the autocorrelation of time flow series weakened rapidly, the correlation between <inline-formula>
<mml:math display="inline" id="im31">
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>: <inline-formula>
<mml:math display="inline" id="im32">
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im33">
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> became complex and nonlinear, and the performance of the model decreased. The inflow of the Yellow River into the sea is affected by the climate of the mainstream of the Yellow River every year, and because the annual rainfall differs from year to year, so does the inflow into the sea (<xref ref-type="bibr" rid="B50">Wang et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B48">Wang and Sun, 2021</xref>). The flow of the Yellow River into the sea is also influenced by human activities (<xref ref-type="bibr" rid="B39">Shi et&#xa0;al., 2019</xref>), such as reservoir regulation, urbanization, agricultural practice, soil and water conservation measures, and mining (<xref ref-type="bibr" rid="B10">Dou et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B43">Wang and Cheng, 2022</xref>; <xref ref-type="bibr" rid="B54">Wu X. et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B56">Xin and Liu, 2022</xref>; <xref ref-type="bibr" rid="B61">Yu et&#xa0;al., 2021</xref>). We assumed that obtaining additional data through various techniques, such as upstream reservoir operation and precipitation data, may improve the predictive performance of the model.</p>
</sec>
<sec id="s5_2">
<label>5.2</label>
<title>Data leakage in time series preprocessing</title>
<p>STL decomposition, as a classical time series preprocessing method, relies on global trend fitting and seasonal smoothing. Studies have pointed out that any smoothing operation applied to the entire test set may introduce data leakage during the testing phase (<xref ref-type="bibr" rid="B59">Yang X. Y. et&#xa0;al., 2024</xref>). Therefore, if STL decomposition is performed on the entire test set at once during testing, the decomposition result of a given sample may be influenced by future observations. This essentially introduces future information into the model, which is equivalent to the model &#x201c;seeing the future&#x201d; during training or testing. Such a practice violates the fundamental assumption of causality in time series forecasting, which requires models to be trained solely on historical data, and may thus result in misleading evaluations of model performance (<xref ref-type="bibr" rid="B32">Qian et&#xa0;al., 2019</xref>). However, many existing studies have not fully recognized the potential data leakage issues arising from using decomposition methods such as STL on the entire test set, which may lead to overestimation of the improvements these methods bring to machine learning models (<xref ref-type="bibr" rid="B3">Apaydin et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B7">Chen Z. et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B65">Zuo et&#xa0;al., 2020</xref>).</p>
<p>To address this issue, we proposed a stepwise decomposition strategy (S2&#x2013;STL), which ensured that, at each time point, STL decomposition only utilized the current and past observations to extract trend and seasonal components. This guaranteed that the generated features did not contain any future information and strictly adhered to the causality constraints required for time series modeling (see <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>). Theoretically, S2&#x2013;STL could be regarded as a recursive adaptation of STL&#x2019;s local smoothing philosophy, where the sliding window moved forward with time, relying solely on historical observations to simulate the real information boundaries in forecasting tasks. Compared with global decomposition approaches, stepwise decomposition offered distinct advantages in model generalization and stability. This strategy has already been validated in meteorological time series (<xref ref-type="bibr" rid="B49">Wang and Wu, 2016</xref>) and hydrological runoff forecasting (<xref ref-type="bibr" rid="B34">Quilty and Adamowski, 2018</xref>).</p>
<p>Results demonstrated significant differences in model performance between S1&#x2013;STL and S2&#x2013;STL. In S1&#x2013;STL, STL was first applied to the entire test set before generating input-output pairs through a sliding window, which meant that each input variable&#x2019;s trend, seasonal, and residual components may implicitly contain future information from the target variable. This process provided the model with prior signals that were unavailable in actual forecasting scenarios, resulting in systematically underestimated test errors and overestimated generalization capabilities. Similarly, seasonality patterns extracted from the full dataset reduce the learning burden for the model, thereby exaggerating the performance gains attributed to STL decomposition. In contrast, S2&#x2013;STL performed recursive decomposition based solely on historical data, effectively preventing future information leakage and providing an accurate representation of the decomposition strategy&#x2019;s true utility in real-world forecasting tasks.</p>
<p>In conclusion, S2&#x2013;STL not only adhered strictly to the causality principle inherent in time series forecasting but also effectively mitigated the risks of information leakage due to improper preprocessing, making it a rigorous and reliable strategy for time series decomposition.</p>
</sec>
<sec id="s5_3">
<label>5.3</label>
<title>The role of STL in predicting inflow into the sea</title>
<p>This study used the STL method for time series decomposition. Although methods like empirical mode decomposition (EMD), SSA, and wavelet transform (WT) are also widely applied, they differ significantly from STL in terms of decomposition logic and applicability. EMD often produces unstable decomposition results when handling time series with strong trends and is not suitable for extracting long-term trends (<xref ref-type="bibr" rid="B59">Yang X. Y. et&#xa0;al., 2024</xref>). SSA is computationally intensive, primarily used for signal denoising rather than specifically designed for seasonal decomposition, and requires complex hyperparameter adjustments. While WT is suitable for non-stationary data, the choice of wavelet basis significantly impacts the decomposition results (<xref ref-type="bibr" rid="B34">Quilty and Adamowski, 2018</xref>), adding complexity to its application. In contrast, STL provides a clear mathematical formulation, does not rely on parameter selection, is applicable to data of different time scales, and can reliably decompose trend and seasonal components. Regarding data leakage, this study specifically investigated the potential data leakage when time series decomposition methods were combined with machine learning models. Our research found that unreasonable decomposition strategies may lead to data leakage. S2&#x2013;STL avoided data leakage through stepwise decomposition, offering a rigorous and practically applicable decomposition strategy for forecasting tasks. Other decomposition methods, however, follow different mechanisms that may lead to different data leakage patterns, and these require further research.</p>
<p>Through the pre-processing of the time series (<xref ref-type="bibr" rid="B3">Apaydin et&#xa0;al., 2021</xref>), we observed that the inflow data of the Yellow River into the sea was a non-stationary time series, which contained trend, seasonality, or periodicity. STL decomposed the original dataset into trend items, seasonal terms, and residual terms based on loess. These data combined with historical observation data made the characteristics of the input samples more abundant. STL was very resilient to outliers in the inflow data, resulting in a robust component subseries. The robustness of components could be translated into enhanced prediction accuracy for these subseries of prediction methods. The newly generated series reflected the seasonality and trend characteristics of the original data, and then improved the prediction ability of the model (<xref ref-type="bibr" rid="B12">He et&#xa0;al., 2021</xref>).</p>
<p>Inflow exhibits distinct periodic variations, and extracting the seasonal component helps the model better capture cyclic patterns, reducing prediction errors caused by periodic changes in the data. The trend component reflects the long-term variations in inflow, providing LightGBM with smooth and stable input features. In long-term forecasting tasks with extended lead times, the original time series may exhibit significant fluctuations. Extracting the trend component helps mitigate short-term disturbances, enhancing the model&#x2019;s robustness in long-term predictions. This explains why both S1&#x2013;STL and S2&#x2013;STL outperform the original LightGBM model in long-term forecasting. The residual component contains non-periodic, random fluctuations. If not properly handled, these residuals may introduce noise and affect the model&#x2019;s generalization ability. However, STL decomposition effectively separates trend and seasonal signals, allowing LightGBM to focus on learning more representative residual information. This reduces the impact of random fluctuations on the model and enhances its accuracy in long-term predictions.</p>
<p>The difference in the STL decomposition term between S1 and S2 was analyzed from the point of view of the test set value. As shown in <xref ref-type="fig" rid="f12">
<bold>Figure&#xa0;12</bold>
</xref>, we observed that when the input variable of the original test set (test set of LightGBM) was close to the target variable, the correlation was strong, which was consistent with the previous ACF results (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>). The correlation between the seasonal items of S1&#x2013;STL and the target variable (output) in each lag time was greater than that of historical observation data. It is obviously unreasonable that the characteristics of the data in the sample are leaked owing to the overall STL decomposition of the test set. However, we observed that the correlation between the three STL decomposition items of S2&#x2013;STL and the target variable increased with the increase of the lead time, and the correlation between the seasonal term and the target variable was less than that of the historical observation data, which may be due to the trend of STL degradation of the sample by loess. Although the improvement of S2&#x2013;STL was not as strong as that of S1&#x2013;STL, S2&#x2013;STL was more in line with the practical application of the model. To sum up, the forecasting ability of STL&#x2013;LightGBM was better than that of LightGBM; in particular, the forecast ability (NSE) over 7 days was improved by 0.1. Because STL can alleviate the problem that the performance of the model decreases with the increase of the lead time, STL can improve machine learning prediction of the Yellow River&#x2019;s inflow into the sea.</p>
<fig id="f12" position="float">
<label>Figure&#xa0;12</label>
<caption>
<p>Correlation diagram between decomposition term of STL and the target variable.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1540912-g012.tif"/>
</fig>
</sec>
<sec id="s5_4">
<label>5.4</label>
<title>Advantages of STL&#x2013;LightGBM in predicting coastal inflow</title>
<p>The inflow of the Yellow River plays a crucial role in shaping the health of the offshore ecosystem of the Bohai Sea. As shown in <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref>, due to the short-term interruption of the Water-Sediment Regulation Scheme (WSRS) from 2015 to 2017, the discharge into the sea showed a declining trend during this period (<xref ref-type="bibr" rid="B47">Wang J. J. et&#xa0;al., 2022</xref>). This further highlights the significant regulatory role of WSRS in the hydrological processes of the LYR. These changes also had a notable impact on the seasonal pattern of the Yellow River, altering its natural flow regime and affecting the timing and magnitude of inflow variations. During the WSRS, the Yellow River transported over 20% of the annual discharge and 60% of the annual sediment load to the YRE (<xref ref-type="bibr" rid="B27">Liu et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B23">Li and Sheng, 2011</xref>). This sudden large input has significantly altered the physical and chemical characteristics of the estuary, affecting the ecological balance of the Bohai Sea (<xref ref-type="bibr" rid="B60">Yang F. X. et&#xa0;al., 2024</xref>). These ecological changes not only have a profound impact on the Bohai Sea&#x2019;s ecosystem but also directly affect the sustainable development of fisheries and regional environmental health.</p>
<p>Predicting river inflow into the sea is a key aspect of offshore environmental management (<xref ref-type="bibr" rid="B42">Vinayachandran et&#xa0;al., 2015</xref>). The volume of water and sediment discharged during WSRS greatly exceeds that of natural flood seasons (<xref ref-type="bibr" rid="B17">Ji et&#xa0;al., 2020</xref>), significantly affecting the spatiotemporal distribution of suspended sediment concentrations (<xref ref-type="bibr" rid="B25">Liu, 2015</xref>). The Bohai Sea faces several major ecological issues, such as eutrophication, hypoxia, and sediment pollution, all of which are closely related to river inflows. Additionally, river inflows can impact fish habitats and breeding conditions, and accurately predicting inflow variations can help adjust fishery harvesting plans to prevent overfishing. The STL&#x2013;LightGBM framework proposed in this study combines STL with the LightGBM model to extract key seasonal and trend patterns from time series data, significantly improving prediction accuracy.</p>
<p>The model proposed in this study used streamflow time series and its decomposition components as input variables, without relying on external factors such as regional meteorology, topography, soil, or human activities. This enhances the model&#x2019;s transferability across different regions. The results showed that the inflow data itself exhibits significant temporal autocorrelation (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>), meaning that future streamflow can be effectively predicted based solely on past streamflow data. For example, in short-term flow forecasting for the Yellow River Basin, accurate predictions can be made using only the past 7&#x2013;14 days of streamflow data (<xref ref-type="bibr" rid="B45">Wang et&#xa0;al., 2025</xref>). Moreover, previous studies have confirmed the feasibility of forecasting future runoff based solely on historical streamflow data (<xref ref-type="bibr" rid="B31">Parsaie et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B38">Shi et&#xa0;al., 2025</xref>; <xref ref-type="bibr" rid="B57">Xu et&#xa0;al., 2024</xref>). Therefore, the method holds potential for other regions or rivers that have sufficient historical streamflow data, although further validation is required.</p>
<p>This framework enables early prediction of ecological changes and provides scientific guidance for addressing issues such as hypoxia and eutrophication. Additionally, this prediction technology holds significant potential for offshore environmental monitoring and early warning systems, supporting the management of fisheries resources, water quality maintenance, and ecological protection. It provides a solid foundation for ensuring the long-term health and stability of the Bohai Sea&#x2019;s ecological environment.</p>
</sec>
</sec>
<sec id="s6" sec-type="conclusions">
<label>6</label>
<title>Conclusion</title>
<p>In this study, STL was applied to a single historical variable, the inflow of the Yellow River into the sea, to improve the performance of the machine learning model in predicting future inflow. The main results were as follows:</p>
<list list-type="order">
<list-item>
<p>The LightGBM model could predict near-term flow based on the historical inflow of the Yellow River into the sea, but its prediction performance decreased rapidly as the lead time increased. Taking LightGBM8 as an example, the NSEs at lead times of 1, 4, and 7 days were 0.930, 0.675, and 0.444, respectively.</p>
</list-item>
<list-item>
<p>STL can improve the prediction ability of traditional machine learning models. In Scenario 2, when the lead time was 6 days and 7 days, the NSEs of STL&#x2013;LightGBM8 were 0.614 and 0.549, respectively, which are better than those of LightGBM. It is recommended that STL be applied to each sample of the test set individually, because this is how the method would be used in practice. Applying STL to the entire test set at once overestimated the true performance of STL&#x2013;LightGBM.</p>
</list-item>
</list>
<p>This study conducted hydrological time series prediction based on data from the Lijin Hydrological Station and obtained several important conclusions. However, some limitations and areas for improvement remain: (1) Choice of decomposition methods: STL was used for time series decomposition, but it is not the only option. Future studies could explore alternatives such as EMD, SSA, or WT to assess their effectiveness in hydrological prediction. (2) Method generalizability: Although the method performed well on data from the Lijin Station, it should be tested on other stations to evaluate its adaptability under different hydrological conditions. (3) Optimization strategies: More advanced techniques, such as Bayesian optimization, could be used to fine-tune key parameters like window size and improve overall model performance. (4) Model extension: Future work could also explore deep learning models like LSTM and Transformer to further enhance predictive capability.</p>
</sec>
</body>
<back>
<sec id="s7" sec-type="data-availability">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: <uri xlink:href="http://www.yrcc.gov.cn/">http://www.yrcc.gov.cn/</uri>.</p>
</sec>
<sec id="s8" sec-type="author-contributions">
<title>Author contributions</title>
<p>SW: Conceptualization, Data curation, Methodology, Software, Validation, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. KY: Data curation, Methodology, Validation, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. HP: Conceptualization, Data curation, Formal Analysis, Funding acquisition, Investigation, Methodology, Resources, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing.</p>
</sec>
<sec id="s9" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. This study was financially supported by programs of the National Natural Science Foundation of China (grant number U190621) and the Open Research Fund of State Key Laboratory of Simulation and Regulation of Water Cycle in River Basin, China Institute of Water Resources and Hydropower Research (grant number SKL2024YJZD02).</p>
</sec>
<sec id="s10" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s11" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
</sec>
<sec id="s12" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abbasi</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Farokhnia</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Bahreinimotlagh</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Roozbahani</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A hybrid of Random Forest and Deep Auto-Encoder with support vector regression methods for accuracy improvement and uncertainty reduction of long-term streamflow prediction</article-title>. <source>J. Hydrol.</source> <volume>597</volume>, <elocation-id>125717</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jhydrol.2020.125717</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Althoff</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Destouni</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Global patterns in water flux partitioning: Irrigated and rainfed agriculture drives asymmetrical flux to vegetation over runoff</article-title>. <source>One Earth</source> <volume>6</volume>, <fpage>1246</fpage>&#x2013;<lpage>1257</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.oneear.2023.08.002</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Apaydin</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Taghi Sattari</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Falsafian</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Prasad</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Artificial intelligence modelling integrated with Singular Spectral analysis and Seasonal-Trend decomposition using Loess approaches for streamflow predictions</article-title>. <source>J. Hydrol.</source> <volume>600</volume>, <elocation-id>126506</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jhydrol.2021.126506</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bai</surname> <given-names>J. H.</given-names>
</name>
<name>
<surname>Xiao</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>K. J.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>H. F.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Arsenic and heavy metal pollution in wetland soils from tidal freshwater and salt marshes before and after the flow-sediment regulation regime in the Yellow River Delta, China</article-title>. <source>J. Hydrol.</source> <volume>450-451</volume>, <fpage>244</fpage>&#x2013;<lpage>253</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jhydrol.2012.05.006</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>J. X.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>H. K.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Z. Q.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>The importance of short lag-time in the runoff forecasting model based on long short-term memory</article-title>. <source>J. Hydrol.</source> <volume>589</volume>, <elocation-id>125359</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jhydrol.2020.125359</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>M. M.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Combining two-stage decomposition based machine learning methods for annual runoff forecasting</article-title>. <source>J. Hydrol.</source> <volume>603</volume>, <elocation-id>126945</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jhydrol.2021.126945</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>S. N.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Bychkov</surname> <given-names>I.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>A transfer Learning-Based LSTM strategy for imputing Large-Scale consecutive missing data and its application in a water quality prediction system</article-title>. <source>J. Hydrol.</source> <volume>602</volume>, <elocation-id>126573</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jhydrol.2021.126573</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname> <given-names>X. Y.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>J. R.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>S. L.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Dynamic response of water flow and sediment transport off the Yellow River mouth to tides and waves in winter</article-title>. <source>Front. Mar. Sci.</source> <volume>10</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fmars.2023.1181347</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cleveland</surname> <given-names>R. B.</given-names>
</name>
<name>
<surname>Cleveland</surname> <given-names>W. S.</given-names>
</name>
</person-group> (<year>1990</year>). <article-title>STL: a seasonal-trend decomposition procedure based on Loess</article-title>. <source>J. Off. Stat.</source> <volume>6</volume>, <fpage>3</fpage>&#x2013;<lpage>73</lpage>.</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dou</surname> <given-names>X. Y.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>H. D.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X. T.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Dynamic landscapes and the influence of human activities in the Yellow River Delta wetland region</article-title>. <source>Sci. Total Environ.</source> <volume>899</volume>, <elocation-id>166239</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.scitotenv.2023.166239</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Du</surname> <given-names>K. C.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Lei</surname> <given-names>J. Q.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>The incorrect usage of singular spectral analysis and discrete wavelet transform in hybrid models to predict hydrological time series</article-title>. <source>J. Hydrol.</source> <volume>552</volume>, <fpage>44</fpage>&#x2013;<lpage>51</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jhydrol.2017.06.019</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname> <given-names>H. T.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>S. C.</given-names>
</name>
<name>
<surname>Jin</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Sato</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X. Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A seasonal-trend decomposition-based dendritic neuron model for financial time series prediction</article-title>. <source>Appl. Soft Comput.</source> <volume>108</volume>, <elocation-id>107488</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.asoc.2021.107488</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname> <given-names>F. F.</given-names>
</name>
<name>
<surname>Wan</surname> <given-names>Q. J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y. Q.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X. Q.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Daily runoff prediction with a seasonal decomposition-based deep GRU method</article-title>. <source>Water</source> <volume>16</volume>, <elocation-id>618</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/w16040618</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname> <given-names>L. T.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H. X.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>H. F.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>S. A.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>W. X.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Quantitatively linking ecosystem service functions with soil moisture and ecohydrology regimes in watershed</article-title>. <source>Sci. Total Environ.</source> <volume>955</volume>, <elocation-id>176866</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.scitotenv.2024.176866</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Hyndman</surname> <given-names>R. J.</given-names>
</name>
<name>
<surname>Athanasopoulos</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2018</year>). <source>Forecasting: Principles and Practice</source>. <edition>2nd Edn</edition> (<publisher-loc>Melbourne, VIC</publisher-loc>: <publisher-name>OTexts</publisher-name>). Available at: <uri xlink:href="https://otexts.com/fpp2/">https://otexts.com/fpp2/</uri> (Accessed <access-date>March 18, 2024</access-date>).</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jehanzaib</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Ali</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Modeling hydrological non-stationarity to analyze environmental impacts on drought propagation</article-title>. <source>Atmos. Res.</source> <volume>286</volume>, <elocation-id>106699</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.atmosres.2023.106699</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ji</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Pan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Impact of river discharge on hydrodynamics and sedimentary processes at Yellow River Delta</article-title>. <source>Mar. Geology</source> <volume>425</volume>, <elocation-id>106210</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.margeo.2020.106210</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jia</surname> <given-names>W. F.</given-names>
</name>
<name>
<surname>Yi</surname> <given-names>Y. J.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Numerical study of the water-sediment regulation scheme (WSRS) impact on suspended sediment transport in the Yellow River Estuary</article-title>. <source>Front. Mar. Sci.</source> <volume>10</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fmars.2023.1135118</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>Z. F.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>B. H.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>Z. G.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>Y. R.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Comparison of process-driven SWAT model and data-driven machine learning techniques in simulating streamflow: a case study in the Fenhe River Basin</article-title>. <source>Sustainability</source> <volume>16</volume>, <elocation-id>6074</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/su16146074</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>S. J.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Solomatine</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Improving AI system awareness of geoscience knowledge: symbiotic integration of physical approaches and deep learning</article-title>. <source>Geophys. Res. Lett.</source> <volume>47</volume>, <elocation-id>e2020GL088229</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1029/2020GL088229</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ke</surname> <given-names>G. L.</given-names>
</name>
<name>
<surname>Meng</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Finley</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>T. F.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>W. D.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). &#x201c;<article-title>LightGBM: A highly efficient Gradient Boosting Decision Tree</article-title>,&#x201d; in <conf-name>Advances in Neural Information Processing Systems 30 (NIPS 2017)</conf-name>, ed <person-group person-group-type="editor">
<name>
<surname>Guyon</surname> <given-names>I.</given-names>
</name>
</person-group> (<publisher-loc>New York</publisher-loc>: <publisher-name>Curran Associates Inc Press</publisher-name>), <fpage>3149</fpage>&#x2013;<lpage>3157</lpage>.</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kratzert</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Klotz</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Herrnegger</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Sampson</surname> <given-names>A. K.</given-names>
</name>
<name>
<surname>Hochreiter</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Nearing</surname> <given-names>G. S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Toward improved predictions in ungauged basins: Exploiting the power of machine learning</article-title>. <source>Water Resour. Res.</source> <volume>55</volume>, <fpage>11344</fpage>&#x2013;<lpage>11354</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1029/2019WR026065</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>G. Y.</given-names>
</name>
<name>
<surname>Sheng</surname> <given-names>L. X.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Model of water-sediment regulation in Yellow River and its effect</article-title>. <source>Sci. China Tech. Sci.</source> <volume>54</volume>, <fpage>924</fpage>&#x2013;<lpage>930</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11431-011-4322-3</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>S. N.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>G. X.</given-names>
</name>
<name>
<surname>Deng</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>Y. M.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>W. W.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Influence of hydrology process on wetland landscape pattern: a case study in the Yellow River Delta</article-title>. <source>Ecol. Eng.</source> <volume>35</volume>, <fpage>1719</fpage>&#x2013;<lpage>1726</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ecoleng.2009.07.009</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>S. M.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Response of nutrient transports to water&#x2013;sediment regulation events in the Huanghe basin and its impact on the biogeochemistry of the Bohai</article-title>. <source>J. Mar. Syst.</source> <volume>141</volume>, <fpage>59</fpage>&#x2013;<lpage>70</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jmarsys.2014.08.008</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>C. S.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>W. Z.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>C. H.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>T. N.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>Y. Q.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>R. X.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Research on runoff process vectorization and integration of deep learning algorithms for flood forecasting</article-title>. <source>J. Environ. Manage.</source> <volume>362</volume>, <elocation-id>121260</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jenvman.2024.121260</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>S. M.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>L. W.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>G. L.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>Z. G.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>J. L.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Impacts of human activities on nutrient transports in the Huanghe (Yellow River) estuary</article-title>. <source>J. Hydrol.</source> <volume>430-431</volume>, <fpage>103</fpage>&#x2013;<lpage>110</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jhydrol.2012.02.005</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>C. X.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>Y. Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Trends and attribution of runoff changes in the upper and middle reaches of the Yellow River in China</article-title>. <source>J. Hydro-environ. Res.</source> <volume>37</volume>, <fpage>57</fpage>&#x2013;<lpage>66</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jher.2021.05.002</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>C. X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X. D.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>G. Z.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Q.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Detection of vegetation coverage changes in the Yellow River Basin from 2003 to 2020</article-title>. <source>Ecol. Indic.</source> <volume>138</volume>, <elocation-id>108818</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ecolind.2022.108818</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Parisouj</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Jun</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Bateni</surname> <given-names>S. M.</given-names>
</name>
<name>
<surname>Heggy</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Band</surname> <given-names>S. S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Machine learning models coupled with empirical mode decomposition for simulating monthly and yearly streamflows: a case study of three watersheds in Ontario, Canada</article-title>. <source>Eng. Appl. Comput. Fluid Mech.</source> <volume>17</volume>, <elocation-id>2242445</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/19942060.2023.2242445</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Parsaie</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Ghasemlounia</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Gharehbaghi</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Haghiabi</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Chadee</surname> <given-names>A. A.</given-names>
</name>
<name>
<surname>Nou</surname> <given-names>M. R. G.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Novel hybrid intelligence predictive model based on successive variational mode decomposition algorithm for monthly runoff series</article-title>. <source>J. Hydrol.</source> <volume>634</volume>, <elocation-id>131041</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jhydrol.2024.131041</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qian</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Pei</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zareipour</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A review and discussion of decomposition-based hybrid models for wind energy forecasting applications</article-title>. <source>Appl. Energy</source> <volume>235</volume>, <fpage>939</fpage>&#x2013;<lpage>953</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.apenergy.2018.10.080</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qiu</surname> <given-names>Z. Q.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Duan</surname> <given-names>M. W.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>P. P.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>K. Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Four-decades of sediment transport variations in the Yellow River on the Loess Plateau using Landsat imagery</article-title>. <source>Remote Sens. Environ.</source> <volume>306</volume>, <elocation-id>114147</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.rse.2024.114147</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Quilty</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Adamowski</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Addressing the incorrect usage of wavelet-based hydrological and water resources forecasting models for real-world applications with best practices and a new forecasting framework</article-title>. <source>J. Hydrol.</source> <volume>563</volume>, <fpage>336</fpage>&#x2013;<lpage>353</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jhydrol.2018.05.003</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ramkumar</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Jothiprakash</surname> <given-names>V.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Forecasting influent wastewater quality by chaos coupled machine learning optimized with Bayesian algorithm</article-title>. <source>J. Water Process. Eng.</source> <volume>61</volume>, <elocation-id>105306</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jwpe.2024.105306</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reichstein</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Camps-Valls</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Stevens</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Jung</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Denzler</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Carvalhais</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Deep learning and process understanding for data-driven Earth system science</article-title>. <source>Nature</source> <volume>566</volume>, <fpage>195</fpage>&#x2013;<lpage>204</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41586-019-0912-1</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shen</surname> <given-names>C. P.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>A transdisciplinary review of deep learning research and its relevance for water resources scientists</article-title>. <source>Water Resour. Res.</source> <volume>54</volume>, <fpage>8558</fpage>&#x2013;<lpage>8593</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1029/2018WR022643</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shi</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Qu</surname> <given-names>S. M.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>H. S.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Q. F.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>Y. Q.</given-names>
</name>
<etal/>
</person-group>. (<year>2025</year>). <article-title>Assessment of hybrid kernel function in extreme support vector regression model for streamflow time series forecasting based on a Bayesian estimator decomposition algorithm</article-title>. <source>Eng. Appl. Artif. Intell.</source> <volume>149</volume>, <elocation-id>110514</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.engappai.2025.110514</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shi</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>Z. P.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Gong</surname> <given-names>J. F.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Land-use changes and check dams reducing runoff and sediment yield on the Loess Plateau of China</article-title>. <source>Sci. Total Environ.</source> <volume>664</volume>, <fpage>984</fpage>&#x2013;<lpage>994</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.scitotenv.2019.01.430</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Singh</surname> <given-names>U.</given-names>
</name>
<name>
<surname>Maca</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Hanel</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Markonis</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Nidamanuri</surname> <given-names>R. R.</given-names>
</name>
<name>
<surname>Nasreen</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Hybrid multi-model ensemble learning for reconstructing gridded runoff of Europe for 500 years</article-title>. <source>Inf. Fusion</source> <volume>97</volume>, <elocation-id>101807</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.inffus.2023.101807</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tran</surname> <given-names>H. D.</given-names>
</name>
<name>
<surname>Muttil</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Perera</surname> <given-names>B. J. C.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Selection of significant input variables for time series forecasting</article-title>. <source>Environ. Modell. Software</source> <volume>64</volume>, <fpage>156</fpage>&#x2013;<lpage>163</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.envsoft.2014.11.018</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vinayachandran</surname> <given-names>P. N.</given-names>
</name>
<name>
<surname>Jahfer</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Nanjundiah</surname> <given-names>R. S.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Impact of river runoff into the ocean on Indian summer monsoon</article-title>. <source>Environ. Res. Lett.</source> <volume>10</volume>, <elocation-id>054008</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1088/1748-9326/10/5/054008</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>X. W.</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Dynamic changes of cultivated land use and grain production in the lower reaches of the Yellow River based on GlobeLand30</article-title>. <source>Front. Environ. Sci.</source> <volume>10</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fenvs.2022.974812</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X. G.</given-names>
</name>
<name>
<surname>Tang</surname> <given-names>X. H.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>X. X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J. L.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>D. Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>The seas around China in a warming climate</article-title>. <source>Nat. Rev. Earth Environ.</source> <volume>4</volume>, <fpage>535</fpage>&#x2013;<lpage>551</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s43017-023-00453-6</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>J. Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>R. Y.</given-names>
</name>
<name>
<surname>Mu</surname> <given-names>X. P.</given-names>
</name>
<name>
<surname>Baiyinbaoligao</surname>
</name>
<name>
<surname>Wei</surname> <given-names>J. H.</given-names>
</name>
<etal/>
</person-group>. (<year>2025</year>). <article-title>A runoff prediction approach based on machine learning, ensemble forecasting and error correction: a case study of source area of Yellow River</article-title>. <source>J. Hydrol.</source> <volume>658</volume>, <elocation-id>133190</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jhydrol.2025.133190</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Peng</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>S. K.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Prediction of estuarine water quality using interpretable machine learning approach</article-title>. <source>J. Hydrol.</source> <volume>605</volume>, <elocation-id>127320</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jhydrol.2021.127320</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>J. J.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>Q. Y.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>E. J.</given-names>
</name>
<name>
<surname>Bai</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>S. P.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Hydro-geomorphological regime of the lower Yellow River and delta in response to the water&#x2013;sediment regulation scheme: process, mechanism and implication</article-title>. <source>Catena</source> <volume>219</volume>, <elocation-id>106646</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.catena.2022.106646</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>F. B.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Variability of annual sediment load and runoff in the Yellow River for the last 100 years (1919&#x2013;2018)</article-title>. <source>Sci. Total Environ.</source> <volume>758</volume>, <elocation-id>143715</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.scitotenv.2020.143715</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>Y. M.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>On practical challenges of decomposition-based hybrid forecasting algorithms for wind speed and solar irradiation</article-title>. <source>Energy</source> <volume>112</volume>, <fpage>208</fpage>&#x2013;<lpage>220</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.energy.2016.06.075</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>W. S.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>H. B.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>S. Z.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Z. M.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>Q. H.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>S. D.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Trivariate copula functions for constructing a comprehensive atmosphere-land surface-hydrology drought index: a case study in the Yellow River basin</article-title>. <source>J. Hydrol.</source> <volume>642</volume>, <elocation-id>131784</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jhydrol.2024.131784</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wei</surname> <given-names>Q. S.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>B. D.</given-names>
</name>
<name>
<surname>Yao</surname> <given-names>Q. Z.</given-names>
</name>
<name>
<surname>Xue</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J. C.</given-names>
</name>
<name>
<surname>Xin</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Spatiotemporal variations in the summer hypoxia in the Bohai Sea (China) and controlling mechanisms</article-title>. <source>Mar. Pollut. Bull.</source> <volume>138</volume>, <fpage>125</fpage>&#x2013;<lpage>134</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.marpolbul.2018.11.041</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Kan</surname> <given-names>J. J.</given-names>
</name>
<name>
<surname>Narale</surname> <given-names>D. D.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Dynamics of bacterial communities during a seasonal hypoxia at the Bohai Sea: Coupling and response between abundant and rare populations</article-title>. <source>J. Environ. Sci.</source> <volume>111</volume>, <fpage>324</fpage>&#x2013;<lpage>339</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jes.2021.04.013</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname> <given-names>J. H.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Z. C.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>J. H.</given-names>
</name>
<name>
<surname>Cui</surname> <given-names>X. F.</given-names>
</name>
<name>
<surname>Tao</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Robust runoff prediction with explainable artificial intelligence and meteorological variables from deep learning ensemble model</article-title>. <source>Water Resour. Res.</source> <volume>59</volume>, <elocation-id>e2023WR035676</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1029/2023WR035676</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Yue</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Borthwick</surname> <given-names>A. G. L.</given-names>
</name>
<name>
<surname>Slater</surname> <given-names>L. J.</given-names>
</name>
<name>
<surname>Syvitski</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Bi</surname> <given-names>N.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Mega-reservoir regulation: a comparative study on downstream responses of the Yangtze and Yellow rivers</article-title>. <source>Earth-Sci. Rev.</source> <volume>245</volume>, <elocation-id>104567</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.earscirev.2023.104567</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xie</surname> <given-names>Y. T.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>M. M.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Z. X.</given-names>
</name>
<name>
<surname>Pan</surname> <given-names>X. Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Stacking ensemble learning models for daily runoff prediction using 1D and 2D CNNs</article-title>. <source>Expert Syst. Appl.</source> <volume>217</volume>, <elocation-id>119469</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.eswa.2022.119469</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xin</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X. Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Coupling driving factors of eco-environmental protection and high-quality development in the Yellow River basin</article-title>. <source>Front. Environ. Sci.</source> <volume>10</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fenvs.2022.951218</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>D. M.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>W. C.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>An ensemble model for monthly runoff prediction using least squares support vector machine based on variational modal decomposition with dung beetle optimization algorithm and error correction strategy</article-title>. <source>J. Hydrol.</source> <volume>629</volume>, <elocation-id>130558</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jhydrol.2023.130558</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>B. C.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>D. S.</given-names>
</name>
<name>
<surname>Burnett</surname> <given-names>W. C.</given-names>
</name>
<name>
<surname>Ran</surname> <given-names>X. B.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>Z. G.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>M. S.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>). <article-title>Artificial water sediment regulation scheme influences morphology, hydrodynamics and nutrient behavior in the Yellow River estuary</article-title>. <source>J. Hydrol.</source> <volume>539</volume>, <fpage>102</fpage>&#x2013;<lpage>112</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jhydrol.2016.05.024</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>X. Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J. Y.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>X. C.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Research on information leakage in time series prediction based on empirical mode decomposition</article-title>. <source>Sci. Rep.</source> <volume>14</volume>, <elocation-id>28363</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-024-80018-9</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>F. X.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>Z. G.</given-names>
</name>
<name>
<surname>Bouwman</surname> <given-names>A. F.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>H. T.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>M. F.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Significant impacts of artificial regulation on nutrient concentrations and transport in Huanghe River</article-title>. <source>J. Oceanol. Limnol.</source> <volume>42</volume>, <fpage>1865</fpage>&#x2013;<lpage>1879</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00343-024-3234-6</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname> <given-names>D. X.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>G. X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X. J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>B. H.</given-names>
</name>
<name>
<surname>Eller</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J. Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>The impact of runoff flux and reclamation on the spatiotemporal evolution of the Yellow River estuarine wetlands</article-title>. <source>Ocean Coastal Manage.</source> <volume>212</volume>, <elocation-id>105804</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ocecoaman.2021.105804</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>J. J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Lv</surname> <given-names>Q. M.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y. B.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>J. B.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>Y. J.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Impact of the Water&#x2013;Sediment Regulation Scheme on the phytoplankton community in the Yellow River estuary</article-title>. <source>J. Clean. Prod.</source> <volume>294</volume>, <elocation-id>126291</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jclepro.2021.126291</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>Y. F.</given-names>
</name>
<name>
<surname>Thorburn</surname> <given-names>P. J.</given-names>
</name>
<name>
<surname>Xiang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Fitch</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>SSIM&#x2014;a deep learning approach for recovering missing time series sensor data</article-title>. <source>IEEE Internet Things J.</source> <volume>6</volume>, <fpage>6618</fpage>&#x2013;<lpage>6628</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JIOT.2019.2909038</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhi</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>D. P.</given-names>
</name>
<name>
<surname>Tsai</surname> <given-names>W. P.</given-names>
</name>
<name>
<surname>Sterle</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Harpold</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>C. P.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>From hydrometeorology to river water quality: Can a deep learning model predict dissolved oxygen at the continental scale?</article-title> <source>Environ. Sci. Technol.</source> <volume>55</volume>, <fpage>2357</fpage>&#x2013;<lpage>2368</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/acs.est.0c06783</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zuo</surname> <given-names>G. G.</given-names>
</name>
<name>
<surname>Luo</surname> <given-names>J. G.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Lian</surname> <given-names>Y. N.</given-names>
</name>
<name>
<surname>He</surname> <given-names>X. X.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Decomposition ensemble model based on variational mode decomposition and long short-term memory for streamflow forecasting</article-title>. <source>J. Hydrol.</source> <volume>585</volume>, <elocation-id>124776</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jhydrol.2020.124776</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>