<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Water</journal-id>
<journal-title-group>
<journal-title>Frontiers in Water</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Water</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2624-9375</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frwa.2026.1767400</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Application and comparison of multiple machine learning models in flood susceptibility assessment in the Beijing-Tianjin-Hebei region of China</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Haijun</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Dong</surname>
<given-names>Jiubo</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3307805"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhou</surname>
<given-names>Shuiqing</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Yaowen</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Hongtao</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3239860"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Pang</surname>
<given-names>Yixin</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Du</surname>
<given-names>Bingbin</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Institute of Disaster Prevention</institution>, <city>Beijing</city>, <country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>Hebei Key Laboratory of Resource and Environmental Disaster Mechanism and Risk Monitoring</institution>, <city>Sanhe</city>, <country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>&#x002A;</label>Correspondence: Jiubo Dong, <email xlink:href="mailto:dong15922164740@163.com">dong15922164740@163.com</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-27">
<day>27</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>8</volume>
<elocation-id>1767400</elocation-id>
<history>
<date date-type="received">
<day>14</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>18</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>19</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2026 Li, Dong, Zhou, Zhang, Liu, Pang and Du.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Li, Dong, Zhou, Zhang, Liu, Pang and Du</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-27">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>The confluence of extreme precipitation and rapid urbanization has led to a marked increase in flood risk across the Beijing-Tianjin-Hebei (BTH) region. To this end, conducting a thorough flood susceptibility assessment is of paramount importance to safeguard the region and ensure its sustainable development. Based on historical flood disaster records, 15 flood-related influencing factors such as elevation, average annual rainfall, and the Normalized Difference Vegetation Index (NDVI) were selected as the initial variable set. A flood disaster susceptibility evaluation framework was established through multicollinearity analysis and feature selection based on the Information Gain Ratio (IGR). Support Vector Machine (SVM), Random Forest (RF), Extreme Gradient Boosting (XGBoost), and Multilayer Perceptron (MLP) models were employed to conduct the susceptibility assessment. The predictive performance and susceptibility zoning outcomes of the models were systematically compared using the Area Under the Receiver Operating Characteristic Curve (AUC) and a set of statistical evaluation metrics, including accuracy, Kappa coefficient, and sensitivity. Research findings demonstrate that (1) elevation, distance from rivers, average 24-h maximum rainfall, and slope constitute the primary controlling factors for flood occurrence in the BTH region; (2) Very high and High susceptibility zones are primarily concentrated in topographic transition zones, critical nodes of the river system, and key flood storage and detention areas. The high and relatively high susceptibility zones identified by the four models show a strong spatial consistency with the actual distribution of flood disasters, and exhibit minimal overfitting. (3) The AUC validation results of the four models are as follows: XGBoost (0.938)&#x202F;&#x003E;&#x202F;RF (0.920)&#x202F;&#x003E;&#x202F;MLP (0.867)&#x202F;&#x003E;&#x202F;SVM (0.854). 
Among these models, XGBoost produced the smallest proportion of high-susceptibility zones, demonstrating a superior ability to accurately identify areas with the highest potential flood risk. This study provides a scientific foundation for flood risk management in the BTH region and holds significant practical value for improving regional flood control strategies and spatial planning.</p>
</abstract>
<kwd-group>
<kwd>Beijing-Tianjin-Hebei region</kwd>
<kwd>flood disaster</kwd>
<kwd>machine learning algorithms</kwd>
<kwd>risk prevention and control</kwd>
<kwd>susceptibility evaluation</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported in part by the Langfang Science and Technology Research and Development Plan Self-Funded Project of China (2025013012), Science Research Project of Hebei Education Department (QN2024093), Science and Technology Innovation Program for Postgraduate Students in IDP subsidized by the Fundamental Research Funds for the Central Universities (ZY20250337), Fundamental Research Funds for the Central Universities (ZY20215144), and National Natural Science Foundation of China (Grant Nos. 72574017 and 72174019).</funding-statement>
</funding-group>
<counts>
<fig-count count="8"/>
<table-count count="2"/>
<equation-count count="18"/>
<ref-count count="58"/>
<page-count count="18"/>
<word-count count="12426"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Water and Artificial Intelligence</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<label>1</label>
<title>Introduction</title>
<p>Floods are one of the most frequent and devastating natural disasters globally. According to data from the International Disaster Database (EM-DAT), between 1900 and 2016, floods have resulted in approximately 7&#x202F;million fatalities worldwide and caused economic losses exceeding 700&#x202F;billion US dollars (<xref ref-type="bibr" rid="ref40">Wang et al., 2020a</xref>). Furthermore, since the year 2000, flood-related events have constituted 44% of all reported natural disasters globally (<xref ref-type="bibr" rid="ref47">Yu et al., 2022</xref>). Meanwhile, China is among the countries most severely affected by flooding, with such disasters posing a significant threat to socioeconomic development. Direct economic losses attributable to floods typically account for approximately 62% of the nation&#x2019;s total annual losses from natural disasters, while flood-related fatalities represent over 50% of the total disaster mortality (<xref ref-type="bibr" rid="ref10">Chen et al., 2020</xref>). The Beijing-Tianjin-Hebei region serves as a major political, cultural, and economic hub of China. In 2024, its population density and economic output per unit area reached 272.19 and 275.38% of the national average, respectively (<xref ref-type="bibr" rid="ref50">Zhang et al., 2022</xref>), with the urbanization process continuing to accelerate. The BTH region is characterized by complex topography and a well-developed hydrological network. In recent years, the increasing frequency and intensity of extreme precipitation events have exacerbated the occurrence of flooding, resulting in a progressively severe flood risk that poses a significant threat to human life and property (<xref ref-type="bibr" rid="ref41">Wang et al., 2020b</xref>). 
In 2023, the region experienced the &#x201C;Haihe 23&#x00B7;7&#x201D; basin-wide extreme flood event, which was accompanied by an average areal precipitation of 155.3 millimeters and a total precipitation volume of 494&#x202F;billion cubic meters. This event resulted in direct economic losses amounting to nearly 130&#x202F;billion yuan in the BTH region, necessitated the emergency evacuation of millions of residents, and caused severe damage to critical infrastructure and ecosystems (<xref ref-type="bibr" rid="ref26">Li et al., 2023</xref>). Therefore, given the regional context of flood hazards, conducting a scientific assessment of flood susceptibility is critically important for developing targeted engineering and managerial mitigation strategies and enhancing overall resilience to natural disasters.</p>
<p>Flood susceptibility denotes the spatial probability of a region being affected by flooding, assessed based on a comprehensive evaluation of specific hazard-inducing and hazard-conducive conditions, while incorporating historical disaster records. It involves the systematic identification and construction of key evaluation indicators and the application of appropriate analytical methods. The core objective is to address the critical question of &#x201C;where flooding is most likely to occur&#x201D; (<xref ref-type="bibr" rid="ref4">Antangelo et al., 2011</xref>). Flood susceptibility assessment integrates semi-qualitative and quantitative evaluation methods to synthesize indicator data, typically representing the potential likelihood of flooding across a region. This assessment provides a scientific foundation for regional flood control planning, the optimal allocation of emergency resources, and territorial spatial planning, serving as a critical support for systematically enhancing regional flood disaster resilience. Currently, the primary approaches to flood disaster risk assessment include statistical analysis of historical disaster records, the index system method, scenario simulation, machine learning techniques, and methods integrating Remote Sensing (RS) technology with Geographic Information Systems (GIS) (<xref ref-type="bibr" rid="ref27">Lin et al., 2023</xref>). Among them, the historical disaster mathematical statistics method summarizes the patterns by analyzing past disaster data, but it relies too much on complete historical records, and its accuracy is limited when data is missing or the characteristics of disasters change (<xref ref-type="bibr" rid="ref7">Benito et al., 2005</xref>; <xref ref-type="bibr" rid="ref49">Zhang et al., 2024</xref>; <xref ref-type="bibr" rid="ref46">Xu, 2017</xref>). 
The index system method establishes an evaluation framework by selecting relevant influencing factors; however, it is characterized by high subjectivity in both indicator selection and weight assignment, and has limited capacity to capture the complex nonlinear interactions among multiple factors (<xref ref-type="bibr" rid="ref39">Waghwala and Agnihotri, 2019</xref>; <xref ref-type="bibr" rid="ref21">Huang et al., 2019</xref>). Although the scenario simulation method is capable of simulating flood processes under specific conditions, it is associated with high computational complexity and challenges in acquiring key parameters (<xref ref-type="bibr" rid="ref45">Wang et al., 2015</xref>; <xref ref-type="bibr" rid="ref43">Wang and Zhang, 2022</xref>). Flood assessment methods based on RS and GIS demonstrate distinct advantages in spatial analysis. Among these, RS technology has emerged as a critical underpinning for validating disaster assessment outcomes, owing to its capability for large-scale, rapid acquisition of real-time surface information (<xref ref-type="bibr" rid="ref16">Farhadi and Najafzadeh, 2021</xref>). Its spatially explicit data can intuitively reflect the actual impact scope of floods, providing an objective basis for verifying the rationality of model results (<xref ref-type="bibr" rid="ref6">Basirian et al., 2026</xref>). However, this technology still faces limitations in temporal prediction and multi-source data fusion (<xref ref-type="bibr" rid="ref22">Jiao et al., 2024</xref>). To this end, scholars have developed an evaluation framework integrating remote sensing technology and Multi-Criteria Decision-Making (MCDM) methods. 
By utilizing multi-temporal remote sensing data to dynamically monitor inundation extent, water body indices, and surface changes, and integrating multi-source indicators such as hydrology, topography, and socio-economics through models like the Analytic Hierarchy Process (AHP), real-time monitoring of flood processes and comprehensive risk assessment have been achieved (<xref ref-type="bibr" rid="ref15">Farhadi et al., 2021</xref>; <xref ref-type="bibr" rid="ref13">Desta et al., 2025</xref>). However, this method relies on expert experience in weight determination, leading to strong subjectivity, and it is difficult to characterize the complex non-linear relationships between various factors. To overcome the aforementioned limitations, intelligent risk mapping methods based on the combination of remote sensing and machine learning have gradually emerged. This method extracts multi-dimensional features through remote sensing and automatically identifies the complex non-linear correlations between disaster-causing factors and floods with the help of machine learning algorithms. It not only improves the spatial accuracy and timeliness of risk mapping but also promotes the paradigm shift of flood assessment from post-disaster analysis to pre-disaster prediction (<xref ref-type="bibr" rid="ref29">Mojaddadi et al., 2017</xref>). <xref ref-type="bibr" rid="ref16">Farhadi and Najafzadeh (2021)</xref>, in their study of the Galikesh River Basin in Iran, extracted multiple types of flood risk factors based on remote sensing data and GIS technology, and generated a high-precision flood risk map using a Random Forest (RF) model, thereby demonstrating the feasibility and superiority of machine learning approaches.</p>
<p>Against this backdrop, machine learning methods, as data-driven assessment approaches, exhibit remarkable advantages. They can automatically mine latent patterns from multi-source heterogeneous data, reduce reliance on complete historical records, and effectively capture the complex interactive relationships among disaster-causing factors through their robust nonlinear modeling capabilities. These methods overcome the subjectivity inherent in traditional weight assignment approaches, thereby enabling objective, efficient, and accurate assessment of flood susceptibility. Despite challenges such as overfitting and complex parameter tuning, their superior performance has still attracted considerable attention in flood risk assessment, with applications evolving gradually from early exploration to in-depth practice. Early studies mainly focused on traditional machine learning models, with Artificial Neural Networks (ANN), Multilayer Perceptron (MLP), Support Vector Machine (SVM) and K-nearest Neighbor (KNN) being commonly used. Among them, relying on its advantage in non-linear fitting, ANN achieves higher accuracy than traditional statistical models in flood susceptibility assessment of small watersheds, but it is prone to overfitting, requires a large amount of data, and has poor interpretability (<xref ref-type="bibr" rid="ref32">Rahman et al., 2019</xref>; <xref ref-type="bibr" rid="ref14">Elsafi, 2014</xref>). MLP and SVM are more mature and representative models developed on this basis, thus being selected as the representatives of traditional models in this study. 
MLP is suitable for multi-source heterogeneous data fusion tasks, and its performance is often superior to that of a single traditional model, but it is sensitive to data noise and the optimization process is prone to falling into local optima (<xref ref-type="bibr" rid="ref3">Andaryani et al., 2021</xref>; <xref ref-type="bibr" rid="ref38">Tsumita et al., 2025</xref>); SVM can effectively suppress overfitting in small-sample and high-dimensional scenarios with high prediction accuracy, however, the selection of its kernel function is subjective, and its efficiency is low when processing large-scale data (<xref ref-type="bibr" rid="ref36">Tehrany et al., 2015a</xref>,<xref ref-type="bibr" rid="ref37">b</xref>). Due to its sensitivity to data scale and outliers, as well as difficulty in handling complex interactive relationships among multiple factors (<xref ref-type="bibr" rid="ref12">Demissie et al., 2024</xref>; <xref ref-type="bibr" rid="ref33">Shahabi et al., 2020</xref>), KNN has limited applicability and is not included in the comparison system of this study. The current research frontier has shifted to ensemble learning methods; RF and Extreme Gradient Boosting (XGBoost), as mainstream representatives, are included in this study. RF has good robustness, anti-overfitting ability, and high tolerance for missing data, but it is insufficient in depicting fine spatial features (<xref ref-type="bibr" rid="ref54">Zhao et al., 2024</xref>; <xref ref-type="bibr" rid="ref5">Avand et al., 2019</xref>); XGBoost performs excellently in identifying low-probability events in imbalanced data, but it is relatively sensitive to hyperparameters and has lower efficiency than RF in processing large-scale data (<xref ref-type="bibr" rid="ref57">Zhu et al., 2024</xref>; <xref ref-type="bibr" rid="ref28">Madhuri et al., 2021</xref>). 
Although deep learning models have potential in spatiotemporal data modeling, they require a large amount of high-quality training data and have weak interpretability (<xref ref-type="bibr" rid="ref42">Wang et al., 2020</xref>; <xref ref-type="bibr" rid="ref25">Li et al., 2021</xref>). In addition, there is a lack of time-series data in the study area, so they are not included in this comparison. Although various machine learning models have been widely verified in flood susceptibility assessment, most existing studies focus on the performance demonstration of a single model, lacking systematic comparison of models with different principles under the same benchmark (<xref ref-type="bibr" rid="ref9">Cao et al., 2025</xref>). To this end, this study selects four representative models, namely SVM, RF, XGBoost, and MLP. The selection criteria cover mainstream categories of both traditional machine learning and ensemble learning, entail mature applicability aligned with the data characteristics of the study area, and involve models with distinct core mechanisms to facilitate the analysis of feature impacts on assessment performance. Through systematic comparison under a unified data basis and evaluation framework, this study aims to clarify the performance differences and applicable conditions of each model, make up for the deficiencies of existing studies, and provide a scientific basis for model selection in flood susceptibility modeling.</p>
<p>In summary, this study systematically analyzes the hazard-inducing and vulnerability-contributing factors of flood disasters in the BTH region. Building upon historical disaster records and integrating multicollinearity diagnostics with information gain ratio analysis, a robust and comprehensive evaluation index system is established. Subsequently, by conducting a systematic comparison of machine learning models including SVM, RF, XGBoost, and MLP, the most suitable model for the region is identified, and a flood susceptibility map is generated. Furthermore, the rationality of the susceptibility probability map produced by the model was verified using the remote sensing inundation map of the &#x201C;Haihe River &#x2018;23&#x00B7;7&#x2019; Extreme Basin Wide Flood Disaster&#x201D;. The core innovation of this study lies in the empirical performance comparison across multiple models to determine the optimal modeling approach, thereby enhancing the accuracy and reliability of the zonation results. These findings provide a scientifically sound foundation for fine-grained flood risk management and the efficient allocation of disaster prevention resources within the BTH urban agglomeration.</p>
</sec>
<sec id="sec2">
<label>2</label>
<title>Study area and data</title>
<sec id="sec3">
<label>2.1</label>
<title>Study area</title>
<p><xref ref-type="fig" rid="fig1">Figure 1</xref> provides a geographical overview of the study area. The BTH region comprises Beijing Municipality, Tianjin Municipality, and Hebei Province, covering a total area of 218,000&#x202F;km<sup>2</sup>. Situated between 113&#x00B0;27&#x2032;E and 119&#x00B0;50&#x2032;E and spanning from 36&#x00B0;05&#x2032;N to 42&#x00B0;40&#x2032;N, the study area features a complex and diverse topography, including mountains, hills, basins, and plains. Bordered by the Taihang Mountains to the west and opening onto the Bohai Sea to the east, it drains into the sea, which functions as the ultimate hydrological sink for regional runoff (<xref ref-type="bibr" rid="ref19">Hou et al., 2024</xref>). Elevation in the region exhibits a distinct decreasing trend from northwest to southeast. The northern and western parts of Hebei are dominated by mountainous and hilly terrain with pronounced relief, where average elevations exceed 1,000 meters. In contrast, the southeastern Hebei region and the municipalities of Beijing and Tianjin constitute the core of the Haihe Plain, characterized by flat topography, an average elevation below 50 meters, and intensive agricultural activity and population concentration. The region is characterized by a well-developed drainage system. The Luanhe and Haihe River systems form a fan-shaped river network oriented from northwest to southeast, encompassing major tributaries such as the Yongding, Daqing, Chaobai, and Beiyun Rivers. Scattered throughout the plain are numerous depressions, lakes, and wetland complexes, contributing to a highly heterogeneous surface hydrological regime (<xref ref-type="bibr" rid="ref51">Zhang and Wang, 2024</xref>). The region experiences a warm temperate semi-humid monsoon climate, with precipitation markedly uneven in both space and time. 
The majority of rainfall occurs during the flood season (June&#x2013;September), accounting for 70&#x2013;80% of the annual total, predominantly in the form of intense rainstorms. In recent years, the frequency of extreme heavy rainfall events has increased significantly (<xref ref-type="bibr" rid="ref48">Yuan et al., 2018</xref>).</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>The map outlines the BTH region&#x2019;s administrative boundaries and core geographical features, including major cities, adjacent provinces, transportation networks, key water bodies (rivers, reservoirs, flood storage areas), northwest&#x2013;southeast terrain transition (mountains to plains), eastern Bohai Sea boundary, and isohyets for annual precipitation distribution.</p>
</caption>
<graphic xlink:href="frwa-08-1767400-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Map of a region in China, highlighting Beijing and its surroundings with provincial boundaries in purple. Features include cities, rivers, roads, and water bodies. An inset map shows the location within China. The legend explains symbols for capitals, rivers, coastlines, metropolitan areas, and terrain types like mountainous areas and plains. Scale and directional indicators are included.</alt-text>
</graphic>
</fig>
<p>The BTH region, characterized by high population density and rapid economic development, faces significantly heightened flood risks due to its intensive urbanization. As of 2023, the region&#x2019;s permanent resident population reached approximately 109&#x202F;million, with the urbanization rate increasing to 79% over the past decade and GDP nearly doubling during the same period (<xref ref-type="bibr" rid="ref56">Zhou and Zhao, 2025</xref>). This dense development pattern has rendered the region particularly vulnerable to historical extreme rainfall events, including those labeled &#x201C;63.8,&#x201D; &#x201C;96.8,&#x201D; &#x201C;21.7,&#x201D; and &#x201C;23.7.&#x201D; The &#x201C;23.7&#x201D; flood event alone resulted in economic losses exceeding 160&#x202F;billion yuan and affected more than 5.5&#x202F;million people, underscoring the pressing need for effective flood control and disaster mitigation strategies.</p>
</sec>
<sec id="sec4">
<label>2.2</label>
<title>Flood inventory map</title>
<p>To construct a training dataset for flood-susceptibility modeling, multi-source archival flood records were collated to generate sample locations. Systematic extraction of geographic coordinates was conducted from flood events documented on municipal government portals of Hebei Province, the municipalities of Tianjin and Beijing, and supplemented by historical annals of the Haihe River Basin, yielding 2,000 geo-referenced flood points. Positional accuracy was verified by GPS field surveys and cross-validation against water-level gaging stations, with residual mislocations eliminated through high-resolution remote-sensing interpretation. To avoid class imbalance, which can bias machine learning algorithms, non-flood points were delimited by GIS-based cluster analysis and then randomly sampled in equal number from buffer zones peripheral to flood pixels, producing an initial pool of 4,000 samples. Finally, a stratified random split (70% training, 30% validation) was applied to ensure proportional representation of both classes.</p>
</sec>
<sec id="sec5">
<label>2.3</label>
<title>Data sources</title>
<p>Flood disasters are triggered by the synergistic interaction of numerous factors, which can be broadly classified into four main categories: topographic and geomorphological features, meteorological and hydrological conditions, underlying surface properties, and human activities (<xref ref-type="bibr" rid="ref20">Hou et al., 2018</xref>; <xref ref-type="bibr" rid="ref53">Zhang, 2016</xref>). As fundamental expressions of surface configuration, topography and geomorphology capture regional-scale terrain heterogeneity. These attributes critically govern the velocity and persistence of rainfall derived runoff, thereby modulating flood susceptibility. Meteorological and hydrological variables act as direct triggers of flood episodes, where precipitation produces surface runoff that converges into river networks, culminating in flood events. The underlying surface is mainly defined by vegetation coverage and soil characteristics, whereas land use configurations offer a clear reflection of anthropogenic impact.</p>
<p>Building upon the distinct environmental setting of the BTH region, historical flood inventories, and flood formation mechanisms, and supported by existing literature (<xref ref-type="bibr" rid="ref30">Pham et al., 2021</xref>; <xref ref-type="bibr" rid="ref31">Prasad et al., 2022</xref>), this research selects 15 representative flood-conditioning factors to construct a preliminary factor inventory. Multi-source datasets were utilized in this research. From the 90-meter resolution SRTM Digital Elevation Model (DEM) obtained from the Geospatial Data Cloud,<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref> the following factors were extracted: elevation, slope, aspect, Plan Curvature (PlaC), Profile Curvature (ProC), Stream Power Index (SPI), Topographic Wetness Index (TWI), and Terrain Ruggedness Index (TRI). Lithology data were derived from the 1:2,500,000 scale vectorized geological map provided by the Resource and Environmental Science Data Platform.<xref ref-type="fn" rid="fn0002"><sup>2</sup></xref> Land use data were sourced from the Global Land Cover Database,<xref ref-type="fn" rid="fn0003"><sup>3</sup></xref> specifically the GlobeLand30 global land cover product with a spatial resolution of 30 meters. The Normalized Difference Vegetation Index (NDVI) data employed in this study were derived from the MOD13Q1 product&#x2014;a standard vegetation index dataset released via NASA&#x2019;s Earthdata platform,<xref ref-type="fn" rid="fn0004"><sup>4</sup></xref> which is generated by the Moderate Resolution Imaging Spectroradiometer (MODIS) sensor onboard the Terra/Aqua satellites. This product is characterized by a spatial resolution of 250&#x202F;m, a temporal resolution of 16&#x202F;days, and a data time span ranging from 2000 to 2022. The raw MOD13Q1 data were preprocessed through a series of standard procedures, including image mosaicking, reprojection (unified to the WGS84 coordinate system), and spatial clipping to match the boundary of the BTH region. 
Subsequently, the Maximum Value Composite (MVC) method was applied to the preprocessed multi-temporal images to eliminate noise interference from clouds, atmospheric scattering, and solar altitude angle variations, ultimately generating the NDVI dataset of the BTH region covering the period 2001&#x2013;2022. The multi-year average value was calculated to characterize the typical spatial pattern of vegetation cover in the study area. In terms of hydrological characteristics, based on the hydrological dataset provided by the National Geographic Information Resource Directory Service System,<xref ref-type="fn" rid="fn0005"><sup>5</sup></xref> the Distance From Rivers (DFR) was calculated using the multi-ring buffer analysis method, and the River Density (RD) was extracted by means of hydrological analysis. Precipitation data, including the Annual Average Rainfall (AAR) and the 24-h Average Maximum Rainfall (AMR24), were sourced from the National Earth System Science Data Center,<xref ref-type="fn" rid="fn0006"><sup>6</sup></xref> which were calculated based on a 20-year precipitation sequence. The spatial distribution patterns of these factors are illustrated in <xref ref-type="fig" rid="fig2">Figure 2</xref>.</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Spatial distribution of 15 flood conditioning factors across the BTH region: <bold>(A)</bold> Elevation, <bold>(B)</bold> Slope, <bold>(C)</bold> Aspect, <bold>(D)</bold> PlaC, <bold>(E)</bold> ProC, <bold>(F)</bold> TRI, <bold>(G)</bold> SPI, <bold>(H)</bold> TWI, <bold>(I)</bold> Lithology, <bold>(J)</bold> RD, <bold>(K)</bold> DFR, <bold>(L)</bold> ANN, <bold>(M)</bold> AMR24, <bold>(N)</bold> LU, and <bold>(O)</bold> NDVI. Each map uses color gradients to represent data variations and includes flood disaster points marked with symbols, with a consistent layout showing geographical orientation.</p>
</caption>
<graphic xlink:href="frwa-08-1767400-g002.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Multiple thematic maps of a region display factors related to flooding: elevation, slope, aspect, plan and profile curvature, terrain indices, lithology, river density, distance from rivers, rainfall patterns, land use, and vegetation index. Each map uses color gradients to represent data variations and includes flood disaster points marked with symbols. Maps are labeled A to O, with a consistent layout showing geographical orientation.</alt-text>
</graphic>
</fig>
<p>To verify the rationality of the flood susceptibility probability zoning map generated by the machine learning model, the Sentinel-1A Synthetic Aperture Radar (SAR) imagery data downloaded from the Copernicus Data Centre of the European Space Agency<xref ref-type="fn" rid="fn0007"><sup>7</sup></xref> were adopted in this study. The flood inundation extent of the study area was extracted based on the polarimetric water index, which was further used to evaluate the zoning results. The images were acquired in the Interferometric Wide-swath (IW) mode with the product type of IW_GRDH_1S, and all images featured a consistent ascending orbit direction as well as a VV&#x0026;VH dual-polarization configuration. The image acquisition period ranged from July 1 to September 5, 2023. The detailed image names, orbit numbers and acquisition times are provided in <xref rid="SM1" ref-type="supplementary-material">Appendix Table 2</xref>.</p>
</sec>
<sec id="sec6">
<label>2.4</label>
<title>Flood conditioning factors</title>
<p>As continuous predictors offer no direct insight into the relative contribution of individual factors to flood occurrence or into the spatial association between specific value ranges and flood likelihood, they were discretised into ordinal classes to enhance model interpretability and ensure the plausibility of the resultant assessment. Frequency Ratio (FR) converts continuous variables into categorical variables through discretization processing, which can effectively reveal the spatial correlation between each factor category and flood disasters (<xref ref-type="bibr" rid="ref8">Bonham-Carter, 1994</xref>). The <inline-formula>
<mml:math id="M1">
<mml:mi mathvariant="italic">FR</mml:mi>
</mml:math>
</inline-formula> value is defined as the ratio of the probability of flood occurrence to the probability of non-occurrence within the study area. When <inline-formula>
<mml:math id="M2">
<mml:mi mathvariant="italic">FR</mml:mi>
</mml:math>
</inline-formula> &#x003E; 1, it indicates a positive correlation between the classified interval and flood occurrence. A higher <inline-formula>
<mml:math id="M3">
<mml:mi mathvariant="italic">FR</mml:mi>
</mml:math>
</inline-formula> value signifies a stronger positive correlation, implying an increased likelihood of flooding in that interval (<xref ref-type="bibr" rid="ref2">Akg&#x00FC;n et al., 2008</xref>). The calculation formula of <inline-formula>
<mml:math id="M4">
<mml:mi mathvariant="italic">FR</mml:mi>
</mml:math>
</inline-formula> is as follows:</p>
<disp-formula id="E1">
<mml:math id="M5">
<mml:mi mathvariant="italic">FR</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(1)</label>
</disp-formula>
<p>Among these parameters, <inline-formula>
<mml:math id="M6">
<mml:mi>N</mml:mi>
<mml:mspace width="0.25em"/>
</mml:math>
</inline-formula> denotes the flood affected area within a specific classification interval, which in this study is approximated by the number of flood events occurring within that interval. <inline-formula>
<mml:math id="M7">
<mml:msub>
<mml:mi>N</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:math>
</inline-formula> represents the total flood-affected area across the entire study region and is substituted by the total number of flood events recorded. <inline-formula>
<mml:math id="M8">
<mml:mi>S</mml:mi>
</mml:math>
</inline-formula> refers to the spatial extent of the classification interval, while <inline-formula>
<mml:math id="M9">
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:math>
</inline-formula> indicates the total area of the study region. The <inline-formula>
<mml:math id="M10">
<mml:mi mathvariant="italic">FR</mml:mi>
</mml:math>
</inline-formula> values for the 15 influencing factors are presented in <xref ref-type="fig" rid="fig3">Figure 3</xref>. Considering the uneven spatial distribution of flood data points, the application of equal-interval classification to influencing factors may lead to biased interpretations. To address this issue, this study employs the natural breaks method to classify the 12 continuous influencing factors (excluding aspect, land use type, and lithology) into five intervals, based on the inherent distribution patterns of the data.</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>Statistics of Flood Counts and FR Values by Classification of Flood Conditioning Factors in the BTH Region: <bold>(A)</bold> Elevation, <bold>(B)</bold> Slope, <bold>(C)</bold> Aspect, <bold>(D)</bold> PlaC, <bold>(E)</bold> ProC, <bold>(F)</bold> TRI, <bold>(G)</bold> SPI, <bold>(H)</bold> TWI, <bold>(I)</bold> Lithology, <bold>(J)</bold> RD, <bold>(K)</bold> DFR, <bold>(L)</bold> ANN, <bold>(M)</bold> AMR24, <bold>(N)</bold> LU, and <bold>(O)</bold> NDVI.</p>
</caption>
<graphic xlink:href="frwa-08-1767400-g003.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Fifteen bar charts display the number of floods and frequency ratio (FR) across various geographical and meteorological factors. Each chart, labeled from (A) to (O), shows a different factor: Elevation, Slope, Aspect, Plan Curvature, Profile Curvature, Terrain Ruggedness Index (TRI), Stream Power Index (SPI), Topographic Wetness Index (TWI), Lithology, River Density (RD), Distance From Rivers (DFR), Annual Rainfall (ANN), 24-hour maximum rainfall (AMR24), Land Use (LU), and Normalized Difference Vegetation Index (NDVI). Blue bars represent floods, and red lines with points show FR trends across different ranges for each factor.</alt-text>
</graphic>
</fig>
<p>The elevation of the study area varies from &#x2212;38 to 2,760 meters (<xref ref-type="fig" rid="fig3">Figure 3A</xref>). Within the elevation interval of &#x2212;38 to 227 meters, the <inline-formula>
<mml:math id="M11">
<mml:mi mathvariant="italic">FR</mml:mi>
</mml:math>
</inline-formula> exceeds 1, indicating a higher susceptibility to flood occurrence in this range. The slope gradient ranges from 0 to 87.92&#x00B0; (<xref ref-type="fig" rid="fig3">Figure 3B</xref>). Overall, the <inline-formula>
<mml:math id="M12">
<mml:mi mathvariant="italic">FR</mml:mi>
</mml:math>
</inline-formula> value decreases as slope increases, with the highest <inline-formula>
<mml:math id="M13">
<mml:mi mathvariant="italic">FR</mml:mi>
</mml:math>
</inline-formula> observed in the 0 to 4.82&#x00B0; slope interval. Aspect was classified into nine directional categories (<xref ref-type="fig" rid="fig3">Figure 3C</xref>), among which flat (<inline-formula>
<mml:math id="M14">
<mml:mi mathvariant="italic">FR</mml:mi>
</mml:math>
</inline-formula>=1.84), north-facing (<inline-formula>
<mml:math id="M15">
<mml:mi mathvariant="italic">FR</mml:mi>
</mml:math>
</inline-formula>=1.1), and west-facing (<inline-formula>
<mml:math id="M16">
<mml:mi mathvariant="italic">FR</mml:mi>
</mml:math>
</inline-formula>=1.2) slopes exhibit a stronger association with flood occurrence. PlaC spans from &#x2212;1 to 360 (<xref ref-type="fig" rid="fig3">Figure 3D</xref>), while ProC ranges from 0 to 56.22 (<xref ref-type="fig" rid="fig3">Figure 3E</xref>). According to FR analysis, the maximum <italic>FR</italic> value of 1.52 corresponds to the ProC interval of 0 to 1.98, whereas the PlaC interval of 251.61 to 287.81 yields the highest <inline-formula>
<mml:math id="M17">
<mml:mi mathvariant="italic">FR</mml:mi>
</mml:math>
</inline-formula> value of 1.12.</p>
<p><inline-formula>
<mml:math id="M18">
<mml:mi mathvariant="italic">TRI</mml:mi>
<mml:mspace width="0.25em"/>
</mml:math>
</inline-formula> refers to the height difference between the highest and lowest points within a certain area, which can quantitatively describe the undulating form of the terrain in a region. It is an important indicator for classifying landforms and characterizing their features (<xref ref-type="bibr" rid="ref11">Chi et al., 2025</xref>).</p>
<p>The calculation formula is as follows:</p>
<disp-formula id="E2">
<mml:math id="M19">
<mml:mi mathvariant="italic">TRI</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi mathvariant="italic">Abs</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo mathvariant="italic">max</mml:mo>
<mml:msup>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mo mathvariant="italic">min</mml:mo>
<mml:mspace width="0em"/>
<mml:msup>
<mml:mi>m</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(2)</label>
</disp-formula>
<p>In the formula, <inline-formula>
<mml:math id="M20">
<mml:mtext mathvariant="italic">maxm</mml:mtext>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M21">
<mml:mtext mathvariant="italic">minm</mml:mtext>
</mml:math>
</inline-formula> represent the absolute highest and lowest elevations within the region, respectively. The <italic>TRI</italic> value range of the region is 0 to 1942 (<xref ref-type="fig" rid="fig3">Figure 3F</xref>). The <inline-formula>
<mml:math id="M22">
<mml:mi mathvariant="italic">FR</mml:mi>
</mml:math>
</inline-formula> value decreases as the <italic>TRI</italic> increases; within the <italic>TRI</italic> range of 0 to 53, the <italic>FR</italic> value reaches its maximum (1.52), indicating that the smaller the <italic>TRI</italic>, the more prone the area is to flood disasters.</p>
<p>The <italic>SPI</italic> characterizes the erosive potential of water flow and quantitatively describes the relationship between water flow erosion and geomorphic evolution. Its calculation formula is as follows:</p>
<disp-formula id="E3">
<mml:math id="M23">
<mml:mi mathvariant="italic">SPI</mml:mi>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
<mml:mtext mathvariant="italic">tan&#x03B2;</mml:mtext>
</mml:math>
<label>(3)</label>
</disp-formula>
<p>In the formula, <inline-formula>
<mml:math id="M24">
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
<mml:mspace width="0.25em"/>
</mml:math>
</inline-formula> represents the catchment area of the basin, and <inline-formula>
<mml:math id="M25">
<mml:mi>&#x03B2;</mml:mi>
</mml:math>
</inline-formula> is the slope.</p>
<p>The <inline-formula>
<mml:math id="M26">
<mml:mi mathvariant="italic">SPI</mml:mi>
</mml:math>
</inline-formula> values in the study area range from &#x2212;11.41 to 18.32 (<xref ref-type="fig" rid="fig3">Figure 3G</xref>). <italic>FR</italic> &#x003E; 1 only in the intervals &#x2212;11.41 to &#x2212;4.53 and &#x2212;4.53 to 1.3; in all other intervals, <italic>FR</italic> &#x003C; 1. This indicates that within the range of &#x2212;11.41 to 1.3, <italic>SPI</italic> is positively correlated with the occurrence of flood disasters.</p>
<p><inline-formula>
<mml:math id="M27">
<mml:mi mathvariant="italic">TWI</mml:mi>
</mml:math>
</inline-formula> is used to predict the spatial distribution pattern of soil moisture and quantify the control intensity of topography on hydrological processes. Its calculation formula is as follows:</p>
<disp-formula id="E4">
<mml:math id="M28">
<mml:mi mathvariant="italic">TWI</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo mathvariant="italic">ln</mml:mo>
<mml:mfrac>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
<mml:mtext mathvariant="italic">tan&#x03B2;</mml:mtext>
</mml:mfrac>
</mml:math>
<label>(4)</label>
</disp-formula>
<p>As the <inline-formula>
<mml:math id="M29">
<mml:mi mathvariant="italic">TWI</mml:mi>
</mml:math>
</inline-formula> value increases, the <italic>FR</italic> value shows an increasing trend. Within the range of 22.56 to 36.13, the <italic>FR</italic> value reaches its maximum of 2.06 (<xref ref-type="fig" rid="fig3">Figure 3H</xref>), indicating that the higher the <italic>TWI</italic> value, the more likely it is to cause flood disasters.</p>
<p>Based on rock strength characteristics, the lithologies in the study area are systematically divided into seven groups (<xref ref-type="fig" rid="fig3">Figure 3I</xref>): UF represents fluvial sedimentary rocks; IA1 is granite; UL is lacustrine sedimentary rocks; UG is glacially derived rocks; SC2 includes sandstone, argillaceous sandstone, and feldspathic sandstone; MA1 is quartzite; and SC3 includes siltstone, mudstone, and claystone. Statistical analysis reveals that over 86% of the hazard sites in the study area are located within the UF lithology distribution, indicating that the lithologic characteristics of fluvial sedimentary rocks may significantly influence the development and occurrence of disasters.</p>
<p>River network density serves as a critical indicator for assessing the development level of a regional water system. It is mathematically defined as the ratio of the total river length to the area of the study region. Based on the three-level water bodies, a hierarchical buffer zone was constructed. By coupling the weighted assignment and spatial overlay techniques, the absolute distance from the river was normalized into a dimensionless relative index ranging from 1 to 10, precisely quantifying the spatial proximity of the region to the river network. A higher river network density is associated with an increased likelihood of flood disasters (<xref ref-type="fig" rid="fig3">Figure 3J</xref>). Furthermore, areas located closer to rivers face a greater risk of being affected by flood events (<xref ref-type="fig" rid="fig3">Figure 3K</xref>).</p>
<p>The annual average rainfall in the study area ranges from 343.07 to 759.2&#x202F;mm (<xref ref-type="fig" rid="fig3">Figure 3L</xref>). The <italic>FR</italic> values exceed 1 within the rainfall intervals of 455.67&#x202F;~&#x202F;520.94, 520.94&#x202F;~&#x202F;576.43, and 576.43&#x202F;~&#x202F;644.96, with the maximum <italic>FR</italic> value observed in the 576.43&#x202F;~&#x202F;644.96 interval. Similarly, for the average 24-h maximum rainfall, <italic>FR</italic> values are greater than 1 in the intervals of 59.76&#x202F;~&#x202F;76.13, 76.13&#x202F;~&#x202F;87.47, and 87.47&#x202F;~&#x202F;101.32, where the highest <italic>FR</italic> value occurs in the 87.47&#x202F;~&#x202F;101.32 interval (<xref ref-type="fig" rid="fig3">Figure 3M</xref>).</p>
<p>The study area encompasses eight land use types: farmland, forest, grassland, shrubland, wetland, water body, impervious surface, and bare land (<xref ref-type="fig" rid="fig3">Figure 3N</xref>). Among these, the <italic>FR</italic> values for impervious surface, farmland, and bare land all exceed 1, with the impervious surface exhibiting the highest <italic>FR</italic> value of 2.71. The NDVI in the study area ranges from &#x2212;0.18 to 0.97 (<xref ref-type="fig" rid="fig3">Figure 3O</xref>), and the <italic>FR</italic> value decreases as the NDVI value increases. The smaller the NDVI value, the lower the vegetation coverage and the more likely flood disasters are to occur.</p>
</sec>
</sec>
<sec sec-type="methods" id="sec7">
<label>3</label>
<title>Methods</title>
<p>The flood susceptibility mapping process in this study comprises four key stages (<xref ref-type="fig" rid="fig4">Figure 4</xref>): (1) preparation of the flood inventory and influencing factors, and division of the training and validation datasets; (2) feature selection through multicollinearity analysis and the information gain ratio method; (3) flood susceptibility modeling using machine learning algorithms; and (4) generation of the flood susceptibility map, together with validation and inter-comparison of the flood-susceptibility models to identify the optimal algorithm for mapping regional flood likelihood.</p>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>Methodology flowchart for flood susceptibility mapping, including four stages: (1) Data preparation, (2) feature selection, (3) model development, (4) validation and zoning.</p>
</caption>
<graphic xlink:href="frwa-08-1767400-g004.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Flowchart illustrating a flood prediction model. It begins with flood conditioning factors from topography, meteorology, human activities, and surfaces, leading to feature selection. Datasets are split into training and testing. Multicollinearity diagnosis and information gain ratio (IGR) are used. Models such as SVM, XGBoost, RF, and MLP are applied for training and testing. The process concludes with model verification against documented flood disasters, resulting in an optimal model. Outputs include a flood susceptibility map and areas of undetermined susceptibility.</alt-text>
</graphic>
</fig>
<sec id="sec8">
<label>3.1</label>
<title>Feature selection methods</title>
<sec id="sec9">
<label>3.1.1</label>
<title>Multicollinearity diagnosis</title>
<p>When there are two or more highly linearly correlated factors in the flood disaster classification model, it will cause multicollinearity problems, leading to distorted classification accuracy of the model. Therefore, conducting multicollinearity diagnosis among factors is a necessary step before modeling. In this paper, Tolerances (<inline-formula>
<mml:math id="M30">
<mml:mi>Tol</mml:mi>
</mml:math>
</inline-formula>) and Variance Inflation Factors (<inline-formula>
<mml:math id="M31">
<mml:mi>VIF</mml:mi>
</mml:math>
</inline-formula>) are used to diagnose multicollinearity among factors. When<inline-formula>
<mml:math id="M32">
<mml:mspace width="0.25em"/>
<mml:mi>Tol</mml:mi>
</mml:math>
</inline-formula> &#x003E; 0.1 and <inline-formula>
<mml:math id="M33">
<mml:mi>VIF</mml:mi>
</mml:math>
</inline-formula> &#x003C; 10 (<xref ref-type="bibr" rid="ref17">Hasanzuzzaman et al., 2022</xref>), it indicates that the factors are independent of each other; otherwise, there is a collinearity problem. The calculation formulas of <inline-formula>
<mml:math id="M34">
<mml:mi>Tol</mml:mi>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math id="M35">
<mml:mi>VIF</mml:mi>
</mml:math>
</inline-formula> are:</p>
<disp-formula id="E5">
<mml:math id="M36">
<mml:msub>
<mml:mi mathvariant="italic">Tol</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>R</mml:mi>
<mml:mi>j</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:math>
<label>(5)</label>
</disp-formula>
<disp-formula id="E6">
<mml:math id="M37">
<mml:msub>
<mml:mi mathvariant="italic">VIF</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="italic">Tol</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(6)</label>
</disp-formula>
<p>In the equation, <inline-formula>
<mml:math id="M38">
<mml:msubsup>
<mml:mi>R</mml:mi>
<mml:mi>j</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:math>
</inline-formula> denotes the coefficient of determination derived from the regression of the <inline-formula>
<mml:math id="M39">
<mml:mi>j</mml:mi>
</mml:math>
</inline-formula>-th independent variable on all other independent variables.</p>
</sec>
<sec id="sec10">
<label>3.1.2</label>
<title>Information gain ratio</title>
<p>This study utilizes the information gain ratio (<italic>IGR</italic>) to identify core indicators for assessing flood disaster susceptibility. As a feature selection technique, <italic>IGR</italic> effectively quantifies the contribution of each disaster-related factor to the discriminatory power and stability of the predictive model: a higher <italic>IGR</italic> value indicates a greater amount of classification information within the feature and a more significant contribution to model prediction performance (<xref ref-type="bibr" rid="ref58">Zhu et al., 2021</xref>). By applying <italic>IGR</italic>-based screening, the most informative and relatively independent key indicators can be selected, thereby enabling the development of a more efficient, accurate, and interpretable assessment model. The mathematical formulation of <italic>IGR</italic> is presented as follows:</p>
<disp-formula id="E7">
<mml:math id="M40">
<mml:mi mathvariant="italic">IGR</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>Y</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>Y</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>Y</mml:mi>
<mml:mo>&#x2223;</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(7)</label>
</disp-formula>
<p>In the equation, <inline-formula>
<mml:math id="M41">
<mml:mi>H</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>Y</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula> denotes the entropy associated with the target variable, <inline-formula>
<mml:math id="M42">
<mml:mi>H</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>Y</mml:mi>
<mml:mo>&#x2223;</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula> denotes the conditional entropy of the target variable given the factor <inline-formula>
<mml:math id="M43">
<mml:mi>X</mml:mi>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math id="M44">
<mml:mi>H</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula> denotes the entropy of the factor <inline-formula>
<mml:math id="M45">
<mml:mi>X</mml:mi>
</mml:math>
</inline-formula> itself.</p>
</sec>
</sec>
<sec id="sec11">
<label>3.2</label>
<title>Machine learning algorithms</title>
<sec id="sec12">
<label>3.2.1</label>
<title>Support vector machine</title>
<p>The support vector machine (SVM) performs pattern recognition by finding the optimal hyperplane (linear decision boundary) that maximizes the classification margin. Its core mechanism is as follows: for linearly separable data, it directly solves a convex optimization problem to determine the hyperplane; for nonlinear data, it uses kernel functions to map the data into a high-dimensional space in which linear separation can be achieved. The decision function of this method depends only on the support vectors and has strong generalization ability (<xref ref-type="bibr" rid="ref23">Kamran et al., 2021</xref>). Formally, given a training set <inline-formula>
<mml:math id="M46">
<mml:msubsup>
<mml:mrow>
<mml:mo stretchy="true">{</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">}</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math id="M47">
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mo stretchy="true">{</mml:mo>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="true">}</mml:mo>
</mml:math>
</inline-formula>, the optimization objective of linear SVM can be formulated as:</p>
<disp-formula id="E8">
<mml:math id="M48">
<mml:munder>
<mml:mo mathvariant="italic">min</mml:mo>
<mml:mrow>
<mml:mi>w</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>b</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x03BE;</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mn>2</mml:mn>
</mml:mfrac>
<mml:mo>&#x2225;</mml:mo>
<mml:mi>w</mml:mi>
<mml:msup>
<mml:mo>&#x2225;</mml:mo>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:mi>C</mml:mi>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
<mml:msub>
<mml:mi>&#x03BE;</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>.</mml:mo>
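<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo stretchy="true">(</mml:mo>
<mml:msup>
<mml:mi>w</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mi>b</mml:mi>
<mml:mo stretchy="true">)</mml:mo>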
<mml:mo>&#x2265;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>&#x03BE;</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x03BE;</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0</mml:mn>
</mml:math>
<label>(8)</label>
</disp-formula>
<p>In the formulation, <inline-formula>
<mml:math id="M49">
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mn>2</mml:mn>
</mml:mfrac>
<mml:mo>&#x2225;</mml:mo>
<mml:mi mathvariant="normal">w</mml:mi>
<mml:msup>
<mml:mo>&#x2225;</mml:mo>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:math>
</inline-formula> denotes the regularization term, <inline-formula>
<mml:math id="M50">
<mml:msub>
<mml:mi>&#x03BE;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> represents the slack variable, which allows certain samples to be misclassified or lie within the margin; <inline-formula>
<mml:math id="M51">
<mml:mi>C</mml:mi>
</mml:math>
</inline-formula> is the regularization parameter that governs the trade-off between the margin width and the training error.</p>
<p>Following dualization, the decision function depends solely on the support vectors and can be generalized to nonlinear scenarios:</p>
<disp-formula id="E9">
<mml:math id="M52">
<mml:mspace width="0.25em"/>
<mml:mi>f</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mo mathvariant="italic">sign</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:munder>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:munder>
<mml:msub>
<mml:mi>&#x03B1;</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mi>K</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mi>b</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(9)</label>
</disp-formula>
<p>In the formula, <inline-formula>
<mml:math id="M53">
<mml:mi>S</mml:mi>
</mml:math>
</inline-formula> represents the set of support vectors, <inline-formula>
<mml:math id="M54">
<mml:msub>
<mml:mi>&#x03B1;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the Lagrange multiplier, <inline-formula>
<mml:math id="M55">
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mspace width="0.25em"/>
</mml:math>
</inline-formula> is the support vector in the feature space, <inline-formula>
<mml:math id="M56">
<mml:mi>x</mml:mi>
</mml:math>
</inline-formula> is the input vector in the feature space, <inline-formula>
<mml:math id="M57">
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the label of the support vector, <inline-formula>
<mml:math id="M58">
<mml:mi>K</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula> is the kernel function, and <inline-formula>
<mml:math id="M59">
<mml:mi>b</mml:mi>
</mml:math>
</inline-formula> is the bias term of the decision boundary.</p>
</sec>
<sec id="sec13">
<label>3.2.2</label>
<title>Random forest</title>
<p>Random forest is a representative method of ensemble learning. Its core mechanism is to improve classification performance by constructing multiple decision trees and fusing their prediction results. The algorithm generates multiple sub-datasets of the same size from the original training set based on bootstrap sampling, and trains a decision tree for each sub-dataset. During the node splitting process of a single tree, the optimal splitting feature is selected only from a randomly selected feature subset to reduce the correlation between trees (<xref ref-type="bibr" rid="ref35">Taalab et al., 2018</xref>). The final classification result is determined through majority voting. This ensemble mechanism substantially mitigates overfitting and enhances the model&#x2019;s generalization capability. The predicted category is formulated as:</p>
<disp-formula id="E10">
<mml:math id="M60">
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mo>=</mml:mo>
<mml:mo mathvariant="italic">arg</mml:mo>
<mml:mspace width="0em"/>
<mml:munder>
<mml:mo mathvariant="italic">max</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>K</mml:mi>
</mml:mfrac>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>K</mml:mi>
</mml:munderover>
<mml:mi>I</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mi>c</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(10)</label>
</disp-formula>
<p>In the formula, <inline-formula>
<mml:math id="M61">
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
</mml:math>
</inline-formula> denotes the predicted category of input sample <inline-formula>
<mml:math id="M62">
<mml:mi mathvariant="normal">x</mml:mi>
</mml:math>
</inline-formula> by the random forest; <inline-formula>
<mml:math id="M63">
<mml:mi>K</mml:mi>
<mml:mspace width="0.25em"/>
</mml:math>
</inline-formula> represents the total number of decision trees; <inline-formula>
<mml:math id="M64">
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula> is the prediction result of the <inline-formula>
<mml:math id="M65">
<mml:mi>k</mml:mi>
</mml:math>
</inline-formula><italic>-</italic>th decision tree for <inline-formula>
<mml:math id="M66">
<mml:mi>x</mml:mi>
<mml:mspace width="0.25em"/>
</mml:math>
</inline-formula>; <inline-formula>
<mml:math id="M67">
<mml:mi>C</mml:mi>
</mml:math>
</inline-formula> is the set of all possible classes; and <inline-formula>
<mml:math id="M68">
<mml:mi>I</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula> denotes the indicator function.</p>
</sec>
<sec id="sec14">
<label>3.2.3</label>
<title>Multilayer perceptron</title>
<p>MLP is a feedforward neural network consisting of an input layer, one or more hidden layers, and an output layer connected in sequence. The computational process is as follows: the input data enter the network in the form of a vector <inline-formula>
<mml:math id="M69">
<mml:mi mathvariant="normal">x</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="normal">R</mml:mi>
<mml:msub>
<mml:mi mathvariant="normal">n</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:msup>
</mml:math>
</inline-formula> and propagates forward layer by layer. Each hidden layer gradually maps the input space to a high-dimensional feature space through a combination of linear transformation and nonlinear activation, transforming the linearly inseparable problem into a linearly separable one. Finally, the output layer generates the prediction result (<xref ref-type="bibr" rid="ref44">Wang et al., 2024</xref>).</p>
<p>The mapping function for the <inline-formula>
<mml:math id="M70">
<mml:mi mathvariant="normal">l</mml:mi>
</mml:math>
</inline-formula>-th layer is defined as:</p>
<disp-formula id="E11">
<mml:math id="M71">
<mml:msup>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="true">(</mml:mo>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="normal">l</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>L</mml:mi>
</mml:math>
<label>(11)</label>
</disp-formula>
<p>In the formula, <inline-formula>
<mml:math id="M72">
<mml:msup>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>l</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mspace width="0.25em"/>
</mml:math>
</inline-formula> represents the activation vector of the <inline-formula>
<mml:math id="M73">
<mml:mi>l</mml:mi>
</mml:math>
</inline-formula><italic>-</italic>th layer, <inline-formula>
<mml:math id="M74">
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>l</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is the weight matrix, <inline-formula>
<mml:math id="M75">
<mml:msup>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>l</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mspace width="0.25em"/>
</mml:math>
</inline-formula> is the bias vector, and <inline-formula>
<mml:math id="M76">
<mml:msup>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>l</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo stretchy="true">)</mml:mo>
<mml:mspace width="0.25em"/>
</mml:math>
</inline-formula> is the element-wise nonlinear activation function.</p>
</sec>
<sec id="sec15">
<label>3.2.4</label>
<title>Extreme gradient boosting</title>
<p>XGBoost is an efficient ensemble learning algorithm built upon the gradient boosting framework. Its fundamental principle lies in iteratively constructing decision trees to fit the residuals of predictions, with the final output obtained through an additive accumulation of these trees (<xref ref-type="bibr" rid="ref1">Abedi et al., 2022</xref>). The algorithm employs a second-order Taylor expansion to approximate the loss function, thereby transforming the nonlinear optimization problem into a quadratic one, which significantly enhances computational efficiency. Furthermore, XGBoost incorporates a regularization term into the objective function to control model complexity. This regularization mechanism balances the trade-off between fitting capability and generalization performance by imposing constraints on both the number of leaf nodes and the magnitude of their associated weights.</p>
<p>During the <inline-formula>
<mml:math id="M77">
<mml:mi>t</mml:mi>
</mml:math>
</inline-formula>-th iteration, the predicted value for sample <inline-formula>
<mml:math id="M78">
<mml:mi>i</mml:mi>
</mml:math>
</inline-formula> is expressed as:</p>
<disp-formula id="E12">
<mml:math id="M79">
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
<label>(12)</label>
</disp-formula>
<p>In the formula, <inline-formula>
<mml:math id="M80">
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> denotes the cumulative predicted value obtained after the <inline-formula>
<mml:math id="M81">
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula>-th iteration; <inline-formula>
<mml:math id="M82">
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula> represents the prediction output of the <inline-formula>
<mml:math id="M83">
<mml:mi mathvariant="normal">t</mml:mi>
</mml:math>
</inline-formula>-th decision tree for the input sample <inline-formula>
<mml:math id="M84">
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:math>
</inline-formula>.</p>
<p>The objective function of XGBoost consists of a loss function and a regularization term:</p>
<disp-formula id="E13">
<mml:math id="M85">
<mml:mi mathvariant="italic">Ob</mml:mi>
<mml:msup>
<mml:mi>j</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
<mml:mo stretchy="true">[</mml:mo>
<mml:mi>L</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo stretchy="true">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">]</mml:mo>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">&#x03B3;T</mml:mi>
<mml:mo>+</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mn>2</mml:mn>
</mml:mfrac>
<mml:mi>&#x03BB;</mml:mi>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:munderover>
<mml:msubsup>
<mml:mi>w</mml:mi>
<mml:mi>j</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:math>
<label>(13)</label>
</disp-formula>
<p>In the formula, <inline-formula>
<mml:math id="M86">
<mml:mi>L</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula> denotes the loss function, <inline-formula>
<mml:math id="M87">
<mml:mi>&#x03B3;</mml:mi>
<mml:mspace width="0.25em"/>
</mml:math>
</inline-formula> represents the splitting cost for a single node, <inline-formula>
<mml:math id="M88">
<mml:mi>T</mml:mi>
</mml:math>
</inline-formula> indicates the total number of leaf nodes, <inline-formula>
<mml:math id="M89">
<mml:mi>&#x03BB;</mml:mi>
</mml:math>
</inline-formula> is the regularization coefficient, and <inline-formula>
<mml:math id="M90">
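<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:munderover>
<mml:msubsup>
<mml:mi>w</mml:mi>
<mml:mi>j</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:math>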
</inline-formula> signifies the sum of squared weights.</p>
</sec>
</sec>
<sec id="sec16">
<label>3.3</label>
<title>Model evaluation criteria</title>
<p>The Receiver Operating Characteristic (ROC) curve serves as a critical analytical tool for assessing the performance of binary classification models. By plotting the false positive rate (<inline-formula>
<mml:math id="M91">
<mml:mi mathvariant="italic">FPR</mml:mi>
</mml:math>
</inline-formula>) on the horizontal axis and the true positive rate (<inline-formula>
<mml:math id="M92">
<mml:mi mathvariant="italic">TPR</mml:mi>
</mml:math>
</inline-formula>) on the vertical axis, it visually illustrates how model performance varies across different classification thresholds (<xref ref-type="bibr" rid="ref18">He et al., 2024</xref>). The <italic>TPR</italic>, also known as sensitivity, quantifies the proportion of actual positive samples that are correctly identified by the model, whereas the <italic>FPR</italic>, mathematically defined as 1&#x202F;&#x2212;&#x202F;specificity, measures the proportion of negative samples that are erroneously classified as positive. The AUC, which represents the total area beneath the ROC curve, ranges in value from 0 to 1. A higher AUC value indicates greater discriminatory power; an ideal classifier achieves an AUC of 1.0, while a random classifier typically yields an AUC close to 0.5.</p>
<disp-formula id="E14">
<mml:math id="M93">
<mml:mi mathvariant="italic">TPR</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mrow>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FN</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(14)</label>
</disp-formula>
<disp-formula id="E15">
<mml:math id="M94">
<mml:mi mathvariant="italic">FPR</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mi mathvariant="italic">FP</mml:mi>
<mml:mrow>
<mml:mi mathvariant="italic">FP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">TN</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(15)</label>
</disp-formula>
<p>In the formula, <inline-formula>
<mml:math id="M95">
<mml:mi mathvariant="italic">TP</mml:mi>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math id="M96">
<mml:mi mathvariant="italic">TN</mml:mi>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math id="M97">
<mml:mi mathvariant="italic">FP</mml:mi>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math id="M98">
<mml:mi mathvariant="italic">FN</mml:mi>
</mml:math>
</inline-formula> respectively represent the number of samples that are actually positive and predicted as positive, actually negative and predicted as negative, actually negative but predicted as positive, and actually positive but predicted as negative.</p>
<p>The Accuracy (<inline-formula>
<mml:math id="M99">
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mtext mathvariant="italic">ccuracy</mml:mtext>
</mml:msub>
</mml:math>
</inline-formula>), Sensitivity (<inline-formula>
<mml:math id="M100">
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mtext mathvariant="italic">ensitivity</mml:mtext>
</mml:msub>
</mml:math>
</inline-formula>) and Kappa (<inline-formula>
<mml:math id="M101">
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mtext mathvariant="italic">appa</mml:mtext>
</mml:msub>
</mml:math>
</inline-formula>) are incorporated to assess the overall correctness, the recall performance for the minority class, and the agreement beyond chance, respectively, thereby enabling a comprehensive and robust evaluation of model performance. The corresponding computational formulas are presented as follows.</p>
<disp-formula id="E16">
<mml:math id="M102">
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mtext mathvariant="italic">ccuracy</mml:mtext>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">TN</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">TN</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FN</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(16)</label>
</disp-formula>
<disp-formula id="E17">
<mml:math id="M103">
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mtext mathvariant="italic">appa</mml:mtext>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mtext mathvariant="italic">ccuracy</mml:mtext>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>e</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>e</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(17)</label>
</disp-formula>
<disp-formula id="E18">
<mml:math id="M104">
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>e</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FN</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FP</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi mathvariant="italic">FP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">TN</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi mathvariant="italic">FN</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">TN</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi mathvariant="italic">TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">TN</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">FN</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mfrac>
<mml:mspace width="0.25em"/>
<mml:mspace width="0.25em"/>
<mml:mspace width="0.25em"/>
</mml:math>
<label>(18)</label>
</disp-formula>
<sec id="sec17">
<label>3.3.1</label>
<title>Model hyperparameter optimization</title>
<p>Model performance depends not only on the algorithm itself, but is also highly sensitive to hyperparameter configurations. Untuned hyperparameters can easily lead to underfitting or overfitting, severely weakening the model&#x2019;s generalization ability. Grid search discretizes candidate values for each hyperparameter into a multidimensional grid, exhaustively enumerates and cross-validates each combination, and thus systematically locks in the optimal configuration (<xref ref-type="bibr" rid="ref34">Shams et al., 2024</xref>).</p>
</sec>
</sec>
</sec>
<sec sec-type="results" id="sec18">
<label>4</label>
<title>Results</title>
<sec id="sec19">
<label>4.1</label>
<title>Selection of the conditioning factors</title>
<p>Prior to model development, a comprehensive multicollinearity diagnosis and information gain ratio assessment were conducted for all flood disaster influencing factors. As illustrated in <xref ref-type="fig" rid="fig5">Figure 5</xref>, the AAR factor exhibited significant multicollinearity (<italic>VIF</italic>&#x202F;=&#x202F;10.06, <italic>Tol</italic>&#x202F;=&#x202F;0.082), based on the criteria of variance inflation factor (<italic>VIF</italic> &#x003C; 10) and tolerance (<italic>Tol</italic> &#x003E; 0.1). Following its removal, the <italic>VIF</italic> values of all remaining variables fell below 10, indicating a substantial reduction in multicollinearity. Further <italic>IGR</italic> analysis revealed that the AAR contributed minimally to the model (<italic>IGR</italic>&#x202F;=&#x202F;0.0257), while PlaC demonstrated no predictive contribution (<italic>IGR</italic>&#x202F;=&#x202F;0). To enhance model efficiency by mitigating multicollinearity and eliminating redundant variables, both the AAR and PlaC were excluded from the final set of influencing factors.</p>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption>
<p>Results of multicollinearity diagnosis and information gain ratio (IGR) analysis for the 15 initial factors.</p>
</caption>
<graphic xlink:href="frwa-08-1767400-g005.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Bar and line graph displaying IGR, Tolerance, and VIF values for various factors like Slope and Elevation. IGR is shown with blue bars, Tolerance with a green line, and VIF with a red line. Tolerance and VIF thresholds are marked with dashed lines. The graph is used to analyze these parameters against factors such as AMR24 and PlaC.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec20">
<label>4.2</label>
<title>Training and validating the models</title>
<p>After optimizing hyperparameters using grid search and 5-fold cross-validation, the final parameter combinations for SVM, RF, XGBoost, and MLP are shown in <xref ref-type="table" rid="tab1">Table 1</xref>. On the independent test set, the AUC of each model exceeded 0.85 (<xref ref-type="fig" rid="fig6">Figure 6</xref>), specifically: SVM 0.854, RF 0.920, XGBoost 0.938, and MLP 0.867. XGBoost performed best, with its AUC improving by 9.84% compared to the lowest-performing SVM. Further evaluation of model performance on the training and validation sets (<xref ref-type="table" rid="tab2">Table 2</xref>) showed that XGBoost significantly outperformed other models in both Accuracy (0.981 on the training set, 0.917 on the validation set) and Kappa (0.962, 0.833), demonstrating the best overall performance. RF followed, while SVM and MLP performed relatively weakly, with SVM achieving only 0.808 Accuracy on the validation set. Furthermore, XGBoost exhibits the strongest generalization ability while maintaining the highest recall rate, with the smallest performance difference between the training and validation sets, further demonstrating the model&#x2019;s advantages in stability and reliability.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Model optimization parameters.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Algorithms</th>
<th align="left" valign="top">Hyperparameter</th>
<th align="center" valign="top">Search scope</th>
<th align="center" valign="top">Optimal value</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top" rowspan="5">XGBoost</td>
<td align="left" valign="top">n_estimators</td>
<td align="center" valign="top">[10, 300]</td>
<td align="center" valign="top">100</td>
</tr>
<tr>
<td align="left" valign="top">max_depth</td>
<td align="center" valign="top">[3, 30]</td>
<td align="center" valign="top">15</td>
</tr>
<tr>
<td align="left" valign="top">learning_rate</td>
<td align="center" valign="top">[0.01, 1]</td>
<td align="center" valign="top">0.1</td>
</tr>
<tr>
<td align="left" valign="top">gamma</td>
<td align="center" valign="top">(0, 1)</td>
<td align="center" valign="top">0.72</td>
</tr>
<tr>
<td align="left" valign="top">subsample</td>
<td align="center" valign="top">(0, 1)</td>
<td align="center" valign="top">0.7</td>
</tr>
<tr>
<td align="left" valign="top" rowspan="4">RF</td>
<td align="left" valign="top">n_estimators</td>
<td align="center" valign="top">[10, 300]</td>
<td align="center" valign="top">300</td>
</tr>
<tr>
<td align="left" valign="top">max_depth</td>
<td align="center" valign="top">[3, 30]</td>
<td align="center" valign="top">6</td>
</tr>
<tr>
<td align="left" valign="top">min_samples_split</td>
<td align="center" valign="top">[2, 30]</td>
<td align="center" valign="top">5</td>
</tr>
<tr>
<td align="left" valign="top">min_samples_leaf</td>
<td align="center" valign="top">[1, 10]</td>
<td align="center" valign="top">4</td>
</tr>
<tr>
<td align="left" valign="top" rowspan="3">SVM</td>
<td align="left" valign="top">C</td>
<td align="center" valign="top">(0, 200)</td>
<td align="center" valign="top">10</td>
</tr>
<tr>
<td align="left" valign="top">gamma</td>
<td align="center" valign="top">(0, 0.1)</td>
<td align="center" valign="top">0.1</td>
</tr>
<tr>
<td align="left" valign="top">kernel</td>
<td align="center" valign="top">rbf, sigmoid, poly</td>
<td align="center" valign="top">rbf</td>
</tr>
<tr>
<td align="left" valign="top" rowspan="4">MLP</td>
<td align="left" valign="top">hidden_layer_sizes</td>
<td align="center" valign="top">[10, 100]</td>
<td align="center" valign="top">30</td>
</tr>
<tr>
<td align="left" valign="top">learning_rate_init</td>
<td align="center" valign="top">[0.0001, 0.1]</td>
<td align="center" valign="top">0.001</td>
</tr>
<tr>
<td align="left" valign="top">alpha</td>
<td align="center" valign="top">(0, 0.1)</td>
<td align="center" valign="top">0.001</td>
</tr>
<tr>
<td align="left" valign="top">batch_size</td>
<td align="center" valign="top">[32, 128]</td>
<td align="center" valign="top">64</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig position="float" id="fig6">
<label>Figure 6</label>
<caption>
<p>ROC curves of four ML models (SVM, RF, XGBoost, MLP) on the validation dataset.</p>
</caption>
<graphic xlink:href="frwa-08-1767400-g006.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">ROC curve comparison chart showing true positive rate versus false positive rate for four models. SVM has an AUC of 0.854, RF 0.920, XGBoost 0.938, and MLP 0.867. XGBoost performs best.</alt-text>
</graphic>
</fig>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Performance of the models on the training and validation datasets.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="3">Model accuracy statistic</th>
<th align="center" valign="top" colspan="9">Dataset</th>
</tr>
<tr>
<th align="center" valign="top" colspan="4">Training</th>
<th align="center" valign="top" colspan="5">Validation</th>
</tr>
<tr>
<th align="center" valign="top">XGBoost</th>
<th align="center" valign="top">RF</th>
<th align="center" valign="top">SVM</th>
<th align="center" valign="top">MLP</th>
<th align="center" valign="top">XGBoost</th>
<th align="center" valign="top">RF</th>
<th align="center" valign="top">SVM</th>
<th align="center" valign="top" colspan="2">MLP</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">TP</td>
<td align="center" valign="top">1,375</td>
<td align="center" valign="top">1,362</td>
<td align="center" valign="top">1,325</td>
<td align="center" valign="top">1,330</td>
<td align="center" valign="top">553</td>
<td align="center" valign="top">539</td>
<td align="center" valign="top">525</td>
<td align="center" valign="top" colspan="2">535</td>
</tr>
<tr>
<td align="left" valign="top">TN</td>
<td align="center" valign="top">1,372</td>
<td align="center" valign="top">1,305</td>
<td align="center" valign="top">1,100</td>
<td align="center" valign="top">1,290</td>
<td align="center" valign="top">547</td>
<td align="center" valign="top">521</td>
<td align="center" valign="top">445</td>
<td align="center" valign="top" colspan="2">505</td>
</tr>
<tr>
<td align="left" valign="top">FP</td>
<td align="center" valign="top">28</td>
<td align="center" valign="top">95</td>
<td align="center" valign="top">300</td>
<td align="center" valign="top">110</td>
<td align="center" valign="top">53</td>
<td align="center" valign="top">79</td>
<td align="center" valign="top">155</td>
<td align="center" valign="top" colspan="2">95</td>
</tr>
<tr>
<td align="left" valign="top">FN</td>
<td align="center" valign="top">25</td>
<td align="center" valign="top">38</td>
<td align="center" valign="top">75</td>
<td align="center" valign="top">70</td>
<td align="center" valign="top">47</td>
<td align="center" valign="top">61</td>
<td align="center" valign="top">75</td>
<td align="center" valign="top" colspan="2">65</td>
</tr>
<tr>
<td align="left" valign="top">Accuracy</td>
<td align="center" valign="top">0.981</td>
<td align="center" valign="top">0.953</td>
<td align="center" valign="top">0.866</td>
<td align="center" valign="top">0.936</td>
<td align="center" valign="top">0.917</td>
<td align="center" valign="top">0.883</td>
<td align="center" valign="top">0.808</td>
<td align="center" valign="top" colspan="2">0.867</td>
</tr>
<tr>
<td align="left" valign="top">Kappa</td>
<td align="center" valign="top">0.962</td>
<td align="center" valign="top">0.905</td>
<td align="center" valign="top">0.732</td>
<td align="center" valign="top">0.871</td>
<td align="center" valign="top">0.833</td>
<td align="center" valign="top">0.766</td>
<td align="center" valign="top">0.617</td>
<td align="center" valign="top" colspan="2">0.734</td>
</tr>
<tr>
<td align="left" valign="top">Recall</td>
<td align="center" valign="top">0.982</td>
<td align="center" valign="top">0.973</td>
<td align="center" valign="top">0.946</td>
<td align="center" valign="top">0.950</td>
<td align="center" valign="top">0.922</td>
<td align="center" valign="top">0.898</td>
<td align="center" valign="top">0.875</td>
<td align="center" valign="top" colspan="2">0.892</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec21">
<label>4.3</label>
<title>Generation of flood susceptibility map</title>
<p>Using the filtered raster data of flood disaster influencing factors in the BTH region along with their corresponding sample labels as input, each machine learning model generated a flood susceptibility index (FSI) for every raster cell. The FSI values range from 0 to 1, with higher values indicating greater flood occurrence probability. Based on the overall probability calculations, a 90&#x202F;&#x00D7;&#x202F;90&#x202F;m resolution probability raster map was produced using ArcGIS. The probability values were classified into five susceptibility levels&#x2014;very low, low, moderate, high, and very high&#x2014;using the natural breaks method. Subsequently, flood susceptibility zoning maps for the four models were obtained (<xref ref-type="fig" rid="fig7">Figure 7</xref>). In these maps, black solid dots denote training set flood points and dark blue solid dots denote validation set flood points. The overlaid points illustrate that flood occurrences are predominantly concentrated in High and Very High susceptibility zones across all four model-generated maps, with minimal presence in low-susceptibility areas. This pattern aligns with the premise that floods occur predominantly in high-risk regions, supporting the rationality of the training process. The spatial consistency between predicted high-risk areas and observed flood locations across both datasets demonstrates the reliability of susceptibility predictions. Moreover, the validation set closely mirrors the spatial distribution of the training set despite being excluded from model fitting, confirming both the representativeness of the sample split and the generalizability of the models.</p>
<fig position="float" id="fig7">
<label>Figure 7</label>
<caption>
<p>Flood susceptibility maps for the BTH region generated by four ML models: <bold>(A)</bold> RF, <bold>(B)</bold> XGBoost, <bold>(C)</bold> SVM, and <bold>(D)</bold> MLP. Susceptibility is classified into five levels, with training and validation flood points overlaid for reference.</p>
</caption>
<graphic xlink:href="frwa-08-1767400-g007.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Four maps display flood susceptibility in a region using different machine learning models: (A) RF, (B) XGBoost, (C) SVM, and (D) MLP. Colors indicate susceptibility levels from very low (green) to very high (red). Blue dots represent validating datasets and black dots training datasets. Each map uses the same legend and scale, covering coordinates from 108&#x00B0;E to 126&#x00B0;E longitude and 30&#x00B0;N to 34&#x00B0;N latitude. North is indicated.</alt-text>
</graphic>
</fig>
<p>To quantitatively evaluate these maps, the area proportion, flood point proportion, and frequency ratio (<italic>FR</italic>) within each susceptibility level were statistically analyzed (<xref ref-type="fig" rid="fig8">Figure 8</xref>). Notable differences were observed in the spatial distribution of FSI values across the four models within each risk level (<xref ref-type="fig" rid="fig8">Figure 8A</xref>). The RF model predicted that very low, low, moderate, high, and very high risk areas accounted for 43.38, 10.96, 16.23, 13.16, and 16.27% of the total area, respectively. For the XGBoost model, the corresponding proportions were 46.62, 12.21, 16.01, 11.56, and 13.60%. The MLP model yielded area proportions of 46.01, 8.73, 14.55, 15.83, and 14.88%, while the SVM model produced values of 31.84, 20.08, 9.33, 10.88, and 27.87%, respectively.</p>
<fig position="float" id="fig8">
<label>Figure 8</label>
<caption>
<p>Quantitative evaluation of four models&#x2019; flood susceptibility zonation: <bold>(A)</bold> Total area percentage, <bold>(B)</bold> historical flood point percentage, <bold>(C)</bold> FR value for each class.</p>
</caption>
<graphic xlink:href="frwa-08-1767400-g008.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Three bar charts compare flood risk levels using different models: RF, XGBoost, MLP, and SVM. Chart A shows area percentage by risk level, Chart B shows the number of floods percentage, and Chart C shows FR. Each chart categorizes data into very low, low, moderate, high, and very high levels. RF and XGBoost often show higher values at very high risk levels.</alt-text>
</graphic>
</fig>
<p>Although the models produced varying zoning results, their spatial distribution patterns were largely consistent, revealing the general flood susceptibility characteristics of the BTH region: (1) Very high and High susceptibility zones are primarily located in terrain transition zones, key hydrological nodes, and major flood detention areas. These include Beijing&#x2019;s Fangshan and Mentougou Districts; central and eastern parts of Baoding City in Hebei Province (Qingyuan District, Mancheng District, Zhuozhou City, Gaobeidian City); Yongnian and Congtai Districts in Handan City; Xiqing and Jinnan Districts in Tianjin; and eastern parts of Xingtai City in Hebei Province. (2) Moderate susceptibility zones are concentrated in Jinghai and Wuqing Districts of Tianjin, eastern Cangzhou in Hebei Province, and Tongzhou District in Beijing. These areas serve as secondary disaster-bearing zones within the regional flood control system, influenced by residual upstream floods, local heavy rainfall, and reduced drainage capacity. (3) Very low and Low susceptibility zones are predominantly found in the high-altitude mountainous regions of the northwest, including northern Yanqing and Huairou Districts, northeastern Miyun District in Beijing; the Bashang Plateau in Zhangjiakou City; northern Chengde (Weichang and Fengning Counties) in Hebei Province; and the northern part of Jizhou District in Tianjin. These areas are characterized by steep topography, efficient runoff drainage, low rainfall frequency due to semi-arid climate, sparse river networks, and limited human exposure, resulting in significantly lower flood risk compared to lowland areas.</p>
<p>As illustrated in <xref ref-type="fig" rid="fig8">Figure 8B</xref>, the spatial distribution of flood disasters across different risk levels indicates that all four models achieved high predictive accuracy. Specifically, the Very high and High susceptibility zones identified by the RF, XGBoost, MLP, and SVM models encompassed 84, 86.3, 79.05, and 76.95% of historical flood disaster points, respectively. In contrast, the Very low and Low susceptibility zones accounted for only 5.2, 5.1, 5.5, and 6.95% of the disaster points, thereby validating the reliability and rationality of the model predictions. Among the four models, XGBoost demonstrated the highest capture rate of flood events in high-risk areas and the lowest misclassification rate in low-risk areas, indicating superior overall performance.</p>
<p>The <italic>FR</italic> values of flood disasters across different susceptibility levels serve as indicators of the predictive reliability of the susceptibility maps. As depicted in <xref ref-type="fig" rid="fig8">Figure 8C</xref>, the <italic>FR</italic> values for all four models exhibit a consistent upward trend with increasing susceptibility levels, demonstrating their effectiveness in differentiating flood susceptibility grades within the study area. Among the models, XGBoost achieved the highest <italic>FR</italic> value (4.73) in the very high susceptibility category, followed by RF (4.07), SVM (3.18), and MLP (2.59). These results indicate that the susceptibility map generated by the XGBoost model possesses the greatest predictive reliability and provides a more accurate representation of the spatial distribution patterns and clustering characteristics of flood disasters.</p>
</sec>
</sec>
<sec sec-type="discussion" id="sec22">
<label>5</label>
<title>Discussion</title>
<sec id="sec23">
<label>5.1</label>
<title>Rationale for factor selection</title>
<p>With the ongoing impacts of climate change and intensified human activities, the frequency of extreme precipitation events has increased, leading to a continuous rise in flood disaster risks. Scientific assessment of flood susceptibility has thus become an essential foundation for effective disaster prevention and risk mitigation strategies. Regional flood susceptibility mapping enables systematic identification of potential risk levels and spatial distribution patterns, providing critical scientific support for disaster risk management and planning. This study established a comprehensive 15-factor evaluation framework for the BTH region, incorporating variables related to topography, hydrology, soil properties, and land cover. Given the intercorrelation among certain factors and the relatively low predictive contribution of others, direct model construction without prior screening could result in unstable predictions and reduced model reliability. Therefore, the rational selection and optimization of influencing factors are essential for developing a robust and accurate flood susceptibility assessment model. To address this, the study integrated multicollinearity analysis with IGR screening to effectively remove redundant variables and identify key driving factors. This approach facilitated the construction of a more stable and precise flood susceptibility model. The results indicated that annual average rainfall exhibited significant multicollinearity with other variables. The <italic>IGR</italic> analysis results show that factors such as Elevation, RD, AMR24, and Slope have the greatest contribution, while SPI and PlaC have relatively minor influences. This result is consistent with the conclusions of existing studies (<xref ref-type="bibr" rid="ref36">Tehrany et al., 2015a</xref>; <xref ref-type="bibr" rid="ref3">Andaryani et al., 2021</xref>; <xref ref-type="bibr" rid="ref30">Pham et al., 2021</xref>).</p>
</sec>
<sec id="sec24">
<label>5.2</label>
<title>Assessment of model reliability</title>
<p>To scientifically identify high-risk flood areas and provide a reliable basis for disaster prevention planning, it is necessary to use the best-performing model to map susceptibility zones. This study evaluates the performance of four machine learning models&#x2014;SVM, RF, MLP, and XGBoost&#x2014;in assessing flood susceptibility within the BTH region, with the objective of identifying the most suitable predictive approach. ROC curve analysis and statistical evaluation indices indicate that all four models achieved AUC values greater than 0.85 and Accuracy exceeding 0.8, demonstrating their applicability for flood susceptibility assessment in the study area. The predictive performance ranking is as follows: XGBoost&#x202F;&#x003E;&#x202F;RF&#x202F;&#x003E;&#x202F;MLP&#x202F;&#x003E;&#x202F;SVM. The validation set results show that XGBoost, relying on the gradient boosting ensemble mechanism, effectively suppresses overfitting while maintaining a high Accuracy of 0.917 through iterative optimization of residuals and regularization constraints. It achieves the best Sensitivity of 0.922 and a Kappa of 0.833, demonstrating a significant lead in overall performance. RF, using Bootstrap aggregation and random feature subspace strategies, has good robustness, but due to the lack of a gradient optimization mechanism, its classification Accuracy (0.883) and Sensitivity (0.898) are slightly lower than those of XGBoost. SVM, when dealing with high-dimensional geographic data, is limited by the mapping ability of the kernel function and the structure of the samples, resulting in insufficient classification boundary fitting, with the lowest Accuracy (0.808) and Sensitivity (0.875) among the four. 
Although MLP has strong nonlinear fitting potential, due to the influence of the training sample size and the sensitivity of hyperparameters, the Kappa of the validation set (0.734) decreases by approximately 15.7% compared to the training set, reflecting a certain insufficiency in generalization ability.</p>
<p>The aforementioned results further underscore the importance of aligning algorithmic characteristics with the specific requirements of practical problems. Compared to single-model approaches, ensemble learning models demonstrate superior performance by effectively integrating the strengths of multiple base models. These models not only enable more precise identification and quantification of key driving factors, but also exhibit enhanced prediction accuracy and generalization capabilities in complex environments. This makes them particularly well-suited for disaster risk assessment in large-scale and high-risk regions (<xref ref-type="bibr" rid="ref33">Shahabi et al., 2020</xref>; <xref ref-type="bibr" rid="ref57">Zhu et al., 2024</xref>).</p>
</sec>
<sec id="sec25">
<label>5.3</label>
<title>Flood susceptibility zoning and sensitivity analysis</title>
<p>Although the four models differ in the proportion of risk-affected areas, their spatial distribution patterns of high-risk zones show overall consistency, further validating the reliability of the model assessment results. To further clarify the key influencing factors and their interactions in flood occurrence, this study evaluates the XGBoost model with the highest prediction accuracy. By calculating the correlation degree between the model and each influencing factor, we analyze the response sensitivity of these factors to flood events. The Spearman correlation coefficient method, which assesses correlations through variable rank order without requiring data to follow specific distributions, demonstrates advantages in analyzing nonlinear relationships (<xref ref-type="bibr" rid="ref24">Lan et al., 2020</xref>). Calculations revealed that the Spearman non-parametric correlation coefficient vector between the XGBoost model and 13 influencing factors was (&#x2212;0.822, &#x2212;0.096, &#x2212;0.504, &#x2212;0.561, &#x2212;0.205, &#x2212;0.645, &#x2212;0.137, 0.000, 0.569, 0.417, &#x2212;0.536, 0.579, &#x2212;0.692)<sup>T</sup>. Elevation and TRI showed extremely strong negative correlations with risk, while TWI and AMR24 exhibited strong positive correlations. RD demonstrated moderate to low positive correlation with risk. Therefore, topographic conditions, water system development, and soil moisture saturation status are key influencing factors for flood disaster generation and incubation. Building on this foundation, further identification of interaction patterns and relationships among these key factors will help clarify the mechanisms of disaster susceptibility and causation in high-risk areas, thereby providing scientific basis for regional flood prevention and resilience-building strategies. 
Statistical analysis of corresponding indicators in high-risk zones reveals that low altitude (&#x003C;227&#x202F;m), gentle slopes (&#x003C;4.82&#x00B0;), higher terrain humidity (TWI between 7.51 and 10.87), well-developed river networks, and heavy short-term rainfall collectively form the combination conditions prone to triggering flood disasters. These areas are predominantly pre-mountain alluvial plains, confluence zones of major water systems in middle-lower reaches, or flood control project-affected regions, characterized by weak natural drainage capacity, high proportions of impermeable surfaces, and susceptibility to human activities. Assessment results show good alignment between high-risk areas and the actual flooding conditions of the &#x201C;23&#x00B7;7&#x201D; flood event. Specifically, Fangshan District and Mentougou District, which recorded process rainfall of 1014.5&#x202F;mm, served as extreme rainstorm centers, experiencing severe mountain floods and urban flooding (<xref ref-type="bibr" rid="ref55">Zhao et al., 2024</xref>). The rapid discharge of floodwaters from the Yongding River&#x2019;s Yanchi to Lugou Bridge section in the Taihang Mountains caused damage to the Lugou Bridge hub and riverbank protection structures and posed risks to multiple bridge foundations. In Baoding&#x2019;s Zhuozhou City, areas along the Juma River and Dashih River were affected by excessive floodwaters from the Dashih River and Juma River in the Daqing River system, resulting in large-scale flooding and relocation efforts.</p>
<p>Notably, the Xiaoqing River Flood Storage Area in southern Tianjin has been activated during flood regulation, with a storage capacity of 9.6&#x202F;km<sup>2</sup>, playing a crucial role in flood diversion and disaster mitigation. Therefore, systematic and adaptive resilience construction based on regional flood distribution patterns and disaster mechanisms is essential for enhancing overall disaster prevention efficiency and safeguarding public safety in socio-economic systems (<xref ref-type="bibr" rid="ref52">Zhang et al., 2020</xref>). For instance, in urban areas like Beijing&#x2019;s City Sub-center, Tianjin Binhai New Area, and Xiongan New Area, water storage spaces should be reserved during urban renewal while strengthening infrastructure resilience. This includes developing permeable green spaces and ecological drainage facilities, as well as advancing smart drainage network upgrades. In high-risk zones with dense farmland such as eastern Baoding and Handan, adaptive agriculture integrated with ecological protection should be promoted. This involves cultivating flood-resistant crops, improving farmland drainage systems, and exploring agricultural disaster insurance to enhance resilience against extreme weather. Additionally, in key ecological protection areas along the Yongding River, Daqing River wetlands, and Baiyangdian Lake, environmental protection must be reinforced while prohibiting occupation of flood channels. Measures like converting farmland to wetlands and constructing ecological shorelines should enhance flood control and storage capabilities. Flood risk boundaries should be clearly defined in territorial spatial planning, with preventive planning management implemented.</p>
<p>By coordinating flood control and disaster reduction engineering, the adaptive adjustment of facilities and industries, and ecological construction with flood storage functions, regional flood disaster prevention and mitigation can be steered toward a more adaptive and resilient path.</p>
</sec>
<sec id="sec26">
<label>5.4</label>
<title>Limitations and future work</title>
<p>This study has certain limitations in the selection of model input factors. While the current analysis primarily incorporates static variables related to terrain, hydrology, and land cover, it does not account for dynamic factors such as urban drainage system efficiency, real-time radar-based rainfall data, flood control infrastructure distribution, and human activity patterns. As a result, the model exhibits reduced accuracy in simulating extreme rainstorm scenarios, limiting its ability to capture the rapid interaction mechanisms between short-duration intense rainfall and urban flooding. Future research should aim to integrate multi-source datasets&#x2014;including urban drainage infrastructure data, real-time precipitation monitoring, and dynamic human activity indicators&#x2014;to develop a collaborative flood risk assessment framework capable of enhancing predictive performance under extreme weather conditions.</p>
</sec>
</sec>
<sec sec-type="conclusions" id="sec27">
<label>6</label>
<title>Conclusion</title>
<list list-type="simple">
<list-item>
<p>(1) A comprehensive spatial dataset was established by selecting 15 flood disaster influencing factors related to topography and geomorphology, meteorological and hydrological conditions, characteristics of the underlying surface, and human activities within the BTH region. Multicollinearity among the selected factors was assessed using TOL and VIF analyses. The results indicated significant multicollinearity between annual average rainfall and other variables. The IGR for each factor was calculated, revealing that the IGR of planar curvature approached zero. Elevation, distance from rivers, average 24-h maximum rainfall, and slope were identified as the most critical factors influencing flood occurrence in the region.</p>
</list-item>
<list-item>
<p>(2) The performance of four machine learning models&#x2014;XGBoost, RF, MLP, and SVM&#x2014;was evaluated and compared using the receiver operating characteristic (ROC) curve and various statistical evaluation metrics. The XGBoost model demonstrated the highest predictive Accuracy, achieving an AUC value of 0.938. The RF model performed slightly lower (AUC&#x202F;=&#x202F;0.920), followed by the MLP model (AUC&#x202F;=&#x202F;0.867), while the SVM model exhibited the lowest performance (AUC&#x202F;=&#x202F;0.854). Furthermore, the XGBoost model outperformed the others in terms of Accuracy (0.917), Kappa (0.833), and Sensitivity (0.922). Based on these findings, the XGBoost model was selected as the optimal model for generating the flood susceptibility map of the BTH region.</p>
</list-item>
<list-item>
<p>(3) Very High and High flood susceptibility zones in the BTH region are predominantly located in Fangshan District and Mentougou District of Beijing; Qingyuan District, Mancheng District, Zhuozhou City, and Gaobeidian City in central and eastern Baoding; Yongnian District and Congtai District of Handan City; eastern parts of Xingtai City; and Xiqing District and Jinnan District of Tianjin. These areas are characterized by complex hydrological conditions and are often associated with flood detention zones or critical river confluences.</p>
</list-item>
<list-item>
<p>(4) Statistical analysis of flood point distribution across susceptibility zones revealed that the proportions of Very high susceptibility areas identified by the XGBoost, RF, SVM, and MLP models were 13.6, 16.27, 27.87, and 14.88%, respectively. Evaluation results further indicated that 67.05, 62, 52.35, and 47.35% of historical flood points fell within the very high susceptibility zones predicted by the XGBoost, RF, SVM, and MLP models, respectively. The XGBoost and RF models exhibited a higher degree of alignment between predicted susceptibility zones and actual flood occurrences, demonstrating superior capability in concentrating flood points within the very high susceptibility areas. In contrast, the SVM and MLP models showed relatively weaker performance, with MLP being the least accurate. Overall, the XGBoost model demonstrated greater rationality in delineating high-risk zones and exhibited superior flood point capture capability.</p>
</list-item>
</list>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec28">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref rid="SM1" ref-type="supplementary-material">Supplementary material</xref>, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="sec29">
<title>Author contributions</title>
<p>HJL: Conceptualization, Data curation, Funding acquisition, Methodology, Writing &#x2013; review &#x0026; editing. JD: Formal analysis, Investigation, Software, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. SZ: Formal analysis, Project administration, Resources, Writing &#x2013; review &#x0026; editing. YZ: Data curation, Project administration, Writing &#x2013; review &#x0026; editing. HTL: Methodology, Supervision, Writing &#x2013; review &#x0026; editing. YP: Investigation, Resources, Software, Visualization, Writing &#x2013; review &#x0026; editing. BD: Data curation, Investigation, Methodology, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec sec-type="COI-statement" id="sec30">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec31">
<title>Generative AI statement</title>
<p>The author(s) declared that Generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="sec32">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="sec33">
<title>Supplementary material</title>
<p>The Supplementary material for this article can be found online at: <ext-link xlink:href="https://www.frontiersin.org/articles/10.3389/frwa.2026.1767400/full#supplementary-material" ext-link-type="uri">https://www.frontiersin.org/articles/10.3389/frwa.2026.1767400/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.pdf" id="SM1" mimetype="application" mime-subtype="pdf"/>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Abedi</surname><given-names>R.</given-names></name> <name><surname>Gholamnia</surname><given-names>M.</given-names></name> <name><surname>Ghasemi</surname><given-names>S.</given-names></name> <name><surname>Mohammadi</surname><given-names>F.</given-names></name> <name><surname>Shahabi</surname><given-names>H.</given-names></name></person-group> (<year>2022</year>). <article-title>Flash-flood susceptibility mapping based on XGBoost, random forest and boosted regression trees</article-title>. <source>Geocarto Int.</source> <volume>37</volume>, <fpage>5479</fpage>&#x2013;<lpage>5496</lpage>. doi: <pub-id pub-id-type="doi">10.1080/10106049.2021.1913577</pub-id></mixed-citation></ref>
<ref id="ref2"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Akg&#x00FC;n</surname><given-names>A.</given-names></name> <name><surname>Da&#x011F;</surname><given-names>S.</given-names></name> <name><surname>Bulut</surname><given-names>F.</given-names></name></person-group> (<year>2008</year>). <article-title>Landslide susceptibility mapping for a landslide-prone area (Findikli, NE of Turkey) by likelihood-frequency ratio and weighted linear combination models</article-title>. <source>Environ. Geol.</source> <volume>54</volume>, <fpage>1127</fpage>&#x2013;<lpage>1143</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00254-007-0897-5</pub-id></mixed-citation></ref>
<ref id="ref3"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Andaryani</surname><given-names>S.</given-names></name> <name><surname>Pourghasemi</surname><given-names>H. R.</given-names></name> <name><surname>Panahi</surname><given-names>M.</given-names></name> <name><surname>Rezaie</surname><given-names>F.</given-names></name> <name><surname>Blaschke</surname><given-names>T.</given-names></name> <name><surname>Nhu</surname><given-names>V.-H.</given-names></name></person-group> (<year>2021</year>). <article-title>Integration of hard and soft supervised machine learning for flood susceptibility mapping</article-title>. <source>J. Environ. Manag.</source> <volume>291</volume>:<fpage>112731</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jenvman.2021.112731</pub-id></mixed-citation></ref>
<ref id="ref4"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Antangelo</surname><given-names>N.</given-names></name> <name><surname>Conversini</surname><given-names>P.</given-names></name> <name><surname>Rotigliano</surname><given-names>E.</given-names></name> <name><surname>Agnesi</surname><given-names>V.</given-names></name></person-group> (<year>2011</year>). <article-title>Flood susceptibility assessment in a highly urbanized alluvial fan: the case study of Sala Consilina (southern Italy)</article-title>. <source>Nat. Hazards Earth Syst. Sci.</source> <volume>11</volume>, <fpage>2765</fpage>&#x2013;<lpage>2780</lpage>. doi: <pub-id pub-id-type="doi">10.5194/nhess-11-2765-2011</pub-id></mixed-citation></ref>
<ref id="ref5"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Avand</surname><given-names>M.</given-names></name> <name><surname>Mohammady</surname><given-names>M.</given-names></name> <name><surname>Pourghasemi</surname><given-names>H. R.</given-names></name> <name><surname>Naghibi</surname><given-names>S. A.</given-names></name></person-group> (<year>2019</year>). <article-title>A comparative assessment of random forest and k-nearest neighbor classifiers for gully erosion susceptibility mapping</article-title>. <source>Water</source> <volume>11</volume>:<fpage>2076</fpage>. doi: <pub-id pub-id-type="doi">10.3390/w11102076</pub-id></mixed-citation></ref>
<ref id="ref6"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Basirian</surname><given-names>S.</given-names></name> <name><surname>Najafzadeh</surname><given-names>M.</given-names></name> <name><surname>Demir</surname><given-names>I.</given-names></name></person-group> (<year>2026</year>). <article-title>Water quality monitoring for coastal hypoxia: integration of satellite imagery and machine learning models</article-title>. <source>Mar. Pollut. Bull.</source> <volume>222</volume>:<fpage>118735</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.marpolbul.2026.118735</pub-id></mixed-citation></ref>
<ref id="ref7"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Benito</surname><given-names>G.</given-names></name> <name><surname>Ouarda</surname><given-names>T. B. M.</given-names></name> <name><surname>B&#x00E1;rdossy</surname><given-names>A.</given-names></name></person-group> (<year>2005</year>). <article-title>Applications of palaeoflood hydrology and historical data in flood risk analysis</article-title>. in <person-group person-group-type="editor"><name><surname>Wang</surname><given-names>J. P.</given-names></name> <name><surname>Wang</surname><given-names>Z. Y.</given-names></name></person-group> (eds.), <source>Proceedings of the International Symposium on Flood Defence</source>. <publisher-loc>Beijing</publisher-loc>: <publisher-name>Tsinghua University Press</publisher-name>, pp. <fpage>165</fpage>&#x2013;<lpage>176</lpage>.</mixed-citation></ref>
<ref id="ref8"><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Bonham-Carter</surname><given-names>G. F.</given-names></name></person-group> (<year>1994</year>). <source>Geographic Information Systems for Geoscientists: Modelling with GIS</source>. <publisher-loc>Oxford</publisher-loc>: <publisher-name>Pergamon Press</publisher-name>.</mixed-citation></ref>
<ref id="ref9"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cao</surname><given-names>W. G.</given-names></name> <name><surname>Pan</surname><given-names>D.</given-names></name> <name><surname>Xu</surname><given-names>Z. J.</given-names></name> <name><surname>Chen</surname><given-names>H. Y.</given-names></name> <name><surname>Li</surname><given-names>X.</given-names></name></person-group> (<year>2025</year>). <article-title>Landslide disaster vulnerability mapping study in Henan Province: comparison of different machine learning models</article-title>. <source>Geol. Sci. Technol. Bull.</source> <volume>44</volume>, <fpage>101</fpage>&#x2013;<lpage>111</lpage>. doi: <pub-id pub-id-type="doi">10.19509/j.cnki.dzkq.2025.0109</pub-id></mixed-citation></ref>
<ref id="ref10"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname><given-names>J.</given-names></name> <name><surname>Li</surname><given-names>Q.</given-names></name> <name><surname>Wang</surname><given-names>H.</given-names></name> <name><surname>Deng</surname><given-names>M.</given-names></name> <name><surname>Liang</surname><given-names>Q.</given-names></name></person-group> (<year>2020</year>). <article-title>A machine learning ensemble approach based on random forest and radial basis function neural network for risk evaluation of regional flood disaster: a case study of the Yangtze River Delta, China</article-title>. <source>Int. J. Environ. Res. Public Health</source> <volume>17</volume>:<fpage>49</fpage>. doi: <pub-id pub-id-type="doi">10.3390/ijerph17010049</pub-id>, <pub-id pub-id-type="pmid">31861677</pub-id></mixed-citation></ref>
<ref id="ref11"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chi</surname><given-names>H. Q.</given-names></name> <name><surname>Peng</surname><given-names>X. D.</given-names></name> <name><surname>Hu</surname><given-names>X. Z.</given-names></name> <name><surname>Yang</surname><given-names>C.</given-names></name> <name><surname>Zhang</surname><given-names>Y.</given-names></name></person-group> (<year>2025</year>). <article-title>Quantitative analysis of the optimal extraction area of relief amplitude and geomorphological classification research: a case study of Wuan City</article-title>. <source>Sci. Technol. Eng.</source> <volume>25</volume>, <fpage>4017</fpage>&#x2013;<lpage>4026</lpage>. doi: <pub-id pub-id-type="doi">10.3969/j.issn.1671-1815.2025.10.037</pub-id></mixed-citation></ref>
<ref id="ref12"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Demissie</surname><given-names>Z. A.</given-names></name> <name><surname>Ahmed</surname><given-names>M.</given-names></name> <name><surname>Ates</surname><given-names>S.</given-names></name> <name><surname>Demissie</surname><given-names>T. A.</given-names></name></person-group> (<year>2024</year>). <article-title>Flood susceptibility mapping: integrating machine learning and GIS for enhanced risk assessment</article-title>. <source>Appl. Comput. Geosci.</source> <volume>23</volume>:<fpage>100183</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.acags.2024.100183</pub-id></mixed-citation></ref>
<ref id="ref13"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Desta</surname><given-names>M. D.</given-names></name> <name><surname>Tesseme</surname><given-names>T.</given-names></name> <name><surname>Yigezu</surname><given-names>T. T.</given-names></name> <name><surname>Nigussie</surname><given-names>A. B.</given-names></name></person-group> (<year>2025</year>). <article-title>Assessment of landfill site suitability using GIS, remote sensing, and the multi-criteria decision-making (AHP) approach, Ethiopia</article-title>. <source>Geol. Ecol. Landsc.</source> <volume>9</volume>, <fpage>662</fpage>&#x2013;<lpage>675</lpage>. doi: <pub-id pub-id-type="doi">10.1080/24749508.2024.2361246</pub-id></mixed-citation></ref>
<ref id="ref14"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Elsafi</surname><given-names>S. H.</given-names></name></person-group> (<year>2014</year>). <article-title>Artificial neural networks (ANNs) for flood forecasting at Dongola Station in the River Nile, Sudan</article-title>. <source>Alex. Eng. J.</source> <volume>53</volume>, <fpage>655</fpage>&#x2013;<lpage>662</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.aej.2014.06.007</pub-id></mixed-citation></ref>
<ref id="ref15"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Farhadi</surname><given-names>H.</given-names></name> <name><surname>Esmaeily</surname><given-names>A.</given-names></name> <name><surname>Najafzadeh</surname><given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>Flood monitoring by integration of remote sensing technique and multi-criteria decision making method</article-title>. <source>Comput. Geosci.</source> <volume>160</volume>:<fpage>105045</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cageo.2021.105045</pub-id></mixed-citation></ref>
<ref id="ref16"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Farhadi</surname><given-names>H.</given-names></name> <name><surname>Najafzadeh</surname><given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>Flood risk mapping by remote sensing data and random forest technique</article-title>. <source>Water</source> <volume>13</volume>:<fpage>3115</fpage>. doi: <pub-id pub-id-type="doi">10.3390/w13213115</pub-id></mixed-citation></ref>
<ref id="ref17"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hasanzuzzaman</surname><given-names>M.</given-names></name> <name><surname>Saha</surname><given-names>S.</given-names></name> <name><surname>Kundu</surname><given-names>S.</given-names></name> <name><surname>Pham</surname><given-names>Q. B.</given-names></name> <name><surname>Zhang</surname><given-names>H.</given-names></name></person-group> (<year>2022</year>). <article-title>A comparison of performance measures of three machine learning algorithms for flood susceptibility mapping of river Silabati (tropical river, India)</article-title>. <source>Phys. Chem. Earth Parts A/B/C</source> <volume>127</volume>:<fpage>103198</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.pce.2022.103198</pub-id></mixed-citation></ref>
<ref id="ref18"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>He</surname><given-names>H. L.</given-names></name> <name><surname>Zhang</surname><given-names>Y. S.</given-names></name> <name><surname>Zhang</surname><given-names>F.</given-names></name> <name><surname>Zhang</surname><given-names>J. H.</given-names></name> <name><surname>Li</surname><given-names>J.</given-names></name></person-group> (<year>2024</year>). <article-title>Enhancing seismic landslide susceptibility analysis for sustainable disaster risk management through machine learning</article-title>. <source>Sustainability</source> <volume>16</volume>:<fpage>3828</fpage>. doi: <pub-id pub-id-type="doi">10.3390/su16093828</pub-id></mixed-citation></ref>
<ref id="ref19"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hou</surname><given-names>J. L.</given-names></name> <name><surname>Ma</surname><given-names>Z. Q.</given-names></name> <name><surname>Yang</surname><given-names>C.</given-names></name> <name><surname>Liu</surname><given-names>Y. H.</given-names></name> <name><surname>Wang</surname><given-names>X. Y.</given-names></name></person-group> (<year>2024</year>). <article-title>Analysis of spatio-temporal variation of vegetation carbon sources and sinks in the Beijing-Tianjin-Hebei region and influencing factors</article-title>. <source>Ecol. Environ. Sci.</source> <volume>33</volume>, <fpage>1329</fpage>&#x2013;<lpage>1338</lpage>. doi: <pub-id pub-id-type="doi">10.16258/j.cnki.1674-5906.2024.09.002</pub-id></mixed-citation></ref>
<ref id="ref20"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hou</surname><given-names>J. W.</given-names></name> <name><surname>Ye</surname><given-names>A. Z.</given-names></name> <name><surname>Gan</surname><given-names>Y. J.</given-names></name> <name><surname>Wang</surname><given-names>H.</given-names></name></person-group> (<year>2018</year>). <article-title>Research and improvement on assessment method of flood hazard</article-title>. <source>South-to-North Water Transf. Water Sci. Technol.</source> <volume>16</volume>, <fpage>57</fpage>&#x2013;<lpage>62</lpage>. doi: <pub-id pub-id-type="doi">10.13476/j.cnki.nsbdqk.2018.01.010</pub-id></mixed-citation></ref>
<ref id="ref21"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname><given-names>G. R.</given-names></name> <name><surname>Luo</surname><given-names>H. W.</given-names></name> <name><surname>Chen</surname><given-names>W. J.</given-names></name> <name><surname>Li</surname><given-names>J.</given-names></name></person-group> (<year>2019</year>). <article-title>Scenario simulation and risk assessment of urban flood in Donghaochong basin, Guangzhou</article-title>. <source>Adv. Water Sci.</source> <volume>30</volume>, <fpage>643</fpage>&#x2013;<lpage>652</lpage>. doi: <pub-id pub-id-type="doi">10.14042/j.cnki.32.1309.2019.05.004</pub-id></mixed-citation></ref>
<ref id="ref22"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jiao</surname><given-names>Z. J.</given-names></name> <name><surname>Zhang</surname><given-names>Z. M.</given-names></name> <name><surname>Wu</surname><given-names>L. X.</given-names></name></person-group> (<year>2024</year>). <article-title>SAR-based dynamic information retrieving of the Beijing-Tianjin-Hebei flood-inundation happened in July 2023, North China</article-title>. <source>Geomat. Nat. Hazards Risk</source> <volume>15</volume>:<fpage>2366361</fpage>. doi: <pub-id pub-id-type="doi">10.1080/19475705.2024.2366361</pub-id></mixed-citation></ref>
<ref id="ref23"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kamran</surname><given-names>K.</given-names></name> <name><surname>Valizadeh</surname><given-names>K.</given-names></name> <name><surname>Pourghasemi</surname><given-names>H. R.</given-names></name> <name><surname>Heidari</surname><given-names>Z.</given-names></name></person-group> (<year>2021</year>). <article-title>A comparative approach of support vector machine kernel functions for GIS-based landslide susceptibility mapping</article-title>. <source>Appl. Geomat.</source> <volume>13</volume>, <fpage>837</fpage>&#x2013;<lpage>851</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s12518-021-00365-9</pub-id></mixed-citation></ref>
<ref id="ref24"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lan</surname><given-names>W.</given-names></name> <name><surname>Che</surname><given-names>C.</given-names></name> <name><surname>Tao</surname><given-names>C.</given-names></name></person-group> (<year>2020</year>). <article-title>Selection of single spectral components based on Spearman rank correlation and its application in SAR target recognition</article-title>. <source>J. Wave Sci.</source> <volume>35</volume>, <fpage>414</fpage>&#x2013;<lpage>421</lpage>. doi: <pub-id pub-id-type="doi">10.13443/j.cjors.2019063001</pub-id></mixed-citation></ref>
<ref id="ref25"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname><given-names>W.</given-names></name> <name><surname>Kiaghadi</surname><given-names>A.</given-names></name> <name><surname>Dawson</surname><given-names>C.</given-names></name></person-group> (<year>2021</year>). <article-title>Exploring the best sequence LSTM modeling architecture for flood prediction</article-title>. <source>Neural Comput. &#x0026; Applic.</source> <volume>33</volume>, <fpage>5571</fpage>&#x2013;<lpage>5580</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00521-021-06241-x</pub-id></mixed-citation></ref>
<ref id="ref26"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname><given-names>X. C.</given-names></name> <name><surname>Zhang</surname><given-names>Y. X.</given-names></name> <name><surname>Li</surname><given-names>W.</given-names></name> <name><surname>Wang</surname><given-names>J.</given-names></name></person-group> (<year>2023</year>). <article-title>Extreme characteristics of &#x201C;23&#x00B7;7&#x201D; heavy rain in Beijing-Tianjin-Hebei and its implications for urban flood control in China</article-title>. <source>China Flood Drought Manag.</source> <volume>33</volume>, <fpage>13</fpage>&#x2013;<lpage>18</lpage>. doi: <pub-id pub-id-type="doi">10.16867/j.issn.1673-9264.2023381</pub-id></mixed-citation></ref>
<ref id="ref27"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lin</surname><given-names>P. Y.</given-names></name> <name><surname>Lin</surname><given-names>P. H.</given-names></name> <name><surname>Wang</surname><given-names>J.</given-names></name> <name><surname>Chen</surname><given-names>Y. H.</given-names></name></person-group> (<year>2023</year>). <article-title>Typhoon disaster risk assessment and dynamic risk forecasts in Zhejiang Province based on machine learning methods</article-title>. <source>J. Nat. Disaster.</source> <volume>32</volume>, <fpage>13</fpage>&#x2013;<lpage>24</lpage>. doi: <pub-id pub-id-type="doi">10.13577/j.jnd.2023.0402</pub-id></mixed-citation></ref>
<ref id="ref28"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Madhuri</surname><given-names>R.</given-names></name> <name><surname>Sistla</surname><given-names>S.</given-names></name> <name><surname>Raju</surname><given-names>K. S.</given-names></name></person-group> (<year>2021</year>). <article-title>Application of machine learning algorithms for flood susceptibility assessment and risk management</article-title>. <source>J. Water Clim. Change</source> <volume>12</volume>, <fpage>2608</fpage>&#x2013;<lpage>2623</lpage>. doi: <pub-id pub-id-type="doi">10.2166/wcc.2021.321</pub-id></mixed-citation></ref>
<ref id="ref29"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mojaddadi</surname><given-names>H.</given-names></name> <name><surname>Pradhan</surname><given-names>B.</given-names></name> <name><surname>Nampak</surname><given-names>H.</given-names></name> <name><surname>Ahmad</surname><given-names>N.</given-names></name> <name><surname>Ghazali</surname><given-names>A. H. B.</given-names></name></person-group> (<year>2017</year>). <article-title>Ensemble machine-learning-based geospatial approach for flood risk assessment using multi-sensor remote-sensing data and GIS</article-title>. <source>Geomat. Nat. Hazards Risk</source> <volume>8</volume>, <fpage>1080</fpage>&#x2013;<lpage>1102</lpage>. doi: <pub-id pub-id-type="doi">10.1080/19475705.2017.1294113</pub-id></mixed-citation></ref>
<ref id="ref30"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pham</surname><given-names>B. T.</given-names></name> <name><surname>Nguyen</surname><given-names>M. D.</given-names></name> <name><surname>Nguyen</surname><given-names>H.</given-names></name> <name><surname>Bui</surname><given-names>D. T.</given-names></name> <name><surname>Prakash</surname><given-names>I.</given-names></name></person-group> (<year>2021</year>). <article-title>Flood risk assessment using deep learning integrated with multi-criteria decision analysis</article-title>. <source>Knowl. Based Syst.</source> <volume>219</volume>:<fpage>106899</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.knosys.2021.106899</pub-id></mixed-citation></ref>
<ref id="ref31"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Prasad</surname><given-names>P.</given-names></name> <name><surname>Roy</surname><given-names>P. K.</given-names></name> <name><surname>Sahana</surname><given-names>M.</given-names></name> <name><surname>Chakraborty</surname><given-names>A.</given-names></name></person-group> (<year>2022</year>). <article-title>Novel ensemble machine learning models in flood susceptibility mapping</article-title>. <source>Geocarto Int.</source> <volume>37</volume>, <fpage>4571</fpage>&#x2013;<lpage>4593</lpage>. doi: <pub-id pub-id-type="doi">10.1080/10106049.2021.1885234</pub-id></mixed-citation></ref>
<ref id="ref32"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rahman</surname><given-names>M.</given-names></name> <name><surname>Islam</surname><given-names>A. R. M. T.</given-names></name> <name><surname>Islam</surname><given-names>M. A.</given-names></name> <name><surname>Khan</surname><given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>Flood susceptibility assessment in Bangladesh using machine learning and multi-criteria decision analysis</article-title>. <source>Earth Syst. Environ.</source> <volume>3</volume>, <fpage>585</fpage>&#x2013;<lpage>601</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s41748-019-00105-2</pub-id></mixed-citation></ref>
<ref id="ref33"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shahabi</surname><given-names>H.</given-names></name> <name><surname>Rahimzadeh</surname><given-names>M.</given-names></name> <name><surname>Alizadeh</surname><given-names>M.</given-names></name> <name><surname>Pourghasemi</surname><given-names>H. R.</given-names></name></person-group> (<year>2020</year>). <article-title>Flood detection and susceptibility mapping using Sentinel-1 remote sensing data and a machine learning approach: hybrid intelligence of bagging ensemble based on k-nearest neighbor classifier</article-title>. <source>Remote Sens.</source> <volume>12</volume>:<fpage>266</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs12020266</pub-id></mixed-citation></ref>
<ref id="ref34"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shams</surname><given-names>M. Y.</given-names></name> <name><surname>El-Baz</surname><given-names>A. A.</given-names></name> <name><surname>Mohamed</surname><given-names>A. S.</given-names></name> <name><surname>Alshehri</surname><given-names>A. H.</given-names></name></person-group> (<year>2024</year>). <article-title>Water quality prediction using machine learning models based on grid search method</article-title>. <source>Multimed. Tools Appl.</source> <volume>83</volume>, <fpage>35307</fpage>&#x2013;<lpage>35334</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11042-024-18933-7</pub-id></mixed-citation></ref>
<ref id="ref35"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Taalab</surname><given-names>K.</given-names></name> <name><surname>Cheng</surname><given-names>T.</given-names></name> <name><surname>Zhang</surname><given-names>Y.</given-names></name></person-group> (<year>2018</year>). <article-title>Mapping landslide susceptibility and types using random forest</article-title>. <source>Big Earth Data</source> <volume>2</volume>, <fpage>159</fpage>&#x2013;<lpage>178</lpage>. doi: <pub-id pub-id-type="doi">10.1080/20964471.2018.1478701</pub-id></mixed-citation></ref>
<ref id="ref36"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tehrany</surname><given-names>M. S.</given-names></name> <name><surname>Pradhan</surname><given-names>B.</given-names></name> <name><surname>Jebur</surname><given-names>M. N.</given-names></name></person-group> (<year>2015a</year>). <article-title>Flood susceptibility assessment using GIS-based support vector machine model with different kernel types</article-title>. <source>Catena</source> <volume>125</volume>, <fpage>91</fpage>&#x2013;<lpage>101</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.catena.2014.10.017</pub-id></mixed-citation></ref>
<ref id="ref37"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tehrany</surname><given-names>M. S.</given-names></name> <name><surname>Pradhan</surname><given-names>B.</given-names></name> <name><surname>Jebur</surname><given-names>M. N.</given-names></name></person-group> (<year>2015b</year>). <article-title>Flood susceptibility analysis and its verification using a novel ensemble support vector machine and frequency ratio method</article-title>. <source>Stoch. Environ. Res. Risk Assess.</source> <volume>29</volume>, <fpage>1149</fpage>&#x2013;<lpage>1165</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00477-015-1021-9</pub-id></mixed-citation></ref>
<ref id="ref38"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tsumita</surname><given-names>N.</given-names></name> <name><surname>Piyapong</surname><given-names>S.</given-names></name> <name><surname>Kaewkluengklom</surname><given-names>R.</given-names></name> <name><surname>Jaensirisak</surname><given-names>S.</given-names></name> <name><surname>Fukuda</surname><given-names>A.</given-names></name></person-group> (<year>2025</year>). <article-title>Flood susceptibility mapping of urban flood risk: comparing autoencoder multilayer perceptron and logistic regression models in Ubon Ratchathani, Thailand</article-title>. <source>Nat. Hazards</source> <volume>121</volume>, <fpage>17833</fpage>&#x2013;<lpage>17867</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11069-025-06983-9</pub-id></mixed-citation></ref>
<ref id="ref39"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Waghwala</surname><given-names>R. K.</given-names></name> <name><surname>Agnihotri</surname><given-names>P. G.</given-names></name></person-group> (<year>2019</year>). <article-title>Flood risk assessment and resilience strategies for flood risk management: a case study of Surat City</article-title>. <source>Int. J. Disaster Risk Reduct.</source> <volume>40</volume>:<fpage>101155</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ijdrr.2019.101155</pub-id></mixed-citation></ref>
<ref id="ref40"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>G. P.</given-names></name> <name><surname>Liu</surname><given-names>L. Y.</given-names></name> <name><surname>Hu</surname><given-names>Z. Y.</given-names></name> <name><surname>Li</surname><given-names>X.</given-names></name> <name><surname>Wang</surname><given-names>Y.</given-names></name></person-group> (<year>2020a</year>). <article-title>Risk assessment of rainstorm and flood disasters at grid-scale in Beijing-Tianjin-Hebei metropolitan area</article-title>. <source>J. Catastrophol.</source> <volume>35</volume>, <fpage>186</fpage>&#x2013;<lpage>193</lpage>. doi: <pub-id pub-id-type="doi">10.3969/j.issn.1000-811X.2020.03.035</pub-id></mixed-citation></ref>
<ref id="ref41"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>G. P.</given-names></name> <name><surname>Liu</surname><given-names>L. Y.</given-names></name> <name><surname>Li</surname><given-names>X.</given-names></name> <name><surname>Zhang</surname><given-names>Y.</given-names></name> <name><surname>Wang</surname><given-names>H.</given-names></name></person-group> (<year>2020b</year>). <article-title>Flood risk assessment based on fuzzy synthetic evaluation method in the Beijing-Tianjin-Hebei metropolitan area, China</article-title>. <source>Sustainability</source> <volume>12</volume>:<fpage>1451</fpage>. doi: <pub-id pub-id-type="doi">10.3390/su12041451</pub-id></mixed-citation></ref>
<ref id="ref42"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>Y.</given-names></name> <name><surname>Li</surname><given-names>Z.</given-names></name> <name><surname>Tang</surname><given-names>Z.</given-names></name> <name><surname>Zeng</surname><given-names>G.</given-names></name> <name><surname>Yang</surname><given-names>L.</given-names></name></person-group> (<year>2020</year>). <article-title>Flood susceptibility mapping using convolutional neural network frameworks</article-title>. <source>J. Hydrol.</source> <volume>582</volume>:<fpage>124482</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jhydrol.2019.124482</pub-id></mixed-citation></ref>
<ref id="ref43"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>Y. N.</given-names></name> <name><surname>Zhang</surname><given-names>M. L.</given-names></name></person-group> (<year>2022</year>). <article-title>Modeling hydrodynamic and hydrological processes in tidal wetlands</article-title>. <source>Wetlands</source> <volume>42</volume>:<fpage>1</fpage>. doi: <pub-id pub-id-type="doi">10.1007/s13157-021-01515-z</pub-id></mixed-citation></ref>
<ref id="ref44"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>Y. Q.</given-names></name> <name><surname>Zhang</surname><given-names>J. L.</given-names></name> <name><surname>Chang</surname><given-names>Y. H.</given-names></name></person-group> (<year>2024</year>). <article-title>A probability prediction model for flood disasters based on multi-layer perceptron</article-title>. <source>J. Phys. Conf. Ser.</source> <volume>2905</volume>:<fpage>012001</fpage>. doi: <pub-id pub-id-type="doi">10.1088/1742-6596/2905/1/012001</pub-id></mixed-citation></ref>
<ref id="ref45"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>Z. L.</given-names></name> <name><surname>Lai</surname><given-names>C. G.</given-names></name> <name><surname>Chen</surname><given-names>X. H.</given-names></name> <name><surname>Yang</surname><given-names>B.</given-names></name> <name><surname>Zhao</surname><given-names>S. W.</given-names></name></person-group> (<year>2015</year>). <article-title>Flood hazard risk assessment model based on random forest</article-title>. <source>J. Hydrol.</source> <volume>527</volume>, <fpage>1130</fpage>&#x2013;<lpage>1141</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jhydrol.2015.05.049</pub-id></mixed-citation></ref>
<ref id="ref46"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname><given-names>Y. X.</given-names></name></person-group> (<year>2017</year>). <article-title>Assessment and regionalization of flood disaster risk in Shaanxi Province based on GIS</article-title>. <source>J. Catastrophol.</source> <volume>32</volume>, <fpage>103</fpage>&#x2013;<lpage>108</lpage>. doi: <pub-id pub-id-type="doi">10.3969/j.issn.1000-811X.2017.02.018</pub-id></mixed-citation></ref>
<ref id="ref47"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname><given-names>Q.</given-names></name> <name><surname>Wang</surname><given-names>Y.</given-names></name> <name><surname>Li</surname><given-names>N.</given-names></name></person-group> (<year>2022</year>). <article-title>Extreme flood disasters: comprehensive impact and assessment</article-title>. <source>Water</source> <volume>14</volume>:<fpage>1211</fpage>. doi: <pub-id pub-id-type="doi">10.3390/w14081211</pub-id></mixed-citation></ref>
<ref id="ref48"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yuan</surname><given-names>X. M.</given-names></name> <name><surname>Sang</surname><given-names>L. H.</given-names></name> <name><surname>Shen</surname><given-names>F. X.</given-names></name> <name><surname>Wang</surname><given-names>H.</given-names></name></person-group> (<year>2018</year>). <article-title>FAHP-based flood risk assessment on Beijing-Tianjin-Hebei region</article-title>. <source>Water Resour. Hydropower Eng.</source> <volume>49</volume>, <fpage>37</fpage>&#x2013;<lpage>45</lpage>. doi: <pub-id pub-id-type="doi">10.13928/j.cnki.wrahe.2018.10.006</pub-id></mixed-citation></ref>
<ref id="ref49"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname><given-names>B.</given-names></name> <name><surname>Li</surname><given-names>Y.</given-names></name> <name><surname>Liu</surname><given-names>Y.</given-names></name> <name><surname>Wang</surname><given-names>J.</given-names></name></person-group> (<year>2024</year>). <article-title>Risk assessment of rainstorm disaster based on scenario construction</article-title>. <source>Int. J. Disaster Risk Reduct.</source> <volume>114</volume>:<fpage>104990</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ijdrr.2024.104990</pub-id></mixed-citation></ref>
<ref id="ref50"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname><given-names>X.</given-names></name> <name><surname>Li</surname><given-names>H.</given-names></name> <name><surname>Zhang</surname><given-names>Z.</given-names></name> <name><surname>Wang</surname><given-names>Y.</given-names></name></person-group> (<year>2022</year>). <article-title>Identification of metropolitan area boundaries based on comprehensive spatial linkages of cities: a case study of the Beijing-Tianjin-Hebei region</article-title>. <source>ISPRS Int. J. Geo Inf.</source> <volume>11</volume>:<fpage>396</fpage>. doi: <pub-id pub-id-type="doi">10.3390/ijgi11070396</pub-id></mixed-citation></ref>
<ref id="ref51"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname><given-names>Y.</given-names></name> <name><surname>Wang</surname><given-names>C.</given-names></name></person-group> (<year>2024</year>). <article-title>Spatial governance insights from the coordinated development of the Beijing-Tianjin-Hebei region</article-title>. <source>China Territ. Today</source> <volume>4</volume>, <fpage>28</fpage>&#x2013;<lpage>31</lpage>.</mixed-citation></ref>
<ref id="ref52"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname><given-names>Y. W.</given-names></name> <name><surname>Li</surname><given-names>H. J.</given-names></name> <name><surname>Li</surname><given-names>H.</given-names></name> <name><surname>Wang</surname><given-names>J.</given-names></name></person-group> (<year>2020</year>). <article-title>Flood disaster risk assessment on Beijing-Tianjin-Hebei region at county level</article-title>. <source>Water Resour. Power</source> <volume>38</volume>, <fpage>44</fpage>&#x2013;<lpage>47</lpage>. doi: <pub-id pub-id-type="doi">10.20040/j.cnki.1000-7709.2020.10.011</pub-id></mixed-citation></ref>
<ref id="ref53"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname><given-names>Z. F.</given-names></name></person-group> (<year>2016</year>). <article-title>Discussion on the causes and prevention of mountain torrent disasters</article-title>. <source>Technol. Innov. Appl.</source> <volume>5</volume>:<fpage>155</fpage>. doi: <pub-id pub-id-type="doi">10.19981/j.cn23-1581/g3.2016.05.140</pub-id></mixed-citation></ref>
<ref id="ref54"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname><given-names>P.</given-names></name> <name><surname>Wen</surname><given-names>G.</given-names></name> <name><surname>He</surname><given-names>Z. C.</given-names></name> <name><surname>Li</surname><given-names>X.</given-names></name></person-group> (<year>2024</year>). <article-title>Shallow landslide susceptibility assessment in Jinsha River basin based on machine learning models</article-title>. <source>Water Resour. Hydropower Eng.</source> <volume>55</volume>, <fpage>53</fpage>&#x2013;<lpage>70</lpage>. doi: <pub-id pub-id-type="doi">10.13928/j.cnki.wrahe.2024.10.006</pub-id></mixed-citation></ref>
<ref id="ref55"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname><given-names>X. W.</given-names></name> <name><surname>Wang</surname><given-names>C. Y.</given-names></name> <name><surname>Wang</surname><given-names>Y. J.</given-names></name> <name><surname>Ji</surname><given-names>M. F.</given-names></name> <name><surname>Wang</surname><given-names>M. R.</given-names></name> <name><surname>Wang</surname><given-names>J. Y.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Investigation and evaluation of the "23&#x00B7;7" catastrophic basin-wide flood in the Haihe River basin, Beijing</article-title>. <source>J. Beijing Norm. Univ. (Nat. Sci. Ed.)</source> <volume>60</volume>, <fpage>632</fpage>&#x2013;<lpage>640</lpage>. doi: <pub-id pub-id-type="doi">10.12202/j.0476-0301.2024055</pub-id></mixed-citation></ref>
<ref id="ref56"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhou</surname><given-names>Y.</given-names></name> <name><surname>Zhao</surname><given-names>Y.</given-names></name></person-group> (<year>2025</year>). <article-title>Multi-dimensional analysis of urban growth characteristics integrating remote sensing data: a case study of the Beijing-Tianjin-Hebei region</article-title>. <source>Remote Sens.</source> <volume>17</volume>:<fpage>548</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs17030548</pub-id></mixed-citation></ref>
<ref id="ref57"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhu</surname><given-names>K. L.</given-names></name> <name><surname>Liu</surname><given-names>M.</given-names></name> <name><surname>Zhang</surname><given-names>Y.</given-names></name> <name><surname>Li</surname><given-names>J.</given-names></name></person-group> (<year>2024</year>). <article-title>A novel framework for feature simplification and selection in flood susceptibility assessment based on machine learning</article-title>. <source>J. Hydrol. Reg. Stud.</source> <volume>52</volume>:<fpage>101739</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ejrh.2024.101739</pub-id></mixed-citation></ref>
<ref id="ref58"><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhu</surname><given-names>Y. D.</given-names></name> <name><surname>Chen</surname><given-names>X. R.</given-names></name> <name><surname>Li</surname><given-names>Q. P.</given-names></name></person-group> (<year>2021</year>). <article-title>Ultra-high dimensional variable selection based on information gain ratio</article-title>. <source>Stat. Decis.</source> <volume>37</volume>, <fpage>18</fpage>&#x2013;<lpage>21</lpage>. doi: <pub-id pub-id-type="doi">10.13546/j.cnki.tjyjc.2021.22.004</pub-id></mixed-citation></ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0008">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1744837/overview">Francesco Granata</ext-link>, University of Cassino, Italy</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0009">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1654727/overview">Mohammad Najafzadeh</ext-link>, Graduate University of Advanced Technology, Iran</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3317190/overview">Merabet Khaled</ext-link>, University of Skikda, Algeria</p>
</fn>
</fn-group>
<fn-group>
<fn id="fn0001">
<label>1</label>
<p><ext-link xlink:href="http://www.gscloud.cn" ext-link-type="uri">http://www.gscloud.cn</ext-link></p>
</fn>
<fn id="fn0002">
<label>2</label>
<p><ext-link xlink:href="http://www.resdc.cn" ext-link-type="uri">http://www.resdc.cn</ext-link></p>
</fn>
<fn id="fn0003">
<label>3</label>
<p><ext-link xlink:href="http://www.globallandcover.com" ext-link-type="uri">http://www.globallandcover.com</ext-link></p>
</fn>
<fn id="fn0004">
<label>4</label>
<p><ext-link xlink:href="https://www.earthdata.nasa.gov" ext-link-type="uri">https://www.earthdata.nasa.gov</ext-link></p>
</fn>
<fn id="fn0005">
<label>5</label>
<p><ext-link xlink:href="https://www.webmap.cn" ext-link-type="uri">https://www.webmap.cn</ext-link></p>
</fn>
<fn id="fn0006">
<label>6</label>
<p><ext-link xlink:href="https://www.geodata.cn" ext-link-type="uri">https://www.geodata.cn</ext-link></p>
</fn>
<fn id="fn0007">
<label>7</label>
<p><ext-link xlink:href="https://browser.dataspace.copernicus.eu" ext-link-type="uri">https://browser.dataspace.copernicus.eu</ext-link></p>
</fn>
</fn-group>
</back>
</article>