<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Environ. Sci.</journal-id>
<journal-title>Frontiers in Environmental Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Environ. Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-665X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1401107</article-id>
<article-id pub-id-type="doi">10.3389/fenvs.2024.1401107</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Environmental Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Estimating and mapping the soil total nitrogen contents in black soil region using hyperspectral images towards environmental heterogeneity</article-title>
<alt-title alt-title-type="left-running-head">Lin et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fenvs.2024.1401107">10.3389/fenvs.2024.1401107</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Lin</surname>
<given-names>Nan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2685658/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Mei</surname>
<given-names>Xianjun</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Jia</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Jiang</surname>
<given-names>Ranzhe</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wu</surname>
<given-names>Menghong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Wenchun</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>College of Surveying and Exploration Engineering</institution>, <institution>Jilin Jianzhu University</institution>, <addr-line>Changchun</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Jilin Province Natural Resources Remote Sensing Information Technology Innovation Laboratory</institution>, <addr-line>Changchun</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Institute of Science and Technology Information of Jilin Province</institution>, <addr-line>Changchun</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>College of Biological and Agricultural Engineering</institution>, <institution>Jilin University</institution>, <addr-line>Changchun</addr-line>, <country>China</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>College of Resource and Environmental Science</institution>, <institution>Jilin Agricultural University</institution>, <addr-line>Changchun</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1477641/overview">Changchun Huang</ext-link>, Nanjing Normal University, China</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1531593/overview">Yulong Guo</ext-link>, Henan Agricultural University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1954046/overview">Shaohua Lei</ext-link>, Nanjing Hydraulic Research Institute, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Nan Lin, <email>linnan@jlju.edu.cn</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>27</day>
<month>06</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>12</volume>
<elocation-id>1401107</elocation-id>
<history>
<date date-type="received">
<day>14</day>
<month>03</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>07</day>
<month>06</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Lin, Mei, Li, Jiang, Wu and Zhang.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Lin, Mei, Li, Jiang, Wu and Zhang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>
<bold>Introduction:</bold> Fast and accurate estimation and spatial mapping of soil total nitrogen (TN) content is important for the development of modern precision agriculture, such as soil fertility monitoring and land reclamation decision-making. Hyperspectral remote sensing has been demonstrated to be an accurate real-time technique for rapid estimation and mapping of soil TN content.</p>
<p>
<bold>Methods:</bold> To solve the problem of poor accuracy and generalization of estimation models caused by soil environmental heterogeneity in estimating and mapping soil TN content using hyperspectral images, 502 soil samples were collected from a typical black soil area in Yushu City, Jilin Province, China, as a test area, and three sample grouping strategies were established by soil environmental variables (soil type, thickness of the black soil layer, and topographic factors), and Pearson correlation coefficient and competitive adaptive reweighted sampling algorithm were used to determine the TN characteristic bands of each sample set under different strategies. Based on the data characteristics of the sub-sample set, the local regression estimation model based on sample grouping was constructed using the CatBoost algorithm, and the estimation and distribution mapping of soil TN content was carried out.</p>
<p>
<bold>Results and Discussion:</bold> The results showed that after dividing the samples according to the differences in soil environmental factors, the characteristic information of the samples is more targeted, with more abundant numbers and distribution ranges of TN characteristic bands. Compared to the global regression estimation with all samples, the local regression based on the grouping of soil environment differences showed improved accuracy, with the local regression estimation model constructed with the ST-G strategy exhibiting the highest estimation accuracy (<inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:msubsup>
<mml:mi>R</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> &#x003D; 0.839). The results can provide a reference for large-area soil properties mapping, and technical support for soil quality digitization and precision fertilization.</p>
</abstract>
<kwd-group>
<kwd>soil total nitrogen</kwd>
<kwd>environmental heterogeneity</kwd>
<kwd>grouping strategies</kwd>
<kwd>characteristic bands</kwd>
<kwd>local regression estimation</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Environmental Informatics and Remote Sensing</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Soil is an indispensable part of the Earth&#x2019;s ecosystem, with a complex structure and multiple functions (<xref ref-type="bibr" rid="B6">Du and Zhou, 2009</xref>). It provides water, minerals, and nutrients such as organic matter, nitrogen, phosphorus, and potassium to plants and soil organisms, playing a key role in climate regulation, vegetation growth, and maintenance of ecological balance (<xref ref-type="bibr" rid="B48">Wilding and Lin, 2006</xref>). Soil nitrogen is closely related to soil aggregate formation, microbial metabolism, and changes in soil texture (<xref ref-type="bibr" rid="B19">Li et al., 2022</xref>). It is an important nutrient element affecting and limiting plant growth and development and a key element in regulating soil fertility, quality, and agricultural productivity (<xref ref-type="bibr" rid="B26">Lori et al., 2018</xref>). Therefore, determining total nitrogen (TN) content in soil and its spatial distribution is important for soil fertility monitoring, land resource management, and sustainable agricultural development (<xref ref-type="bibr" rid="B34">Peng et al., 2021</xref>). The traditional soil chemical analysis method can obtain accurate information on soil TN content. However, it requires much time, effort and cost, and detailed information on TN content is not possible on a large scale (<xref ref-type="bibr" rid="B38">Sinfield et al., 2010</xref>). The field spectrum measurement using proximal sensors can invert the chemical composition of the soil according to its reflection characteristics and physical and chemical properties, enabling rapid and accurate estimation of the content of various soil components such as organic matter and TN (<xref ref-type="bibr" rid="B52">Yang et al., 2012</xref>; <xref ref-type="bibr" rid="B18">Kawamura et al., 2017</xref>; <xref ref-type="bibr" rid="B16">Jiang et al., 2023a</xref>). 
However, this method is dependent on point locations, making it difficult to obtain dynamic and continuous spatial distribution information of soil TN content through ground spectral data. Therefore, a fast and accurate method needs to be developed to dynamically obtain the spatial distribution of TN content on a large scale.</p>
<p>Since the stretching and cornering vibrations of many functional groups (N-H, N-C, and N&#x2261;N bonds) in soils induce specific spectral response and absorption radiance in the soil reflectance curves, a certain correlation can be identified between TN content and soil spectrum (<xref ref-type="bibr" rid="B39">Stenberg and Rossel, 2010</xref>; <xref ref-type="bibr" rid="B57">Zhang and He, 2016</xref>). This correlation provides a study basis for the estimation and mapping of soil TN content. With wide coverage, fast information acquisition, and strong timeliness, hyperspectral satellite remote sensing has been largely applied to large-scale soil nutrient estimation, soil characteristic evaluation, and digital soil mapping (<xref ref-type="bibr" rid="B9">Grunwald et al., 2015</xref>; <xref ref-type="bibr" rid="B1">Chatterjee et al., 2021</xref>; <xref ref-type="bibr" rid="B51">Xu et al., 2023</xref>). Currently, soil TN content estimation and mapping by hyperspectral images mainly focuses on hyperspectral data preprocessing, feature variable selection, and estimation model construction, etc. Many studies have demonstrated that imagery preprocessing, such as radiation and atmospheric correction, can effectively reduce or eliminate noise in spectral data acquisition (<xref ref-type="bibr" rid="B31">Minu et al., 2017</xref>; <xref ref-type="bibr" rid="B30">Minu et al., 2018</xref>; <xref ref-type="bibr" rid="B44">Wang J. et al., 2022</xref>). By mathematically converting spectral reflectance, spectral information related to soil nutrients can be enhanced, and the effects of interference factors can be suppressed or eliminated (<xref ref-type="bibr" rid="B13">Hong et al., 2019</xref>; <xref ref-type="bibr" rid="B61">Zhang et al., 2020</xref>). 
By selecting appropriate spectral feature bands by feature selection algorithms (e.g., the LASSO algorithm, the successive projections algorithm, the uninformative variable elimination, and the genetic algorithm), the data redundancy can be effectively reduced, the training speed can be accelerated, and the interpretability and generalization ability of the model can be improved (<xref ref-type="bibr" rid="B21">Li HY. et al., 2019</xref>; <xref ref-type="bibr" rid="B23">Li XY. et al., 2019</xref>; <xref ref-type="bibr" rid="B35">Peng et al., 2019</xref>). The competitive adaptive reweighted sampling (CARS) algorithm is a feature variable selection algorithm that selects the optimal set of variables by dynamically adjusting the window width and threshold (<xref ref-type="bibr" rid="B3">Cheng et al., 2021</xref>; <xref ref-type="bibr" rid="B11">Guo et al., 2021</xref>). It involves two stages of feature fast elimination and feature fine selection, which can effectively reduce feature inputs and improve the performance of the estimation model (<xref ref-type="bibr" rid="B62">Zhao et al., 2022</xref>). In addition, machine learning algorithms such as support vector machine (SVM), back propagation neural network (BPNN), and random forest (RF) have excellent feature mining, adaptability, and data fitting capabilities, which are widely applied in soil TN content estimation (<xref ref-type="bibr" rid="B5">Deng et al., 2020</xref>; <xref ref-type="bibr" rid="B25">Liu et al., 2022</xref>; <xref ref-type="bibr" rid="B17">Jiang et al., 2023b</xref>). The Categorical Boosting (CatBoost) model is a serial integrated machine learning algorithm using oblivious trees as base learners, providing better stability and generalization in quantitative estimation (<xref ref-type="bibr" rid="B12">Hancock and Khoshgoftaar, 2020</xref>; <xref ref-type="bibr" rid="B45">Wang WC. et al., 2022</xref>). 
Compared to most machine learning algorithms, it can efficiently process categorical features, reduce overfitting, and have high accuracy (<xref ref-type="bibr" rid="B54">Yu et al., 2022</xref>).</p>
<p>In order to achieve the requirements of precision and digital agriculture and to improve the estimation and mapping accuracy of TN content, studies on the selection of soil nitrogen characteristic variables and the optimization of estimation models have gradually increased (<xref ref-type="bibr" rid="B29">Mendes et al., 2022</xref>; <xref ref-type="bibr" rid="B58">Zhang LY. et al., 2023</xref>; <xref ref-type="bibr" rid="B59">Zhang RR. et al., 2023</xref>). However, most studies only considered the response relationship between spectral reflectance of image pixels and nitrogen content. The influence of spatial differences in soil environment on the estimation results of nitrogen distribution and TN content was only reported by few studies. Topographic factors can affect moisture flow, soil erosion, and material redistribution, significantly contributing to the export, transfer, and distribution of nitrogen in the soil (<xref ref-type="bibr" rid="B49">Wu et al., 2018</xref>; <xref ref-type="bibr" rid="B46">Wang et al., 2023</xref>). For example, differences in elevation and topography can change meteorological conditions such as precipitation, temperature, and relative humidity, affecting soil microbial activity, soil respiration, and photosynthetic rate and ultimately altering the spatial distribution of soil TN (<xref ref-type="bibr" rid="B41">Tesfaye et al., 2016</xref>). Slope can change soil TN content through various mechanisms such as soil moisture redistribution, soil erosion, and vegetation growth (<xref ref-type="bibr" rid="B36">Pennock, 2005</xref>). The black soil layer contains large amounts of plant residues and humic substances, providing abundant nutrient elements such as nitrogen, phosphorus, and potassium to the soil (<xref ref-type="bibr" rid="B10">Gu et al., 2018</xref>). 
Different thicknesses of the black soil layer lead to differences in soil physical properties, chemical composition, and biological activity (<xref ref-type="bibr" rid="B32">Niu et al., 2022</xref>). These differences influence soil microbial metabolism, water retention, and nutrient cycling ability, which in turn affects nitrogen content. Moreover, soils of different types have varying physicochemical properties, including soil texture, organic matter content, pH, and soil aeration (<xref ref-type="bibr" rid="B8">Ge et al., 2019</xref>). All these properties can affect nitrogen input and output. Due to the heterogeneity of the natural environments (e.g., topography, the thickness of the black soil layer, and soil type), the degree of soil erosion and nitrogen cycling varies in different spatial regions. As a result, the soil TN content in different regions varies, which affects the accuracy of the soil TN estimation model to some extent (<xref ref-type="bibr" rid="B56">Zhang et al., 2013</xref>; <xref ref-type="bibr" rid="B28">Marty et al., 2017</xref>). Therefore, the effect of soil environmental heterogeneity needs to be reduced. Van Waes et al. (<xref ref-type="bibr" rid="B42">2005</xref>) found that establishing local regressions after categorizing soil samples based on their characteristics can reduce the interference of influencing factors on the estimation accuracy. After dividing the study area according to topographic differences, Pan et al. (<xref ref-type="bibr" rid="B33">2022</xref>) conducted local regression estimation of soil organic matter (SOM). The results showed that the estimation accuracy of local regression was improved compared to that of the global regression. However, due to limited sample size, distribution density, and other factors, incorporating environmental factors (e.g., black soil layer thickness, soil type, and topography) to divide the samples remains uncommon. 
Moreover, the estimation of soil TN content through local regression by selecting the optimal wavelength variable based on sample characteristics has rarely been used. The improvement in the accuracy of TN content by the local regression model after grouping soil samples by different environmental factors needs to be further explored.</p>
<p>A method was proposed for the estimation of the soil TN content by local regression based on hyperspectral images in this study. This method aimed to reduce the possibility of local optimization of estimation results due to the heterogeneity of soil environments and to enhance the accuracy of soil TN content estimation and mapping. On this basis, 502 soil samples were collected in the typical black soil area of Yushu City, Jilin Province, China, and the spectral characteristics of soil TN were analyzed using the ZY1-02D hyperspectral image as the data source. Three sample grouped local regression strategies were established based on differences in soil environmental factors (soil type, topography, and thickness of the black soil layer), and local regression estimation models were developed using the CatBoost algorithm to estimate TN content. The objectives of this study are as follows: (1) to clarify the distribution range of TN characteristic bands and to analyze the effect of soil environmental heterogeneity on the distribution of TN characteristic bands; (2) to select the optimal wavelength variables based on the sample characteristics for local regression estimation of soil TN content, to evaluate the influence of modeling under different grouping strategies on the estimation accuracy, and to determine the optimal grouping strategy; (3) to establish a local regression estimation model using the optimal TN content estimation scheme and map the spatial distribution of soil TN content.</p>
</sec>
<sec sec-type="materials|methods" id="s2">
<title>2 Materials and methods</title>
<sec id="s2-1">
<title>2.1 Study area</title>
<p>The study area is in Yushu City, Jilin Province, China, and has a temperate continental monsoon climate, with an average annual temperature of 5.3&#xb0;C and precipitation of 536.4&#xa0;mm. The study area (126&#xb0;46&#x2032;-126&#xb0;54&#x2032;E, 44&#xb0;52&#x2032;-45&#xb0;03&#x2032;N) is located in the northeastern part of Yushu City within the concentrated distribution area of black soils (<xref ref-type="fig" rid="F1">Figure 1</xref>), which is rich in natural resources and has fertile soils rich in nutrients (such as nitrogen, phosphorus, potassium, and organic matter) that sustain and nourish crops, with a total cropland area of about 17,000 ha. The soil types in the study area are diverse, mainly including black soil (BS), albic soil (AS), and meadow soil (MS). Among them, BS and AS account for more than 60% of the soil in the area. The study area is mainly planted with corn and rice and is an important commodity grain production base in Northeast China.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Map of the study area. The study area in Yushu City, Jilin Province, Northeast China and the soil types and sampling points distribution.</p>
</caption>
<graphic xlink:href="fenvs-12-1401107-g001.tif"/>
</fig>
</sec>
<sec id="s2-2">
<title>2.2 Datasets</title>
<sec id="s2-2-1">
<title>2.2.1 Soil sample collection and analysis</title>
<p>The soil samples were collected in late April 2022. At this time, the area was in the &#x2018;bare soil phase&#x2019; without weeds and straw on the surface. The soil sampling points were set up referencing Chinese soil classification standards and combined with high-resolution remote sensing imagery of the study area. Through field investigation, the preset positions and collection route of sampling points were adjusted according to the soil surface heterogeneity in the area, thus ensuring that the sampling points were evenly distributed in the study area, and 502 soil samples were collected according to the sampling plan. To eliminate the influence of mixed pixels at sampling points on subsequent research, the distance between each sampling point and surrounding objects exceeded 100&#xa0;m. The locations of the sampling points are shown in <xref ref-type="fig" rid="F1">Figure 1</xref>. Soil samples were collected using the five-point sampling method to avoid accidental factors affecting the soil nutrient test results and to ensure the accuracy of the test. Firstly, a square area (30 &#xd7; 30&#xa0;m) was established at the sampling points, then 200&#xa0;g of soil with a depth of 20&#xa0;cm was collected at five points (four corner points and the center point), and the larger stones and debris were removed from the samples, which were finally mixed homogeneously and packed into a sample bag. After sampling was completed, the serial number of sample points was set according to location and sampling sequence, and the global positioning system (GPS) was used to record the spatial coordinates, acquisition time and altitude of the center point. While soil samples were collected, the thickness of the black soil layer at each of the five sample points was measured and recorded by the drilling and sampling method.</p>
<p>After sample collection, the soil samples were air-dried indoors, and non-soil bodies such as stones, weed roots, and straw were removed. Then, each soil sample was crushed with ceramic tools and sieved using a 100-mesh sieve with a particle size of 0.15&#xa0;mm. The processed soil samples were divided into two parts for chemical analysis and spectral reflectance measurement (<xref ref-type="fig" rid="F2">Figure 2</xref>). In this study, the spectral reflectance of soil samples was measured by an ASD FieldSpec 4 spectrometer. In order to avoid the influence of light sources and improve the measurement accuracy, sample measurement was performed in the darkroom, and the average of ten spectral reflectances was used as the measured spectral data of the soil samples. The content of TN in soil samples was determined by the semi-micro Kjeldahl method. The measuring process strictly follows the specification of land quality geochemical assessment in China.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Soil Sample Pretreatment. Indoor spectral measurement of soil samples and determination of TN content in soil samples.</p>
</caption>
<graphic xlink:href="fenvs-12-1401107-g002.tif"/>
</fig>
</sec>
<sec id="s2-2-2">
<title>2.2.2 Remote sensing data acquisition</title>
<p>This study considers the synchronization of remote sensing imagery generation time with ground testing. According to the sampling time, the ZY1-02D satellite hyperspectral image generated on 26 April 2022 was selected as the spectral data source. The data were provided by the China Center for Resources Satellite Data and Applications. The ZY1-02D satellite is the first civilian hyperspectral operational satellite launched by the Ministry of Natural Resources of China. It is equipped with a visible near-infrared camera for simultaneous acquisition of panchromatic and multispectral data, and a hyperspectral camera with hyperspectral data in 166 bands (<xref ref-type="bibr" rid="B53">Yu et al., 2021</xref>). The visible and near-infrared (VNIR) has a spectral resolution of 10&#xa0;nm and 76 bands in the spectral range. The short-wave infrared (SWIR) has a spectral resolution of 20&#xa0;nm and 90 bands in the spectral range. In addition, the satellite can acquire high-precision geometric and radiometric information while receiving spectral information. It firstly achieves in-orbit yaw calibration of hyperspectral loads, facilitating applications such as quantitative inversion of crop nutrient content (<xref ref-type="bibr" rid="B27">Lu et al., 2021</xref>). <xref ref-type="table" rid="T1">Table 1</xref> shows the parameter information of the ZY1-02D AHSI sensor. The topographic data used in this study are Digital Elevation Model (DEM) data from the United States Geological Survey (<ext-link ext-link-type="uri" xlink:href="https://glovis.usgs.gov/">https://glovis.usgs.gov/</ext-link>) with a spatial resolution of 30&#xa0;m.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Parameters of the ZY1-02D AHSI sensor.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Specification</th>
<th align="center">Parameters</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Spectral range (nm)</td>
<td align="center">400&#x2013;2,500</td>
</tr>
<tr>
<td align="center">Channels</td>
<td align="center">76 (VNIR), 90 (SWIR)</td>
</tr>
<tr>
<td align="center">Spectral resolution (nm)</td>
<td align="center">10 (VNIR), 20 (SWIR)</td>
</tr>
<tr>
<td align="center">Swath width (km)</td>
<td align="center">60</td>
</tr>
<tr>
<td align="center">Spatial resolution (m)</td>
<td align="center">30</td>
</tr>
<tr>
<td align="center">Revisit cycle (d)</td>
<td align="center">3</td>
</tr>
<tr>
<td align="center">Lateral swing capacity (&#xb0;)</td>
<td align="center">&#xb1;26</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s2-3">
<title>2.3 Methods</title>
<p>To effectively estimate soil TN content in a wide range, three different grouping strategies were proposed for local regression estimation according to the differences in soil types, black soil layer thickness, and slope gradient in the study area. Afterward, the optimal grouping strategy was selected for estimating soil TN content. The following steps are mainly involved in estimation: data acquisition and processing, sample grouping and feature selection, TN content estimation model construction, and spatial distribution mapping (<xref ref-type="fig" rid="F3">Figure 3</xref>). Firstly, the TN content in the soil samples was measured, and the soil type and black soil layer thickness at sampling points were statistically analyzed. Spectral curves and terrain parameters for each sample were extracted by preprocessing hyperspectral images and topographic data. Then, the spectral data were mathematically transformed, and the sensitive bands of each transformed spectrum for soil TN were screened based on the Pearson correlation coefficient threshold. In this way, the optimal spectral transformation method was determined. Furthermore, the local regression strategy for grouping soil samples was determined, and CARS was employed to extract the characteristic spectral bands of TN content for each group in different grouping strategies. Finally, the soil TN content estimation model was developed using the CatBoost algorithm, and the optimal grouping strategy was selected to estimate the soil TN content and plot the soil TN content distribution map.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Workflow for soil TN content estimation.</p>
</caption>
<graphic xlink:href="fenvs-12-1401107-g003.tif"/>
</fig>
<sec id="s2-3-1">
<title>2.3.1 Image data processing</title>
<p>To address the obvious fringe phenomenon in the SWIR band data of the ZY1-02D hyperspectral camera, the &#x201c;global de-stripe&#x201d; method was used to repair the fringe, and the bands with serious water vapor interference and overlapping bands were eliminated. Finally, 400&#x2013;1,341&#xa0;nm, 1,459&#x2013;1,795&#xa0;nm and 1,963&#x2013;2,470&#xa0;nm were selected as the spectral bands for this experiment, with a total of 145 spectral channels. Based on the ENVI software platform, the ZY1-02D hyperspectral image of the investigated area was subjected to geometric correction, radiometric calibration and atmospheric correction to reduce or eliminate image quality degradation due to radiance distortion, atmospheric extinction and geometric distortion, and to obtain original reflectance data (<xref ref-type="bibr" rid="B19">Li et al., 2022</xref>). Then, eight different transformations were performed on the processed hyperspectral images to reduce the errors caused by noise, environment and other factors, and to enhance the spectral feature information, to extract the sensitive spectral bands of soil TN more accurately (<xref ref-type="bibr" rid="B55">Yumiti and Wang, 2022</xref>). These transformation methods include First Derivative Reflectance (FDR), Continuum Removal (CR), Logarithm Reflectance (log R), Reciprocal logarithmic Reflectance [log(1/R)], Second Derivative Reflectance (SDR), Multiplicative Scatter Correction Reflectance (MSC-R), Standard Normal Variate Reflectance (SNV-R) and Detrend Reflectance (DT-R) (<xref ref-type="bibr" rid="B7">Gao et al., 2014</xref>; <xref ref-type="bibr" rid="B2">Chen et al., 2017</xref>).</p>
<p>After obtaining the DEM data from USGS, the DEM data of the test area were processed to fill in missing data, remove noise points and smooth the data, and the model was evaluated and calibrated according to the measured elevation values of each sample point to ensure that the quality and accuracy of the DEM data met the experimental requirements. Then, six topographic parameters including elevation, slope, aspect, longitudinal curvature (LC), cross-sectional curvature (CC) and surface roughness (SR) were extracted from the DEM data (<xref ref-type="bibr" rid="B40">Taghizadeh-Mehrjardi et al., 2014</xref>).</p>
</sec>
<sec id="s2-3-2">
<title>2.3.2 Competitive adaptive reweighted sampling</title>
<p>Hyperspectral data have many spectral bands and high dimensions, and there is obvious multicollinearity between adjacent bands, which will affect the stability of the estimation model to some extent. Therefore, extracting appropriate spectral feature bands as input variables for model construction can effectively reduce or eliminate problems such as low model accuracy and slow speed caused by redundant bands. CARS is a feature variable selection algorithm based on iterative statistical information proposed by drawing on the &#x201c;survival of the fittest&#x201d; rule of Darwin&#x2019;s evolutionary theory (<xref ref-type="bibr" rid="B20">Li et al., 2009</xref>). The algorithm selects the optimal set of variables by dynamically adjusting the window width and threshold, ensuring continuity of effective information. It has two stages, fast feature elimination and feature selection, which can effectively reduce the computation time and improve the prediction performance of the model (<xref ref-type="bibr" rid="B62">Zhao et al., 2022</xref>). It works on the following principle: (1) Monte Carlo iteration and competition are used to select multiple subsets from multicomponent spectral data. (2) The key wavelengths are selected by an exponential attenuation function and adaptive reweighted sampling (ARS). (3) Multiple rounds of cross-validation (CV) are used to select the variable subset with the minimum root mean square error of cross-validation (RMSECV) (<xref ref-type="bibr" rid="B63">Zheng et al., 2012</xref>).</p>
</sec>
<sec id="s2-3-3">
<title>2.3.3 Categorical boosting</title>
<p>The CatBoost algorithm is an ensemble learning predictive model with few parameters, high accuracy and support for categorical features that extends and improves on the Gradient Boosting Decision Tree (GBDT) algorithm (<xref ref-type="bibr" rid="B12">Hancock and Khoshgoftaar, 2020</xref>). Unlike the traditional GBDT algorithm, the algorithm randomly permutes all samples and then, for each sample, calculates the average label value of the samples that have the same category value and are placed before it in the permutation (<xref ref-type="bibr" rid="B45">Wang WC. et al., 2022</xref>). In addition, the algorithm improves Greedy Target-based Statistics by adding prior distribution terms, which can effectively reduce the noise caused by low-frequency categorical data. Suppose a permutation is <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3c3;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x007C;">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; each categorical feature value is then substituted with:<disp-formula id="equ1">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x007C;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2219;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">Y</mml:mi>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3c3;</mml:mi>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:msub>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3b3;</mml:mi>
<mml:mo>&#x2219;</mml:mo>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x007C;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:msub>
<mml:mi mathvariant="bold-italic">&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold-italic">k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="bold-italic">&#x3b3;</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf3">
<mml:math id="m4">
<mml:mrow>
<mml:mi mathvariant="normal">p</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the added prior value and <inline-formula id="inf4">
<mml:math id="m5">
<mml:mrow>
<mml:mi mathvariant="normal">&#x3b3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the weight coefficient. The mean of the data set labels is usually used as the <italic>a priori</italic> term for regression tasks.</p>
<p>Compared with other ensemble learning algorithms, CatBoost has the following characteristics: (1) It uses a combination of category features, which enriches the feature dimensions by exploiting the linkage between the features. (2) It uses ordered boosting to counteract the noisy points in the training set, thus avoiding the bias of gradient estimation and solving the problem of prediction bias, which leads to a significant increase in model training speed and accuracy. (3) It uses oblivious trees as the base model, which makes the model better able to deal with high-dimensional sparse data (<xref ref-type="bibr" rid="B14">Huang et al., 2019</xref>). <xref ref-type="table" rid="T2">Table 2</xref> shows the main parameters of the CatBoost algorithm and the search range of Bayesian optimization.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>The main training parameters and range of the CatBoost algorithm.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Parameters</th>
<th align="center">Search range</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Iterations (maximum number of trees)</td>
<td align="center">100&#x2013;600</td>
</tr>
<tr>
<td align="center">learning_rate (learning rate)</td>
<td align="center">0.01&#x2013;1</td>
</tr>
<tr>
<td align="center">depth (tree depth)</td>
<td align="center">6&#x2013;10</td>
</tr>
<tr>
<td align="center">l2_leaf_reg (Coefficient at the L2 regularization term of the cost function)</td>
<td align="center">1&#x2013;3</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s2-4">
<title>2.4 Estimation accuracy indexes</title>
<p>To assess the stability and estimation performance of the model, three statistical parameters are calculated as the accuracy evaluation index of the model: coefficient of determination (<inline-formula id="inf5">
<mml:math id="m6">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>), RMSE, and residual predictive deviation (RPD). <inline-formula id="inf6">
<mml:math id="m7">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represents the stability and estimation ability of the estimation model; the closer its value is to 1, the more stable the model and the better the fit. The range of RPD values can be categorized into three levels from small to large. When RPD &#x3c; 1.4, the model is unreliable; when 1.4 &#x3c; RPD &#x3c; 2, the model is suitable for estimating soil TN content from hyperspectral data; and when RPD &#x3e; 2, the model provides good quantitative estimation capability. The equations for the three parameters are as follows:<disp-formula id="equ2">
<mml:math id="m8">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="bold-italic">R</mml:mi>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x007C;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">A</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
<mml:mo>/</mml:mo>
</mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x007C;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ3">
<mml:math id="m9">
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
<mml:mi mathvariant="bold-italic">M</mml:mi>
<mml:mi mathvariant="bold-italic">S</mml:mi>
<mml:mi mathvariant="bold-italic">E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn>
</mml:mrow>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x007C;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold-italic">F</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold-italic">A</mml:mi>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn mathvariant="bold">2</mml:mn>
</mml:msup>
<mml:mo>/</mml:mo>
<mml:mi mathvariant="bold-italic">n</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="equ4">
<mml:math id="m10">
<mml:mrow>
<mml:mi mathvariant="bold-italic">R</mml:mi>
<mml:mi mathvariant="bold-italic">P</mml:mi>
<mml:mi mathvariant="bold-italic">D</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="bold-italic">S</mml:mi>
<mml:mi mathvariant="bold-italic">t</mml:mi>
<mml:mi mathvariant="bold-italic">d</mml:mi>
<mml:mo>/</mml:mo>
<mml:mi mathvariant="bold-italic">R</mml:mi>
<mml:mi mathvariant="bold-italic">M</mml:mi>
<mml:mi mathvariant="bold-italic">S</mml:mi>
<mml:mi mathvariant="bold-italic">E</mml:mi>
</mml:mrow>
</mml:math>
</disp-formula>where <inline-formula id="inf7">
<mml:math id="m11">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the number of soil samples, and <inline-formula id="inf8">
<mml:math id="m12">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the observed value of the samples, and <inline-formula id="inf9">
<mml:math id="m13">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the predicted value of the samples, and <inline-formula id="inf10">
<mml:math id="m14">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>F</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> is the mean of the observed values, and <inline-formula id="inf11">
<mml:math id="m15">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the standard deviation.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>3 Results</title>
<sec id="s3-1">
<title>3.1 Soil TN content analysis and sample grouping</title>
<p>This study statistically analyzed the TN content of the collected soil samples, as shown in <xref ref-type="table" rid="T3">Table 3</xref>. The TN content of the sample set in the experimental area ranged from 0.55&#xa0;g/kg to 2.34&#xa0;g/kg, with a mean value (Mean) of 1.31&#xa0;g/kg and a coefficient of variation (CV) of 20.61%. Among the different soil types in the study area, the meadow soil had the highest TN content (mean &#x3d; 1.42&#xa0;g/kg), and the albic soil had the lowest TN content (mean &#x3d; 1.21&#xa0;g/kg) and the highest CV (CV &#x3d; 24.79%). In addition, the CVs for all three soil types were higher than those for the entire sample, indicating that dividing the sample set by soil type increased the spatial variability of sample TN content.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Descriptive statistics of TN content in soil samples (Unit: g/kg).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Dataset</th>
<th align="center">Max</th>
<th align="center">Min</th>
<th align="center">Mean</th>
<th align="center">Std</th>
<th align="center">CV</th>
<th align="center">Samples</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Whole</td>
<td align="center">2.34</td>
<td align="center">0.55</td>
<td align="center">1.31</td>
<td align="center">0.27</td>
<td align="center">20.61</td>
<td align="center">502</td>
</tr>
<tr>
<td align="center">BS</td>
<td align="center">2.03</td>
<td align="center">0.58</td>
<td align="center">1.32</td>
<td align="center">0.3</td>
<td align="center">22.73</td>
<td align="center">168</td>
</tr>
<tr>
<td align="center">AS</td>
<td align="center">2.34</td>
<td align="center">0.55</td>
<td align="center">1.21</td>
<td align="center">0.3</td>
<td align="center">24.79</td>
<td align="center">173</td>
</tr>
<tr>
<td align="center">MS</td>
<td align="center">2.3</td>
<td align="center">0.57</td>
<td align="center">1.42</td>
<td align="center">0.33</td>
<td align="center">23.24</td>
<td align="center">161</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In order to explore the influence of terrain heterogeneity on the estimation accuracy of TN content, the correlation between soil TN content and several topographic factors (e.g., elevation, slope, and aspect) was analyzed in this study. The topographic factor exhibiting the highest correlation was selected as the partitioned data for local regression. The Pearson correlation coefficients between soil TN content and topographic factors are shown in <xref ref-type="table" rid="T4">Table 4</xref>. Because the slope had the highest absolute correlation with soil TN content among different topographic factors, it was selected as the segmentation data for local regression estimation. To clarify the variation in the TN content distribution with different slopes and thicknesses of the black soil layer, the soil samples were divided according to the slope grade and the thickness of the black soil layer. The distribution of the soil TN content was plotted, as shown in <xref ref-type="fig" rid="F4">Figure 4</xref>. Most of the study area has a slope of 0&#x2013;3&#xb0;. With increasing slope, the proportion of samples with soil TN content of 0&#x2013;1&#xa0;g/kg increases, and the proportion of samples with TN content of &#x3e;1.5&#xa0;g/kg decreases. In <xref ref-type="fig" rid="F4">Figure 4</xref>, 83.66% of the samples have a black soil layer thickness between 0&#xa0;cm and 60&#xa0;cm. As the thickness of the black soil layer increases, the proportion of samples with soil TN content of &#x3e;1.5&#xa0;g/kg increases, and that of samples with soil TN content of 0&#x2013;1&#xa0;g/kg and 1&#x2013;1.5&#xa0;g/kg decreases to varying degrees.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Pearson correlation between TN and topographic factors.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center"/>
<th align="center">Elevation</th>
<th align="center">Slope</th>
<th align="center">Aspect</th>
<th align="center">LC</th>
<th align="center">CC</th>
<th align="center">SR</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">TN</td>
<td align="center">&#x2212;0.405</td>
<td align="center">&#x2212;0.601</td>
<td align="center">&#x2212;0.142</td>
<td align="center">&#x2212;0.027</td>
<td align="center">0.034</td>
<td align="center">&#x2212;0.018</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Spatial distribution of TN content.</p>
</caption>
<graphic xlink:href="fenvs-12-1401107-g004.tif"/>
</fig>
<p>To determine the optimal local regression strategy for estimating soil TN content, all soil samples were grouped according to different strategies. The grouping results of the three strategies are shown in <xref ref-type="table" rid="T5">Table 5</xref>: (1) grouping by soil type (ST-G): All samples were classified into three groups according to the soil subtypes of albic soil (AS), meadow soil (MS), and black soil (BS); (2) grouping by the thickness of black soil layer (BLT-G): According to the thickness of the black soil layer, the number of sample sets, and the distribution range of TN content, all soil samples were divided into three groups, namely, BLT1 (0&#x2013;30&#xa0;cm), BLT2 (30&#x2013;60&#xa0;cm), and BLT3 (&#x3e;60&#xa0;cm); (3) Grouping by slope grade (Slp-G): Based on the slope values of the soil samples, the number of subsample sets, and the distribution range of soil TN content, all samples were divided into three groups, namely, Slp1 (0&#xb0;&#x2013;2&#xb0;), Slp2 (2&#xb0;&#x2013;3&#xb0;), and Slp3 (&#x3e;3&#xb0;).</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Analysis of grouping results.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th colspan="2" align="center">Factors</th>
<th align="center">Group 1</th>
<th align="center">Group 2</th>
<th align="center">Group 3</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="2" align="center">Soil type</td>
<td align="center">Group</td>
<td align="center">BS</td>
<td align="center">AS</td>
<td align="center">MS</td>
</tr>
<tr>
<td align="center">Sampling points</td>
<td align="center">168</td>
<td align="center">173</td>
<td align="center">161</td>
</tr>
<tr>
<td rowspan="2" align="center">Thickness of the black soil layer</td>
<td align="center">Group (cm)</td>
<td align="center">0&#x2013;30</td>
<td align="center">30&#x2013;60</td>
<td align="center">&#x3e;60</td>
</tr>
<tr>
<td align="center">Sampling points</td>
<td align="center">212</td>
<td align="center">208</td>
<td align="center">82</td>
</tr>
<tr>
<td rowspan="2" align="center">Slope</td>
<td align="center">Group (&#xb0;)</td>
<td align="center">0&#x2013;2</td>
<td align="center">2&#x2013;3</td>
<td align="center">&#x3e;3</td>
</tr>
<tr>
<td align="center">Sampling points</td>
<td align="center">228</td>
<td align="center">197</td>
<td align="center">77</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3-2">
<title>3.2 Soil spectral characteristics analysis</title>
<p>To verify the feasibility of soil TN content estimation after ZY1-02D hyperspectral image correction, the measured spectra of 502 samples were resampled following the spectral resolution of the hyperspectral image. The spectral curves of the resampled spectra were compared with those of the image pixels (<xref ref-type="fig" rid="F5">Figure 5</xref>). It can be seen that the spectral reflectances of the image pixels are lower than the measured soil spectral reflectances, which can be attributed to factors such as soil water content and soil surface roughness. However, the spectral curves of the image pixels and the measured spectral curves have similar characteristic absorption positions, and the shapes and trends of the two curves are highly consistent, validating the reliability of preprocessing, such as radiometric calibration and atmospheric correction. A high correlation can be observed with the correlation coefficients ranging from 0.6 to 0.84 for the entire band, indicating that most of the soil spectral features are retained in the image pixels. These features can be used to estimate soil components and physicochemical information. In addition, the measured spectra and image pixel spectra of all samples were classified into five groups according to the TN content from low to high. The spectral data and the corresponding content data of each group were averaged for comparison and analysis, as shown in <xref ref-type="fig" rid="F5">Figure 5</xref>. In the wavelength range of 400&#x2013;2,500&#xa0;nm, the spectral reflectance decreases with increasing soil TN content, and the patterns of the image pixel spectra and the measured spectra with the soil TN content are generally consistent, further proving the feasibility of using the ZY1-02D hyperspectral image for soil TN content estimation.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Image and measured soil spectrum, and correlation coefficients.</p>
</caption>
<graphic xlink:href="fenvs-12-1401107-g005.tif"/>
</fig>
<p>With the aim of reducing the interference of other factors (e.g., noise and environment) and enhancing the spectral feature information for more accurate identification of the sensitive bands of soil TN, eight different transformations were applied to the raw reflectance data, including FDR, CR, log R, log(1/R), SDR, MSC-R, SNV-R, and DT-R transformations. As shown in <xref ref-type="fig" rid="F6">Figure 6</xref>, the reflectance and absorption characteristics of the spectral curves are substantially increased with more peak and trough information after spectral transformation, and more sensitive spectral bands can be identified.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Transformation spectral reflectance curves.</p>
</caption>
<graphic xlink:href="fenvs-12-1401107-g006.tif"/>
</fig>
</sec>
<sec id="s3-3">
<title>3.3 Correlation analysis and spectral feature selection</title>
<p>To determine the optimal spectral transformation method for estimating TN content, the original spectral reflectance (OR) and the eight transformed spectral reflectances of each soil sample group under the three sample grouping strategies were correlated with their TN content. <xref ref-type="fig" rid="F7">Figure 7</xref> reveals a negative correlation between OR and soil TN content in the wavelength range of 400&#x2013;2,500&#xa0;nm. In most wavelength ranges, the soil TN content exhibits a markedly higher correlation coefficient with the transformed spectral reflectance than with the original reflectance. The bands with an absolute correlation coefficient greater than 0.5 were selected as the sensitive bands, and the number of sensitive bands of each group of soil samples under different transformation methods was counted. As shown in <xref ref-type="table" rid="T6">Table 6</xref>, there are significant differences in the number of TN sensitive bands in each sample group under different transformation methods, and the number of sensitive bands after FDR and CR transformation increases significantly. Among them, the number of sensitive bands in the FDR spectra is the highest, except for the &#x201c;BLT1 (0&#x2013;30&#xa0;cm)&#x201d; group. Based on the above results, the first derivative method was selected to transform the soil spectral characteristics.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Correlation between soil TN content and the spectral reflectance of each band.</p>
</caption>
<graphic xlink:href="fenvs-12-1401107-g007.tif"/>
</fig>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>Number of sensitive bands of different spectral transformation methods.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Groups</th>
<th align="center">BS</th>
<th align="center">AS</th>
<th align="center">MS</th>
<th align="center">BLT1</th>
<th align="center">BLT2</th>
<th align="center">BLT3</th>
<th align="center">Slp1</th>
<th align="center">Slp2</th>
<th align="center">Slp3</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">OR</td>
<td align="center">27</td>
<td align="center">13</td>
<td align="center">10</td>
<td align="center">0</td>
<td align="center">9</td>
<td align="center">4</td>
<td align="center">0</td>
<td align="center">0</td>
<td align="center">0</td>
</tr>
<tr>
<td align="center">FDR</td>
<td align="center">43</td>
<td align="center">53</td>
<td align="center">46</td>
<td align="center">28</td>
<td align="center">49</td>
<td align="center">36</td>
<td align="center">44</td>
<td align="center">38</td>
<td align="center">31</td>
</tr>
<tr>
<td align="center">CR</td>
<td align="center">41</td>
<td align="center">6</td>
<td align="center">29</td>
<td align="center">29</td>
<td align="center">11</td>
<td align="center">25</td>
<td align="center">26</td>
<td align="center">26</td>
<td align="center">26</td>
</tr>
<tr>
<td align="center">LogR</td>
<td align="center">38</td>
<td align="center">13</td>
<td align="center">1</td>
<td align="center">21</td>
<td align="center">23</td>
<td align="center">19</td>
<td align="center">10</td>
<td align="center">1</td>
<td align="center">2</td>
</tr>
<tr>
<td align="center">Log (1/R)</td>
<td align="center">38</td>
<td align="center">20</td>
<td align="center">1</td>
<td align="center">21</td>
<td align="center">23</td>
<td align="center">19</td>
<td align="center">10</td>
<td align="center">1</td>
<td align="center">2</td>
</tr>
<tr>
<td align="center">SDR</td>
<td align="center">2</td>
<td align="center">0</td>
<td align="center">0</td>
<td align="center">0</td>
<td align="center">0</td>
<td align="center">0</td>
<td align="center">0</td>
<td align="center">0</td>
<td align="center">3</td>
</tr>
<tr>
<td align="center">MSC-R</td>
<td align="center">18</td>
<td align="center">21</td>
<td align="center">14</td>
<td align="center">1</td>
<td align="center">6</td>
<td align="center">17</td>
<td align="center">15</td>
<td align="center">10</td>
<td align="center">5</td>
</tr>
<tr>
<td align="center">SNV-R</td>
<td align="center">13</td>
<td align="center">12</td>
<td align="center">16</td>
<td align="center">9</td>
<td align="center">11</td>
<td align="center">16</td>
<td align="center">17</td>
<td align="center">14</td>
<td align="center">7</td>
</tr>
<tr>
<td align="center">DT-R</td>
<td align="center">9</td>
<td align="center">5</td>
<td align="center">7</td>
<td align="center">0</td>
<td align="center">0</td>
<td align="center">6</td>
<td align="center">5</td>
<td align="center">12</td>
<td align="center">0</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To obtain the set of spectral feature variables with minimum redundancy information and to improve the efficiency and accuracy of the estimation model, the CARS algorithm was applied to choose the best spectrum variables of the sample set. In the CARS feature selection process, the number of Monte Carlo iterations was set to 50. After multiple iterations, the cross-validation RMSE (RMSECV) values of each band combination scheme were compared, and the variable set corresponding to the minimum RMSECV value was selected as the optimal variable set for the model. <xref ref-type="fig" rid="F8">Figure 8</xref> shows the optimal variable set plots for each sample set divided according to different factors. By analyzing the optimal number of variables in all sample sets, the number of bands selected accounts for 11%&#x2013;15% of the total number of bands, significantly reducing redundant information. Furthermore, most of the characteristic wavelengths selected using the whole sample as input data are concentrated in the range of 550&#x2013;850&#xa0;nm. After grouping the samples according to different factors, each grouping strategy corresponded to a wider distribution of characteristic wavelengths, with the ST-G strategy corresponding to the widest distribution of characteristic wavelengths. Combining the optimal results of the whole sample and each sub-sample, the characteristic wavelengths were mainly concentrated in the ranges of 450&#x2013;850&#xa0;nm, 1,950&#x2013;2,150&#xa0;nm, and 2,400&#x2013;2,450&#xa0;nm, with a relative concentration in the range of 550&#x2013;850&#xa0;nm. <xref ref-type="fig" rid="F9">Figure 9</xref> shows the CARS variable selection process with the ST-G strategy (BS, AS, and MS), and the variable set with the lowest RMSECV value is marked by a vertical line. The 1st&#x2013;22nd iterations are the rough selection phase of CARS feature selection, in which the wavelengths containing noise and useless information are quickly eliminated. 
As the number of iterations increases, the number of variables decreases exponentially. The 23rd to 50th iterations are the accurate selection stage of CARS feature selection. Starting from the 22nd iteration, the RMSECV of the three sample sets gradually reaches the lowest value and then increases, which can be attributed to the elimination of key bands sensitive to TN, resulting in lower model accuracy. After the 44th iteration, the RMSECV values gradually stabilized. The RMSECV values for BS, AS, and MS reach their minimum at the 22nd, 23rd, and 25th iterations, with 21, 19, and 17 retained wavelengths, respectively.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Optimal variable set graph (Horizontal coordinates indicate wavelengths from 400&#x2013;2,500&#xa0;nm, and vertical coordinates indicate different groups based on the ST-G, Slp-G, and BLT-G strategies).</p>
</caption>
<graphic xlink:href="fenvs-12-1401107-g008.tif"/>
</fig>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>Variable selecting process with CARS (taking BS, AS, and MS as examples).</p>
</caption>
<graphic xlink:href="fenvs-12-1401107-g009.tif"/>
</fig>
</sec>
<sec id="s3-4">
<title>3.4 Accuracy of TN content estimation based on different soil grouping strategies</title>
<p>To clarify the influence of local regression according to different grouping strategies on the accuracy of TN content estimation, the CatBoost algorithm was used to perform local regression estimation on ST-G, BLT-G, and Slp-G strategies. The global regression estimation was also conducted based on the full sample (U-G strategy). The FDR data selected by CARS were used as the independent variable <inline-formula id="inf12">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and the corresponding soil TN content data were used as the dependent variable <inline-formula id="inf13">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. The samples were randomly divided into a training group (for model establishment and parameter optimization) and a verification group (for accuracy evaluation) in a ratio of 3:1. <xref ref-type="table" rid="T7">Table 7</xref> shows the accuracy of regression estimation with different grouping strategies. The local regression model constructed according to the ST-G strategy shows the highest estimation accuracy (<inline-formula id="inf14">
<mml:math id="m18">
<mml:mrow>
<mml:msubsup>
<mml:mi>R</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> &#x3d; 0.839, RMSEp &#x3d; 0.238), the BLT-G strategy shows the second highest (<inline-formula id="inf15">
<mml:math id="m19">
<mml:mrow>
<mml:msubsup>
<mml:mi>R</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> &#x3d; 0.821, RMSEp &#x3d; 0.270), and the U-G strategy shows the lowest estimation accuracy (<inline-formula id="inf16">
<mml:math id="m20">
<mml:mrow>
<mml:msubsup>
<mml:mi>R</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> &#x3d; 0.748, RMSEp &#x3d; 0.402). The estimation accuracy of local regression according to the grouping strategy is higher than that of the global regression, indicating a better performance of the local regression in estimating TN content.</p>
<table-wrap id="T7" position="float">
<label>TABLE 7</label>
<caption>
<p>Estimation accuracy index of different grouping strategies.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Strategies</th>
<th align="center">
<inline-formula id="inf17">
<mml:math id="m21">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="italic">R</mml:mi>
<mml:mi mathvariant="italic">c</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">RMSEc</th>
<th align="center">
<inline-formula id="inf18">
<mml:math id="m22">
<mml:mrow>
<mml:msubsup>
<mml:mi mathvariant="italic">R</mml:mi>
<mml:mi mathvariant="italic">p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">RMSEp</th>
<th align="center">RPD</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">U-G</td>
<td align="center">0.825</td>
<td align="center">0.279</td>
<td align="center">0.748</td>
<td align="center">0.402</td>
<td align="center">1.998</td>
</tr>
<tr>
<td align="center">ST-G</td>
<td align="center">0.921</td>
<td align="center">0.126</td>
<td align="center">0.839</td>
<td align="center">0.238</td>
<td align="center">2.500</td>
</tr>
<tr>
<td align="center">BLT-G</td>
<td align="center">0.903</td>
<td align="center">0.157</td>
<td align="center">0.821</td>
<td align="center">0.270</td>
<td align="center">2.372</td>
</tr>
<tr>
<td align="center">Slp-G</td>
<td align="center">0.898</td>
<td align="center">0.168</td>
<td align="center">0.814</td>
<td align="center">0.283</td>
<td align="center">2.326</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To verify the effectiveness of local regression in improving the estimation accuracy, this study further analyzed the estimation accuracy of each subgroup based on the CatBoost algorithm, as shown in <xref ref-type="fig" rid="F10">Figure 10</xref>. Estimation accuracies for samples divided according to soil environment differences are higher than those for the whole sample. When adopting the BLT-G and Slp-G strategies for regression estimation, a large difference in estimation accuracy between groups can be observed, and the estimation results easily fall into the local optimum. The difference in estimation accuracy between groups classified with the ST-G strategy is the smallest, and the <inline-formula id="inf19">
<mml:math id="m23">
<mml:mrow>
<mml:msubsup>
<mml:mi>R</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> of each group is greater than 0.8, indicating a higher stability of the local regression estimation model established using soil type as segmentation data.</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Accuracy of TN content estimation for each subgroup under different grouping strategies.</p>
</caption>
<graphic xlink:href="fenvs-12-1401107-g010.tif"/>
</fig>
</sec>
<sec id="s3-5">
<title>3.5 Optimal local regression strategy for TN content mapping</title>
<p>To improve the accuracy of TN content estimation and mapping in the study area, the ZY1-02D satellite remote sensing image was used as the data source, the training samples were determined by the region of interest, and the SVM algorithm was used to perform supervised classification of the hyperspectral images and extract farmland pixels. <xref ref-type="fig" rid="F11">Figure 11A</xref> shows the selection results of the farmland pixels, with approximately 83% of the study area being farmland. The selected farmland soil ranges have clear boundaries with non-farmland pixels (e.g., roads and construction land) and relatively intact patches. It can be seen from <xref ref-type="table" rid="T7">Table 7</xref> and <xref ref-type="fig" rid="F10">Figure 10</xref> that the local regression estimation model constructed according to the ST-G strategy has high estimation accuracy and stability. Therefore, we divided the farmland pixels in the experimental area into three sub-regions according to soil type, mapped the TN content of each sub-region using the CatBoost algorithm, and obtained the spatial distribution map of soil TN content in the whole study area by mosaicking and merging the sub-region maps. As shown in <xref ref-type="fig" rid="F11">Figure 11B</xref>, the TN content of the cultivated soils in the study area is generally high, mainly concentrated in the range of 1.0&#x2013;2.0&#xa0;g/kg. The soil area in the range of 1.0&#x2013;1.5&#xa0;g/kg accounts for 37.49% of the farmland area (<xref ref-type="fig" rid="F11">Figure 11C</xref>). The soil area in the range of 1.5&#x2013;2.0&#xa0;g/kg accounts for the largest proportion (57.75% of the farmland area) and is evenly distributed throughout the study area (<xref ref-type="fig" rid="F11">Figure 11D</xref>). The spatial distribution of soil TN content was characterized by obvious clustering, with the distribution of high- and low-value areas relatively concentrated. 
The high-value areas are zonally distributed in the eastern part of the study area (<xref ref-type="fig" rid="F11">Figure 11E</xref>), which is attributable to the paddy fields in this part, where the long-term application of nitrogen fertilizer and irrigation results in high humus content, allowing for the transformation and accumulation of nitrogen in the soil. The low-value areas are mainly distributed in the southern part of the study area (<xref ref-type="fig" rid="F11">Figure 11B</xref>), where the undulating topography leads to soil erosion and lower nitrogen retention. A comparison with the high spatial resolution image maps of the study area shows that the estimation result of soil TN content closely coincides with the current status of farmland cultivation in the study area, demonstrating the reliability of the local regression estimation model based on the ST-G strategy and CatBoost algorithm.</p>
<fig id="F11" position="float">
<label>FIGURE 11</label>
<caption>
<p>Mapping of soil TN content for the study area. <bold>(A)</bold> Farmland pixel extraction results, <bold>(B&#x2013;E)</bold> Spatial distribution of TN content.</p>
</caption>
<graphic xlink:href="fenvs-12-1401107-g011.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>4 Discussion</title>
<sec id="s4-1">
<title>4.1 Estimation accuracy of different soil grouping strategies and advantage of CatBoost algorithm in local regression estimation</title>
<p>To clarify the degree of accuracy improvement in the TN content estimation by local regression according to different grouping strategies and to verify the superiority of the CatBoost algorithm for local regression estimation, a comparative analysis was conducted using different algorithms and grouping strategies. The scatter plots of estimated and measured TN content with different grouping strategies and estimation algorithms are shown in <xref ref-type="fig" rid="F12">Figure 12</xref>. For the CatBoost algorithm, the ST-G strategy has the best fitting effect and the highest estimation accuracy, followed by the BLT-G strategy. The <inline-formula id="inf20">
<mml:math id="m24">
<mml:mrow>
<mml:msubsup>
<mml:mi>R</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> of the ST-G strategy is 0.091 higher than that of the U-G strategy. For the RF algorithm, the ST-G strategy also has the highest estimation accuracy, with <inline-formula id="inf21">
<mml:math id="m25">
<mml:mrow>
<mml:msubsup>
<mml:mi>R</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> being 0.083 higher than that of the U-G strategy. However, when using the GBRT algorithm, the estimation accuracy of the ST-G strategy is lower than that of the BLT-G strategy. This difference is due to the large bias of the GBRT algorithm in estimating high and low TN content and the large overestimation ratio of the ST-G strategy. Nevertheless, the <inline-formula id="inf22">
<mml:math id="m26">
<mml:mrow>
<mml:msubsup>
<mml:mi>R</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> of the ST-G strategy is still 0.061 higher than that of the U-G strategy. The comparison of the regression estimation performances shows that the CatBoost algorithm outperforms the RF and GBRT algorithms in fitting effect and estimation accuracy. This result is consistent with the study on the estimation of TN content in farmland soils by <xref ref-type="bibr" rid="B45">Wang WC. et al. (2022)</xref>. In addition, when performing local regression estimation with different grouping strategies, the local regression estimation model constructed by the CatBoost algorithm shows higher accuracy improvement than that developed with the RF and GBRT algorithms. A possible reason is that the CatBoost algorithm replaces the gradient estimation method of traditional gradient boosting algorithms with ordered boosting, sequentially optimizing the loss function and providing strong robustness and generalization ability (<xref ref-type="bibr" rid="B12">Hancock and Khoshgoftaar, 2020</xref>). In summary, local regression according to the grouping strategies improved the estimation accuracy for all three estimation algorithms, indicating that dividing the sample set according to the differences in soil environment can improve the accuracy of large-scale global regression. This finding is similar to the conclusions of previous studies (<xref ref-type="bibr" rid="B60">Zhang et al., 2010</xref>; <xref ref-type="bibr" rid="B15">Jia et al., 2017</xref>).</p>
<fig id="F12" position="float">
<label>FIGURE 12</label>
<caption>
<p>Scatter plots of the estimated and measured TN content with different grouping strategies and estimation models.</p>
</caption>
<graphic xlink:href="fenvs-12-1401107-g012.tif"/>
</fig>
</sec>
<sec id="s4-2">
<title>4.2 Distribution of TN characteristic bands</title>
<p>In this study, eight transformation methods (e.g., FDR, CR, and SNV-R) were used to spectrally transform the original spectrum, and the distribution of TN characteristic bands was determined by the correlation between each transformed spectrum and soil TN content (<xref ref-type="fig" rid="F7">Figure 7</xref>). The characteristic bands of the CR transform are mainly distributed around 700&#x2013;850&#xa0;nm, 1700&#xa0;nm, and 2,400&#xa0;nm, and those of the FDR transform are mainly distributed in ranges of 450&#x2013;850&#xa0;nm, 1,650&#x2013;1750&#xa0;nm, and 1950&#x2013;2,150&#xa0;nm. Furthermore, the MSC-R transform and SNV-R transform show a similar distribution of the characteristic bands near 500&#xa0;nm. The above results are consistent with previous findings (<xref ref-type="bibr" rid="B37">Shen et al., 2020</xref>; <xref ref-type="bibr" rid="B43">Vibhute et al., 2020</xref>; <xref ref-type="bibr" rid="B24">Liu et al., 2023</xref>; <xref ref-type="bibr" rid="B59">Zhang RR. et al., 2023</xref>). However, not all spectral transformations produce results superior to the original reflectance (<xref ref-type="bibr" rid="B50">Xie et al., 2022</xref>). The SDR transform has few characteristic band distributions over the full band range. In this study, the subset of samples for local regression estimation was divided according to differences in soil type, slope, and thickness of the black soil layer. The TN content data and FDR spectral reflectance data of different sample sets were used as input data, and the characteristic bands of each sample set were selected by CARS. 
According to the results of CARS feature selection (<xref ref-type="fig" rid="F8">Figure 8</xref>), the TN characteristic bands selected using the whole sample as input data were mainly distributed in the ranges of 550&#x2013;650&#xa0;nm and 750&#x2013;850&#xa0;nm, and some selected wavelengths are consistent with previous studies (<xref ref-type="bibr" rid="B18">Kawamura et al., 2017</xref>; <xref ref-type="bibr" rid="B37">Shen et al., 2020</xref>).</p>
<p>When the samples were divided based on different environmental factors for local regression estimation, some differences appeared in the characteristic bands corresponding to the sample sets with different data characteristics. Therefore, to clarify the distribution of TN characteristic bands corresponding to the sample subset, the correlation between the soil TN content and the FDR data was examined in this study, and the spectral bands with the absolute Pearson correlation coefficient greater than 0.5 were used as the characteristic bands. The TN characteristic bands of different sample sets are shown in <xref ref-type="fig" rid="F13">Figure 13</xref>. It can be seen that characteristic bands based on the whole sample are mainly distributed in the range of 450&#x2013;850&#xa0;nm. After dividing samples according to different grouping strategies, the number and distribution range of the characteristic bands are more abundant. When using the ST-G strategy, characteristic bands show wider distribution ranges, mainly in 450&#x2013;850&#xa0;nm, 1,600&#x2013;1750&#xa0;nm, and 1950&#x2013;2,150&#xa0;nm. When using the Slp-G strategy, the number of characteristic bands of three sample sets increases (the added characteristic bands are mainly distributed in the range of 1950&#x2013;2,150&#xa0;nm and 2,300&#x2013;2,450&#xa0;nm). After grouping with the BLT-G strategy, the variation of the soil TN characteristic bands is more significant, with increasing characteristic spectral bands in the ranges of 1,600&#x2013;1750&#xa0;nm and 1950&#x2013;2,450&#xa0;nm as the thickness increases. Therefore, more abundant spectral information can be obtained for the data characteristics of various sample sets after dividing samples with different strategies. It is the key reason for the improved accuracy of local regression estimation.</p>
<fig id="F13" position="float">
<label>FIGURE 13</label>
<caption>
<p>Distribution of TN characteristic bands for different sample sets (The vertical coordinates represent the whole sample and the different sample sets divided according to the set grouping strategy).</p>
</caption>
<graphic xlink:href="fenvs-12-1401107-g013.tif"/>
</fig>
</sec>
<sec id="s4-3">
<title>4.3 Uncertainty analysis of soil TN content estimation and mapping using hyperspectral images</title>
<p>When mapping the distribution of soil TN content based on hyperspectral images, the accuracy of estimation and mapping is influenced by potential factors such as the imaging endmember spectra, the laboratory measurement process, and geographic environmental differences. During hyperspectral image acquisition, differences in radiation intensity and meteorological conditions can result in different endmember spectral reflections, thus affecting the accuracy of estimation and mapping (<xref ref-type="bibr" rid="B22">Li XP. et al., 2019</xref>). In this study, we ensure that the sampling time of the soil samples is consistent with the satellite detection time, the sampling points are within the pure bare soil pixels, and strict image preprocessing is performed to reduce the influence of spectral information. In addition, the soil samples used in this study were measured under the same environments, and the influence of the laboratory measurement process can be ignored. The difference in geographical environment will alter the soil nitrogen content and its distribution to different degrees, thus affecting the accuracy of TN content estimation (<xref ref-type="bibr" rid="B64">Zhong et al., 2019</xref>; <xref ref-type="bibr" rid="B47">Wang et al., 2021</xref>; <xref ref-type="bibr" rid="B4">Dai et al., 2022</xref>). Aiming at the influence of soil environmental factors on the estimation accuracy, a local regression estimation model based on sample grouping was established, which weakens the differences in soil TN content among regions, thus reducing the influence of soil environmental heterogeneity on model training and estimation, and effectively improving the estimation accuracy of soil TN content.</p>
<p>Because soils exist in relatively complex environments over long periods, the distribution of soil nitrogen is influenced by the coupling of various soil environmental factors such as slope, elevation, and the thickness of the black soil layer. Restricted by the number of sampling points and the sample distribution density, this study establishes a local regression model to estimate soil TN content based on the difference of a single environmental factor, which has the problems of low inversion accuracy and poor transferability. Sample splitting by considering multiple factors at the same time can result in an insufficient number of samples, abruptly increasing the modeling difficulty and even reducing the estimation accuracy of soil TN content. Therefore, future research will attempt to increase the sampling density, supplement sample sets, introduce more soil environmental information, and thoroughly analyze the relationship between multiple soil environmental factors and soil nitrogen distribution, and establish a soil TN content estimation model that considers the heterogeneity of multiple environmental factors. Through these attempts, the accuracy and transferability of the estimation model will be strengthened, thus facilitating the rapid and large-scale estimation of soil TN content.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<title>5 Conclusion</title>
<p>When hyperspectral remote sensing images are used to construct a model for estimating soil TN content, the accuracy of the model is influenced by image quality and sample characteristics. In this study, based on a ZY1-02D hyperspectral remote sensing image and soil environmental data, we constructed a local regression estimation model of nitrogen content taking into account the heterogeneity of soil environment, which effectively improved the accuracy and stability of the soil TN content estimation model. After analyzing the correlation between soil TN content and environmental parameters, three strategies for grouping soil samples were established by dividing the samples according to differences in soil types, thicknesses of the black soil layer, and slope grade, which effectively highlight the characteristic information of each sample subset, weaken the influence of soil TN content difference between regions on the accuracy of the estimation model, and reduce the possibility of the estimation model falling into a local optimum due to soil environmental heterogeneity. In the study, the optimal wavelength variables for local regression were selected according to the data characteristics of each sample subset, which enriches the spectral feature information of the modeled samples and effectively solves the problems of poor generalization ability and poor robustness faced by the traditional global regression estimation model. A comparison of the accuracy indices of each estimation model shows that the estimation performance of the local regression model constructed according to the ST-G strategy and the CatBoost algorithm is better than that of the global regression model and other local regression models, with a validation set RMSE of 0.238 and <inline-formula id="inf23">
<mml:math id="m27">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> of 0.839. Based on the model estimation results, the distribution of the TN content in the study area has been successfully plotted. This work provides a new research paradigm for the accurate estimation and mapping of soil component content in large regions.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s11">Supplementary Material</xref>; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s7">
<title>Author contributions</title>
<p>NL: Conceptualization, Data curation, Formal Analysis, Funding acquisition, Investigation, Project administration, Resources, Supervision, Validation, Writing&#x2013;original draft, Writing&#x2013;review and editing. XM: Visualization, Writing&#x2013;original draft, Writing&#x2013;review and editing. JL: Data curation, Resources, Writing&#x2013;original draft. RJ: Resources, Software, Validation, Writing&#x2013;original draft. MW: Data curation, Investigation, Resources, Writing&#x2013;original draft. WZ: Validation, Writing&#x2013;original draft, Formal Analysis.</p>
</sec>
<sec sec-type="funding-information" id="s8">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. This research was funded by the Science and Technology Development Project of Jilin Province (20210203016SF), the Natural Science Foundation of Jilin Province (20230101373JC), the National Natural Science Foundation of China (52178042), and the Key Scientific Research Project of the Education Department of Jilin Province (JJKH20200280KJ).</p>
</sec>
<ack>
<p>The authors would like to thank the China Centre for Resources Satellite Data and Application for providing ZY1-02D data. We are most grateful to the reviewers and editors for their valuable comments and recommendations.</p>
</ack>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec id="s11">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fenvs.2024.1401107/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fenvs.2024.1401107/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet1.docx" id="SM1" mimetype="application/docx" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chatterjee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hartemink</surname>
<given-names>A. E.</given-names>
</name>
<name>
<surname>Triantafilis</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Desai</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>Soldat</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Characterization of field-scale soil variation using a stepwise multi-sensor fusion approach and a cost-benefit analysis</article-title>. <source>Catena</source> <volume>201</volume>, <fpage>105190</fpage>. <pub-id pub-id-type="doi">10.1016/j.catena.2021.105190</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>Y. Y.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>R. Y.</given-names>
</name>
<name>
<surname>Qi</surname>
<given-names>T. C.</given-names>
</name>
<name>
<surname>Qi</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Constructing representative calibration dataset based on spectral transformation and kennard-stone algorithm for VNIR modeling of soil total nitrogen in paddy soil</article-title>. <source>Spectrosc. Spectr. Analysis</source> <volume>37</volume> (<issue>7</issue>), <fpage>2133</fpage>&#x2013;<lpage>2139</lpage>. <pub-id pub-id-type="doi">10.3964/j.issn.1000-0593(2017)07-2133-07</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Du</surname>
<given-names>Y. K.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Combining multivariate method and spectral variable selection for soil total nitrogen estimation by Vis-NIR spectroscopy</article-title>. <source>Archives Agron. Soil Sci.</source> <volume>67</volume> (<issue>12</issue>), <fpage>1665</fpage>&#x2013;<lpage>1678</lpage>. <pub-id pub-id-type="doi">10.1080/03650340.2020.1802013</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dai</surname>
<given-names>L. J.</given-names>
</name>
<name>
<surname>Ge</surname>
<given-names>J. S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L. Q.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Bolan</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Influence of soil properties, topography, and land cover on soil organic carbon and total nitrogen concentration: a case study in Qinghai-Tibet plateau based on random forest regression and structural equation modeling</article-title>. <source>Sci. Total Environ.</source> <volume>821</volume>, <fpage>153440</fpage>. <pub-id pub-id-type="doi">10.1016/j.scitotenv.2022.153440</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Deng</surname>
<given-names>X. F.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>W. Z.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>Z. Q.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>M. H.</given-names>
</name>
<name>
<surname>Grieneisen</surname>
<given-names>M. L.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X. J.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Spatial and temporal trends of soil total nitrogen and C/N ratio for croplands of East China</article-title>. <source>Geoderma</source> <volume>361</volume>, <fpage>114035</fpage>. <pub-id pub-id-type="doi">10.1016/j.geoderma.2019.114035</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Du</surname>
<given-names>C. W.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>J. M.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Evaluation of soil fertility using infrared spectroscopy: a review</article-title>. <source>Environ. Chem. Lett.</source> <volume>7</volume> (<issue>2</issue>), <fpage>97</fpage>&#x2013;<lpage>113</lpage>. <pub-id pub-id-type="doi">10.1007/s10311-008-0166-x</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Gao</surname>
<given-names>X. H.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Jia</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J. S.</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>C. M.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). &#x201c;<article-title>Visible-near infrared reflectance spectroscopy for estimating soil total nitrogen contents in the Sanjiang Yuan Regions, China -A case study of Yushu county and Maduo county,Qinghai province</article-title>&#x201d;, in <conf-name>Multispectral, Hyperspectral, and Ultraspectral Remote Sensing Technology, Techniques and Applications V.</conf-name> (<publisher-loc>Beijing, China</publisher-loc>: <publisher-name>Spie-Int Soc Optical Engineering</publisher-name>), <volume>Vol. 9263</volume>, <fpage>295</fpage>&#x2013;<lpage>306</lpage>. <pub-id pub-id-type="doi">10.1117/12.2069107</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ge</surname>
<given-names>N. N.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>X. R.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X. T.</given-names>
</name>
<name>
<surname>Shao</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Jia</surname>
<given-names>X. X.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Soil texture determines the distribution of aggregate-associated carbon, nitrogen and phosphorous under two contrasting land use types in the Loess Plateau</article-title>. <source>Catena</source> <volume>172</volume>, <fpage>148</fpage>&#x2013;<lpage>157</lpage>. <pub-id pub-id-type="doi">10.1016/j.catena.2018.08.021</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Grunwald</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Vasques</surname>
<given-names>G. M.</given-names>
</name>
<name>
<surname>Rivero</surname>
<given-names>R. G.</given-names>
</name>
</person-group> (<year>2015</year>). <source>Advances in agronomy</source>. <publisher-loc>Delaware, United States</publisher-loc>: <publisher-name>University of Delaware, Newark</publisher-name>.</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gu</surname>
<given-names>Z. J.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>X. Y.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>C. C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S. C.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Quantitative assessment of soil productivity and predicted impacts of water erosion in the black soil region of northeastern China</article-title>. <source>Sci. Total Environ.</source> <volume>637</volume>, <fpage>706</fpage>&#x2013;<lpage>716</lpage>. <pub-id pub-id-type="doi">10.1016/j.scitotenv.2018.05.061</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X. W.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>Y. F.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y. R.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Evaluating calibration and spectral variable selection methods for predicting three soil nutrients using vis-NIR spectroscopy</article-title>. <source>Remote Sens.</source> <volume>13</volume> (<issue>19</issue>), <fpage>4000</fpage>. <pub-id pub-id-type="doi">10.3390/rs13194000</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hancock</surname>
<given-names>J. T.</given-names>
</name>
<name>
<surname>Khoshgoftaar</surname>
<given-names>T. M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>CatBoost for big data: an interdisciplinary review</article-title>. <source>J. Big Data</source> <volume>7</volume> (<issue>1</issue>), <fpage>94</fpage>. <pub-id pub-id-type="doi">10.1186/s40537-020-00369-8</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hong</surname>
<given-names>Y. S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y. Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y. F.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Application of fractional-order derivative in the quantitative estimation of soil organic matter content through visible and near-infrared spectroscopy</article-title>. <source>Geoderma</source> <volume>337</volume>, <fpage>758</fpage>&#x2013;<lpage>769</lpage>. <pub-id pub-id-type="doi">10.1016/j.geoderma.2018.10.025</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>G. M.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>L. F.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W. Q.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>Evaluation of CatBoost method for prediction of reference evapotranspiration in humid regions</article-title>. <source>J. Hydrology</source> <volume>574</volume>, <fpage>1029</fpage>&#x2013;<lpage>1041</lpage>. <pub-id pub-id-type="doi">10.1016/j.jhydrol.2019.04.085</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jia</surname>
<given-names>S. Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H. Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y. J.</given-names>
</name>
<name>
<surname>Tong</surname>
<given-names>R. Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Hyperspectral imaging analysis for the classification of soil types and the determination of soil total nitrogen</article-title>. <source>Sensors</source> <volume>17</volume> (<issue>10</issue>), <fpage>2252</fpage>. <pub-id pub-id-type="doi">10.3390/s17102252</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>J. Y.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>Y. Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G. R.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>Vis-NIR spectroscopy combined with GAN data augmentation for predicting soil nutrients in degraded alpine meadows on the Qinghai-Tibet Plateau</article-title>. <source>Sensors</source> <volume>23</volume> (<issue>7</issue>), <fpage>3686</fpage>. <pub-id pub-id-type="doi">10.3390/s23073686</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname>
<given-names>C. L.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>J. Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G. R.</given-names>
</name>
</person-group> (<year>2023b</year>). <article-title>Integration of vis-NIR spectroscopy and machine learning techniques to predict eight soil parameters in alpine regions</article-title>. <source>Agronomy-Basel</source> <volume>13</volume> (<issue>11</issue>), <fpage>2816</fpage>. <pub-id pub-id-type="doi">10.3390/agronomy13112816</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kawamura</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Tsujimoto</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Rabenarivo</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Asai</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Andriamananjara</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Rakotoson</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Vis-NIR spectroscopy and PLS regression with waveband selection for estimating the total C and N of paddy soils in Madagascar</article-title>. <source>Remote Sens.</source> <volume>9</volume> (<issue>10</issue>), <fpage>1081</fpage>. <pub-id pub-id-type="doi">10.3390/rs9101081</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Acquah</surname>
<given-names>G. E.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Combining variable selection and multiple linear regression for soil organic matter and total nitrogen estimation by DRIFT-MIR spectroscopy</article-title>. <source>Agronomy-Basel</source> <volume>12</volume> (<issue>3</issue>), <fpage>638</fpage>. <pub-id pub-id-type="doi">10.3390/agronomy12030638</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>H. D.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>Y. Z.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Q. S.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>D. S.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Key wavelengths screening using competitive adaptive reweighted sampling method for multivariate calibration</article-title>. <source>Anal. Chim. Acta</source> <volume>648</volume> (<issue>1</issue>), <fpage>77</fpage>&#x2013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1016/j.aca.2009.06.046</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>H. Y.</given-names>
</name>
<name>
<surname>Jia</surname>
<given-names>S. Y.</given-names>
</name>
<name>
<surname>Le</surname>
<given-names>Z. C.</given-names>
</name>
</person-group> (<year>2019a</year>). <article-title>Quantitative analysis of soil total nitrogen using hyperspectral imaging technology with extreme learning machine</article-title>. <source>Sensors</source> <volume>19</volume> (<issue>20</issue>), <fpage>4355</fpage>. <pub-id pub-id-type="doi">10.3390/s19204355</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>X. P.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X. P.</given-names>
</name>
</person-group> (<year>2019b</year>). <article-title>Study on differential-based multispectral modeling of soil organic matter in Ebinur Lake wetland</article-title>. <source>Spectrosc. Spectr. Analysis</source> <volume>39</volume> (<issue>2</issue>), <fpage>535</fpage>&#x2013;<lpage>542</lpage>. <pub-id pub-id-type="doi">10.3964/j.issn.1000-0593(2019)02-0535-08</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>X. Y.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>P. P.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>L&#xfc;</surname>
<given-names>M. R.</given-names>
</name>
</person-group> (<year>2019c</year>). <article-title>Extracting characteristic wavelength of soil nutrients based on multi-classifier fusion</article-title>. <source>Spectrosc. Spectr. Analysis</source> <volume>39</volume> (<issue>9</issue>), <fpage>2862</fpage>&#x2013;<lpage>2867</lpage>. <pub-id pub-id-type="doi">10.3964/j.issn.1000-0593(2019)09-2862-06</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y. F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X. D.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Z. P.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>Y. H.</given-names>
</name>
<name>
<surname>Di</surname>
<given-names>H. G.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Characteristic bands extraction method and prediction of soil nutrient contents based on an analytic hierarchy process</article-title>. <source>Measurement</source> <volume>220</volume>, <fpage>113408</fpage>. <pub-id pub-id-type="doi">10.1016/j.measurement.2023.113408</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Z. F.</given-names>
</name>
<name>
<surname>Lei</surname>
<given-names>H. C.</given-names>
</name>
<name>
<surname>Lei</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Sheng</surname>
<given-names>H. Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Spatial prediction of total nitrogen in soil surface layer based on machine learning</article-title>. <source>Sustainability</source> <volume>14</volume> (<issue>19</issue>), <fpage>11998</fpage>. <pub-id pub-id-type="doi">10.3390/su141911998</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lori</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Symanczik</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Maeder</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Efosa</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Jaenicke</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Buegger</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Distinct nitrogen provisioning from organic amendments in soil as influenced by farming system and water regime</article-title>. <source>Front. Environ. Sci.</source> <volume>6</volume>. <pub-id pub-id-type="doi">10.3389/fenvs.2018.00040</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Qiao</surname>
<given-names>D. Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y. X.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Fusion of China ZY-1 02D hyperspectral data and multispectral data: which methods should be used?</article-title> <source>Remote Sens.</source> <volume>13</volume> (<issue>12</issue>), <fpage>2354</fpage>. <pub-id pub-id-type="doi">10.3390/rs13122354</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marty</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Houle</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gagnon</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Courchesne</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>The relationships of soil total nitrogen concentrations, pools and C:N ratios with climate, vegetation types and nitrate deposition in temperate and boreal forests of eastern Canada</article-title>. <source>Catena</source> <volume>152</volume>, <fpage>163</fpage>&#x2013;<lpage>172</lpage>. <pub-id pub-id-type="doi">10.1016/j.catena.2017.01.014</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mendes</surname>
<given-names>W. D.</given-names>
</name>
<name>
<surname>Sommer</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Koszinski</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wehrhan</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Peatlands spectral data influence in global spectral modelling of soil organic carbon and total nitrogen using visible-near-infrared spectroscopy</article-title>. <source>J. Environ. Manag.</source> <volume>317</volume>, <fpage>115383</fpage>. <pub-id pub-id-type="doi">10.1016/j.jenvman.2022.115383</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Minu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Shetty</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Gomez</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Hybrid atmospheric correction algorithms and evaluation on VNIR/SWIR Hyperion satellite data for soil organic carbon prediction</article-title>. <source>Int. J. Remote Sens.</source> <volume>39</volume> (<issue>22</issue>), <fpage>8246</fpage>&#x2013;<lpage>8270</lpage>. <pub-id pub-id-type="doi">10.1080/01431161.2018.1483087</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Minu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Shetty</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Minasny</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Gomez</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>The role of atmospheric correction algorithms in the prediction of soil organic carbon from Hyperion data</article-title>. <source>Int. J. Remote Sens.</source> <volume>38</volume> (<issue>23</issue>), <fpage>6435</fpage>&#x2013;<lpage>6456</lpage>. <pub-id pub-id-type="doi">10.1080/01431161.2017.1354265</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Niu</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>H. Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L. N.</given-names>
</name>
<name>
<surname>Sang</surname>
<given-names>L. L.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Determinants of soil bacterial diversity in a black soil region in a large-scale area</article-title>. <source>Land</source> <volume>11</volume> (<issue>5</issue>), <fpage>731</fpage>. <pub-id pub-id-type="doi">10.3390/land11050731</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X. L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H. J.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>D. Q.</given-names>
</name>
<name>
<surname>Dou</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>M. Y.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Remote sensing inversion of soil organic matter by using the subregion method at the field scale</article-title>. <source>Precis. Agric.</source> <volume>23</volume> (<issue>5</issue>), <fpage>1813</fpage>&#x2013;<lpage>1835</lpage>. <pub-id pub-id-type="doi">10.1007/s11119-022-09914-2</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname>
<given-names>Y. P.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z. H.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>C. J.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Y. M.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Estimation of soil nutrient content using hyperspectral data</article-title>. <source>Agriculture-Basel</source> <volume>11</volume> (<issue>11</issue>), <fpage>1129</fpage>. <pub-id pub-id-type="doi">10.3390/agriculture11111129</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname>
<given-names>Y. P.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Y. M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>G. X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z. H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Prediction of soil nutrient contents using visible and near-infrared reflectance spectroscopy</article-title>. <source>ISPRS Int. J. Geo-Information</source> <volume>8</volume> (<issue>10</issue>), <fpage>437</fpage>. <pub-id pub-id-type="doi">10.3390/ijgi8100437</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pennock</surname>
<given-names>D. J.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Precision conservation for co-management of carbon and nitrogen on the Canadian prairies</article-title>. <source>J. Soil Water Conservation</source> <volume>60</volume> (<issue>6</issue>), <fpage>396</fpage>&#x2013;<lpage>401</lpage>.</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shen</surname>
<given-names>L. Z.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>M. F.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>J. W.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z. L.</given-names>
</name>
<name>
<surname>Leng</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Q.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Hyperspectral estimation of soil organic matter content using different spectral preprocessing techniques and PLSR method</article-title>. <source>Remote Sens.</source> <volume>12</volume> (<issue>7</issue>), <fpage>1206</fpage>. <pub-id pub-id-type="doi">10.3390/rs12071206</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sinfield</surname>
<given-names>J. V.</given-names>
</name>
<name>
<surname>Fagerman</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Colic</surname>
<given-names>O.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Evaluation of sensing technologies for on-the-go detection of macro-nutrients in cultivated soils</article-title>. <source>Comput. Electron. Agric.</source> <volume>70</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>18</lpage>. <pub-id pub-id-type="doi">10.1016/j.compag.2009.09.017</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Stenberg</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Rossel</surname>
<given-names>R. A. V.</given-names>
</name>
</person-group> (<year>2010</year>). <source>Diffuse reflectance spectroscopy for high-resolution soil sensing</source>. <publisher-loc>Sydney, Australia</publisher-loc>: <publisher-name>Univ Sydney Fac Agr, Food and Nat Resources</publisher-name>.</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Taghizadeh-Mehrjardi</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Minasny</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Sarmadian</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Malone</surname>
<given-names>B. P.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Digital mapping of soil salinity in Ardakan region, central Iran</article-title>. <source>Geoderma</source> <volume>213</volume>, <fpage>15</fpage>&#x2013;<lpage>28</lpage>. <pub-id pub-id-type="doi">10.1016/j.geoderma.2013.07.020</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tesfaye</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Bravo</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Ruiz-Peinado</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Pando</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Bravo-Oviedo</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Impact of changes in land use, species and elevation on soil organic carbon and total nitrogen in Ethiopian Central Highlands</article-title>. <source>Geoderma</source> <volume>261</volume>, <fpage>70</fpage>&#x2013;<lpage>79</lpage>. <pub-id pub-id-type="doi">10.1016/j.geoderma.2015.06.022</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Van Waes</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Mestdagh</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Lootens</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Carlier</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Possibilities of near infrared reflectance spectroscopy for the prediction of organic carbon concentrations in grassland soils</article-title>. <source>J. Agric. Sci.</source> <volume>143</volume>, <fpage>487</fpage>&#x2013;<lpage>492</lpage>. <pub-id pub-id-type="doi">10.1017/s0021859605005630</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vibhute</surname>
<given-names>A. D.</given-names>
</name>
<name>
<surname>Kale</surname>
<given-names>K. V.</given-names>
</name>
<name>
<surname>Gaikwad</surname>
<given-names>S. V.</given-names>
</name>
<name>
<surname>Dhumal</surname>
<given-names>R. K.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Estimation of soil nitrogen in agricultural regions by VNIR reflectance spectroscopy</article-title>. <source>SN Appl. Sci.</source> <volume>2</volume> (<issue>9</issue>), <fpage>1523</fpage>. <pub-id pub-id-type="doi">10.1007/s42452-020-03322-9</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Si</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022a</year>). <article-title>Transfer-learning-based cloud detection for Zhuhai-1 satellite hyperspectral imagery</article-title>. <source>Front. Environ. Sci.</source> <volume>10</volume>. <pub-id pub-id-type="doi">10.3389/fenvs.2022.1039249</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>W. C.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>Y. L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M. Z.</given-names>
</name>
</person-group> (<year>2022b</year>). <article-title>Development and performance test of a vehicle-mounted total nitrogen content prediction system based on the fusion of near-infrared spectroscopy and image information</article-title>. <source>Comput. Electron. Agric.</source> <volume>192</volume>, <fpage>106613</fpage>. <pub-id pub-id-type="doi">10.1016/j.compag.2021.106613</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Effect of slope shape on soil aggregate stability of slope farmland in black soil region</article-title>. <source>Front. Environ. Sci.</source> <volume>11</volume>. <pub-id pub-id-type="doi">10.3389/fenvs.2023.1127043</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Z. G.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>G. C.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>G. H.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>H. B.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>T. Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Effects of land use types and environmental factors on spatial distribution of soil total nitrogen in a coalfield on the Loess Plateau, China</article-title>. <source>Soil and Tillage Res.</source> <volume>211</volume>, <fpage>105027</fpage>. <pub-id pub-id-type="doi">10.1016/j.still.2021.105027</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wilding</surname>
<given-names>L. P.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Advancing the frontiers of soil science towards a geoscience</article-title>. <source>Geoderma</source> <volume>131</volume> (<issue>3-4</issue>), <fpage>257</fpage>&#x2013;<lpage>274</lpage>. <pub-id pub-id-type="doi">10.1016/j.geoderma.2005.03.028</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Qiao</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>X. Y.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Coupling loss characteristics of runoff-sediment-adsorbed and dissolved nitrogen and phosphorus on bare loess slope</article-title>. <source>Environ. Sci. Pollut. Res.</source> <volume>25</volume> (<issue>14</issue>), <fpage>14018</fpage>&#x2013;<lpage>14031</lpage>. <pub-id pub-id-type="doi">10.1007/s11356-018-1619-9</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xie</surname>
<given-names>S. G.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>F. J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>S. G.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y. H.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Prediction of soil organic matter content based on characteristic band selection method</article-title>. <source>Spectrochimica Acta Part A-Molecular Biomol. Spectrosc.</source> <volume>273</volume>, <fpage>120949</fpage>. <pub-id pub-id-type="doi">10.1016/j.saa.2022.120949</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname>
<given-names>Y. M.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>Y. Q.</given-names>
</name>
<name>
<surname>Abd-Elrahman</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>T. F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Q. P.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Incorporation of fused remote sensing imagery to enhance soil organic carbon spatial prediction in an agricultural area in Yellow River Basin, China</article-title>. <source>Remote Sens.</source> <volume>15</volume> (<issue>8</issue>), <fpage>2017</fpage>. <pub-id pub-id-type="doi">10.3390/rs15082017</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Kuang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Mouazen</surname>
<given-names>A. M.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Quantitative analysis of soil nitrogen and carbon at a farm scale using visible and near infrared spectroscopy coupled with wavelength reduction</article-title>. <source>Eur. J. Soil Sci.</source> <volume>63</volume> (<issue>3</issue>), <fpage>410</fpage>&#x2013;<lpage>420</lpage>. <pub-id pub-id-type="doi">10.1111/j.1365-2389.2012.01443.x</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>D. Y.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>H. T.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Study on ground object classification based on the hyperspectral fusion images of ZY-1(02D) satellite</article-title>. <source>J. Appl. Remote Sens.</source> <volume>15</volume> (<issue>4</issue>), <fpage>042603</fpage>. <pub-id pub-id-type="doi">10.1117/1.jrs.15.042603</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>J. X.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>W. A.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>L. L.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>F. Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhangzhong</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>TPE-CatBoost: an adaptive model for soil moisture spatial estimation in the main maize-producing areas of China with multiple environment covariates</article-title>. <source>J. Hydrology</source> <volume>613</volume>, <fpage>128465</fpage>. <pub-id pub-id-type="doi">10.1016/j.jhydrol.2022.128465</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yumiti</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X. M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Hyperspectral estimation of soil organic matter content based on continuous wavelet transformation</article-title>. <source>Spectrosc. Spectr. Analysis</source> <volume>42</volume> (<issue>4</issue>), <fpage>1278</fpage>&#x2013;<lpage>1284</lpage>. <pub-id pub-id-type="doi">10.3964/j.issn.1000-0593(2022)04-1278-07</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>G. B.</given-names>
</name>
<name>
<surname>Xue</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>C. L.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Soil organic carbon and total nitrogen storage as affected by land use in a small watershed of the Loess Plateau, China</article-title>. <source>Eur. J. Soil Biol.</source> <volume>54</volume>, <fpage>16</fpage>&#x2013;<lpage>24</lpage>. <pub-id pub-id-type="doi">10.1016/j.ejsobi.2012.10.007</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>H. L.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Measurement of soil total N based on portable short wave NIR spectroscopy technology</article-title>. <source>Spectrosc. Spectr. Analysis</source> <volume>36</volume> (<issue>1</issue>), <fpage>91</fpage>&#x2013;<lpage>95</lpage>. <pub-id pub-id-type="doi">10.3964/j.issn.1000-0593(2016)01-0091-05</pub-id>
</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>L. Y.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Z. F.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>X. M.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>J. Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y. Q.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>P. J.</given-names>
</name>
<etal/>
</person-group> (<year>2023a</year>). <article-title>Mapping topsoil total nitrogen using random forest and modified regression kriging in agricultural areas of Central China</article-title>. <source>Plants-Basel</source> <volume>12</volume> (<issue>7</issue>), <fpage>1464</fpage>. <pub-id pub-id-type="doi">10.3390/plants12071464</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>R. R.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>W. E.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>D. J.</given-names>
</name>
<name>
<surname>Dai</surname>
<given-names>W. H.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>H. L.</given-names>
</name>
<etal/>
</person-group> (<year>2023b</year>). <article-title>Estimation of the total soil nitrogen based on a differential evolution algorithm from ZY1-02D hyperspectral satellite imagery</article-title>. <source>Agronomy-Basel</source> <volume>13</volume> (<issue>7</issue>), <fpage>1842</fpage>. <pub-id pub-id-type="doi">10.3390/agronomy13071842</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>X. L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X. N.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>J. Y.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>C. F.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Study on the determination of total nitrogen (TN) in different types of soil by near-infrared spectroscopy (NIS)</article-title>. <source>Spectrosc. Spectr. Analysis</source> <volume>30</volume> (<issue>4</issue>), <fpage>906</fpage>&#x2013;<lpage>910</lpage>. <pub-id pub-id-type="doi">10.3964/j.issn.1000-0593(2010)04-0906-05</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Z. P.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>J. L.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>C. M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J. Z.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Combination of efficient signal pre-processing and optimal band combination algorithm to predict soil organic matter through visible and near-infrared spectra</article-title>. <source>Spectrochimica Acta Part A-Molecular Biomol. Spectrosc.</source> <volume>240</volume>, <fpage>118553</fpage>. <pub-id pub-id-type="doi">10.1016/j.saa.2020.118553</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>Y. F.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Y. Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S. H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Hyperspectral modeling of soil organic matter based on characteristic wavelength in East China</article-title>. <source>Sustainability</source> <volume>14</volume> (<issue>14</issue>), <fpage>8455</fpage>. <pub-id pub-id-type="doi">10.3390/su14148455</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname>
<given-names>K. Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Q. Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J. J.</given-names>
</name>
<name>
<surname>Geng</surname>
<given-names>J. P.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Sui</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2012</year>). <article-title>Stability competitive adaptive reweighted sampling (SCARS) and its applications to multivariate calibration of NIR spectra</article-title>. <source>Chemom. Intelligent Laboratory Syst.</source> <volume>112</volume>, <fpage>48</fpage>&#x2013;<lpage>54</lpage>. <pub-id pub-id-type="doi">10.1016/j.chemolab.2012.01.002</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhong</surname>
<given-names>Q. M.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S. R.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>H. L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>X. X.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). <article-title>The influence of climate, topography, parent material and vegetation on soil nitrogen fractions</article-title>. <source>Catena</source> <volume>175</volume>, <fpage>329</fpage>&#x2013;<lpage>338</lpage>. <pub-id pub-id-type="doi">10.1016/j.catena.2018.12.027</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>