<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2022.1090970</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Wheat yield estimation using remote sensing data based on machine learning approaches</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Cheng</surname>
<given-names>Enhui</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2085878"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Bing</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Peng</surname>
<given-names>Dailiang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1161522"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhong</surname>
<given-names>Liheng</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yu</surname>
<given-names>Le</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/992076"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Yao</given-names>
</name>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Xiao</surname>
<given-names>Chenchao</given-names>
</name>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Cunjun</given-names>
</name>
<xref ref-type="aff" rid="aff7">
<sup>7</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Xiaoyi</given-names>
</name>
<xref ref-type="aff" rid="aff8">
<sup>8</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Yue</given-names>
</name>
<xref ref-type="aff" rid="aff8">
<sup>8</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Ye</surname>
<given-names>Huichun</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1259995"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Hongye</given-names>
</name>
<xref ref-type="aff" rid="aff9">
<sup>9</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yu</surname>
<given-names>Ruyi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hu</surname>
<given-names>Jinkang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2061676"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yang</surname>
<given-names>Songlin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Key Laboratory of Digital Earth Science, Aerospace Information Research Institute, Chinese Academy of Sciences</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>College of Resource and Environment, University of Chinese Academy of Sciences</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>International Research Center of Big Data for Sustainable Development Goals</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Ant Group</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Ministry of Education Key Laboratory for Earth System Modeling, Department of Earth System Science, Institute for Global Change Studies, Tsinghua University</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>Land Satellite Remote Sensing Application Center, Ministry of Natural Resources of China</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff7">
<sup>7</sup>
<institution>Information Technology Research Center, Beijing Academy of Agriculture and Forestry Sciences</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff8">
<sup>8</sup>
<institution>Aerospace ShuWei High Tech. Co., Ltd.</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff9">
<sup>9</sup>
<institution>Cultivated Land Quality Monitoring and Protection center, Ministry of Agriculture and Rural Affairs</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Vanessa Martos N&#xfa;&#xf1;ez, University of Granada, Spain</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Linsheng Huang, Anhui University, China; Karansher Singh Sandhu, Bayer Crop Science, United States</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Dailiang Peng, <email xlink:href="mailto:pengdl@aircas.ac.cn">pengdl@aircas.ac.cn</email>
</p>
</fn>
<fn fn-type="other" id="fn002">
<p>This article was submitted to Technical Advances in Plant Science, a section of the journal Frontiers in Plant Science</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>23</day>
<month>12</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>1090970</elocation-id>
<history>
<date date-type="received">
<day>06</day>
<month>11</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>05</day>
<month>12</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Cheng, Zhang, Peng, Zhong, Yu, Liu, Xiao, Li, Li, Chen, Ye, Wang, Yu, Hu and Yang</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Cheng, Zhang, Peng, Zhong, Yu, Liu, Xiao, Li, Li, Chen, Ye, Wang, Yu, Hu and Yang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Accurate predictions of wheat yields are essential to farmers&#x2019; production plans and to the international trade in wheat. However, only poor approximations of the productivity of wheat crops in China can be obtained using traditional linear regression models based on vegetation indices and observations of the yield. In this study, Sentinel-2 (multispectral data) and ZY-1 02D (hyperspectral data) were used together with 15709 gridded yield data (with a resolution of 5&#xa0;m &#xd7; 5&#xa0;m) to predict the winter wheat yield. These estimates were based on four mainstream data-driven approaches: Long Short-Term Memory (LSTM), Random Forest (RF), Gradient Boosting Decision Tree (GBDT), and Support Vector Regression (SVR). The method that gave the best estimate of the winter wheat yield was determined, and the accuracies of the estimates based on multispectral and hyperspectral data were compared. The results showed that the LSTM model, for which the RMSE of the estimates was 0.201 t/ha, performed better than the RF (RMSE = 0.260 t/ha), GBDT (RMSE = 0.306 t/ha), and SVR (RMSE = 0.489 t/ha) methods. The estimates based on the ZY-1 02D hyperspectral data were more accurate than those based on the 30-m Sentinel-2 data: RMSE = 0.237 t/ha for the ZY-1 02D data, which is about a 5% improvement on the RMSE of 0.307 t/ha for the 30-m Sentinel-2 data. However, the 10-m Sentinel-2 data performed even better, giving an RMSE of 0.219 t/ha. In addition, it was found that the greenness vegetation index SR (simple ratio index) outperformed the traditional vegetation indices. The results highlight the potential of the shortwave infrared bands to replace the visible and near-infrared bands for predicting crop yields. Our study demonstrates the advantages of the deep learning method LSTM over machine learning methods in terms of its ability to make accurate estimates of the winter wheat yield.</p>
</abstract>
<kwd-group>
<kwd>band selection</kwd>
<kwd>deep learning</kwd>
<kwd>google earth engine (GEE)</kwd>
<kwd>hyperspectral</kwd>
<kwd>winter wheat</kwd>
<kwd>yield estimation</kwd>
</kwd-group>
<contract-sponsor id="cn001">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content>
</contract-sponsor>
<contract-sponsor id="cn002">National Natural Science Foundation of China<named-content content-type="fundref-id">10.13039/501100001809</named-content>
</contract-sponsor>
<counts>
<fig-count count="8"/>
<table-count count="3"/>
<equation-count count="6"/>
<ref-count count="72"/>
<page-count count="16"/>
<word-count count="8801"/>
</counts>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<title>1 Introduction</title>
<p>Wheat is one of the most important food crops in China and has the greatest cultivation area and total production among all cereal crops. It has been predicted that the world&#x2019;s total wheat yield will increase by 17% by 2030 due to global warming (<xref ref-type="bibr" rid="B27">J&#xe4;germeyr et&#xa0;al., 2021</xref>). Therefore, using scientific methods to study the various parameters of wheat growth is very important to ensuring the stability of the country&#x2019;s wheat market (<xref ref-type="bibr" rid="B62">Weiss et&#xa0;al., 2020</xref>). Accurate forecasts of wheat production are of vital importance to farmers&#x2019; production plans, the international wheat trade, and import/export plans, and make a direct contribution to the development of China&#x2019;s wheat market, especially in the context of the COVID-19 pandemic (<xref ref-type="bibr" rid="B44">Mawani and Li, 2020</xref>).</p>
<p>Traditional agricultural yield forecasting methods mainly include agronomic forecasting methods (<xref ref-type="bibr" rid="B14">Feng and Wu, 2006</xref>), crop-growth models (<xref ref-type="bibr" rid="B58">Thorp et&#xa0;al., 2008</xref>), and meteorological statistical methods (<xref ref-type="bibr" rid="B3">Betbeder et&#xa0;al., 2016</xref>), and these are used to establish crop yield models based on different perspectives. However, these methods not only consume a lot of manpower and material resources but also leave spatial and temporal gaps in the results. Since 2000, satellite remote sensing technology has played an important role in related fields such as resource surveys (<xref ref-type="bibr" rid="B45">Mitchell, 2021</xref>), urban planning (<xref ref-type="bibr" rid="B19">Guo et&#xa0;al., 2019</xref>), agricultural development (<xref ref-type="bibr" rid="B52">Qiao et&#xa0;al., 2021</xref>), and national security (<xref ref-type="bibr" rid="B70">Zhang et&#xa0;al., 2022</xref>). The use of satellite remote sensing has become an effective way of making yield predictions due to its advantages of simple data acquisition, low cost, efficiency, wide spatial coverage, and short operating cycles (<xref ref-type="bibr" rid="B49">Peng et&#xa0;al., 2014</xref>; <xref ref-type="bibr" rid="B69">Zhang et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B60">Wang et&#xa0;al., 2020</xref>).</p>
<p>Vegetation indices (VIs) have been widely used to predict crop yields over the past few decades (<xref ref-type="bibr" rid="B30">Jin et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B32">Kamir et&#xa0;al., 2020</xref>). In most such studies, indices such as the Normalized Difference Vegetation Index (NDVI) and Enhanced Vegetation Index (EVI), which are based on visible and near-infrared bands (<xref ref-type="bibr" rid="B50">Peng et&#xa0;al., 2010</xref>; <xref ref-type="bibr" rid="B12">Cunha and Silva, 2020</xref>), are used. However, these vegetation indices mainly reflect the greenness of vegetation and cannot fully capture environmental stresses on crops (<xref ref-type="bibr" rid="B72">Zhang et&#xa0;al., 2021</xref>). This means that the role of other vegetation indices such as the Normalized Difference Water Index (NDWI) (<xref ref-type="bibr" rid="B16">Gao, 1996</xref>), which reflects the crop water content, and the Red Edge Position Index (REP), which is sensitive to changes in chlorophyll concentration, should also be considered when making yield estimates. In our study, four different types of vegetation indices reflecting crop growth status were used as described above.</p>
<p>Both broadband multispectral data and narrowband hyperspectral data can be used to calculate spectral VIs, but the former are limited and prone to oversaturation where vegetation cover is high (<xref ref-type="bibr" rid="B29">Jiang and Huete, 2010</xref>) and thus have difficulty reflecting changes in biophysical and chemical parameters. Narrow hyperspectral bands are more sensitive to crop growth changes than multispectral broad bands (<xref ref-type="bibr" rid="B53">Sellami et&#xa0;al., 2022</xref>), but there have been fewer quantitative studies involving the former compared to the latter. Based on narrowband data, the hyperspectral vegetation index (HVI) can fully describe the changes in biophysical and chemical parameters that occur as crops grow, which is important to improving the accuracy of yield estimates made by models (<xref ref-type="bibr" rid="B63">Xiao et&#xa0;al., 2022</xref>). Therefore, an increasing number of hyperspectral vegetation indices have been applied to the prediction of crop parameters including the crop yield (<xref ref-type="bibr" rid="B66">Yang et&#xa0;al., 2021</xref>), leaf area index (<xref ref-type="bibr" rid="B65">Xing et&#xa0;al., 2013</xref>), and nitrogen content (<xref ref-type="bibr" rid="B41">Ma et&#xa0;al., 2022</xref>). Further, hyperspectral data typically require sophisticated data mining and filtering techniques given the large number of bands and low signal-to-noise ratio (<xref ref-type="bibr" rid="B43">Marshall et&#xa0;al., 2022</xref>). In previous studies, hyperspectral band selection methods have included the band-by-band combination method (<xref ref-type="bibr" rid="B65">Xing et&#xa0;al., 2013</xref>), the Optimum Index Factor (<xref ref-type="bibr" rid="B34">Kong et&#xa0;al., 2022</xref>), and the successive projection algorithm. For example, based on PRISMA hyperspectral images and Sentinel-2 multispectral images, <xref ref-type="bibr" rid="B43">Marshall et al. 
(2022)</xref> used three separate models based on Two-band Vegetation Indices (TBVIs), Random Forest (RF), and Partial Least Squares Regression (PLSR) to estimate the yield of four different crops and revealed the potential complementarity of hyperspectral image PRISMA in predicting crop biomass and yield. However, most studies use only visible wavelengths (<xref ref-type="bibr" rid="B71">Zhang et&#xa0;al., 2018</xref>); there are few reported attempts at directly evaluating the potential of shortwave infrared bands (1000&#x2013;2500 nm) in crop yield prediction, and the spectral information captured within the full waveband range remains unexplored for yield prediction.</p>
<p>The construction of linear regression models linking vegetation indices or climatic variables that track the evolution of crop canopy spectral reflectance patterns over the growing season and yields is the traditional method of estimating yields (<xref ref-type="bibr" rid="B22">He and He, 2013</xref>). However, although the calculations may be simple, the relationships involved are not simply linear, and these methods do not capture yield variations well. In the last five years, with the advent of the big data era, conditions have been created for machine learning methods (<xref ref-type="bibr" rid="B72">Zhang et&#xa0;al., 2021</xref>), and more and more computer-dependent machine learning models have been applied to crop yield estimation, usually outperforming traditional linear regression. Deep learning (DL) is an advanced Machine Learning (ML) method that uses multiple, stacked nonlinear layers, at each of which the original input data can be transformed into a higher and more abstract representation (<xref ref-type="bibr" rid="B6">Cai et&#xa0;al., 2019</xref>). DL models such as Long Short-Term Memory (LSTM), Deep Neural Network (DNN), Convolutional Neural Network (CNN), and Recurrent Neural Network (RNN) have produced definitively higher accuracies across various regression and classification tasks (<xref ref-type="bibr" rid="B37">LeCun et&#xa0;al., 2015</xref>). The main advantage of using deep learning techniques in agricultural applications is that the data are hierarchically and incrementally trained with high-level features, eliminating the need to generalize the output. Deep learning models are thus becoming a powerful tool for predicting the yields of various crops (<xref ref-type="bibr" rid="B20">Haider et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B54">Sharma et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B59">Tian et&#xa0;al., 2021</xref>). For example, <xref ref-type="bibr" rid="B72">Zhang et&#xa0;al. 
(2021)</xref> found the LSTM deep learning algorithm outperformed the two other machine learning models in estimating maize yields in China. <xref ref-type="bibr" rid="B25">Huang et&#xa0;al. (2022)</xref> developed a Dual-Stream deep-learning neural network model for improving county-level winter wheat yield estimates in China and achieved an average R<sup>2</sup> of 0.79. <xref ref-type="bibr" rid="B21">Han et&#xa0;al. (2022)</xref> integrated an attention-based deep learning framework and the SAFY-V model for winter wheat yield estimation using time series SAR and optical data. <xref ref-type="bibr" rid="B64">Xie and Huang (2021)</xref> found the estimated yields from LSTM, 1-D CNN, RF correlated strongly with statistical yields, and the LSTM model achieved the highest estimation accuracies for wheat yields at the site, municipal and county levels. However, the application of ML and DL to yield estimation is still in its infancy, especially in China.</p>
<p>In most studies, yield data are obtained from plot-based manual surveys or consist of county-level regional yields that need to be collected from official statistics websites for larger areas (<xref ref-type="bibr" rid="B55">Sun et&#xa0;al., 2020b</xref>). In our study, the yield data used were based on the grid scale (with a resolution of 5&#xa0;m &#xd7; 5&#xa0;m), and accurate measurements were made using specialist instruments at harvest time. These data were more suitable for use as labels to be trained and validated by the models. The number of sample points (15709) was sufficient to allow proper training of the DL and ML models. In this study, using this large number of sampled data together with 30-m ZY-1 02D hyperspectral imagery and 10- and 30-m Sentinel-2 multispectral remote sensing imagery, we established four data-driven models &#x2013; LSTM (Long Short-Term Memory), RF (Random Forest), SVR (Support Vector Regression), and GBDT (Gradient Boosting Decision Tree) &#x2013; to estimate winter wheat yields.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<title>2 Material and methods</title>
<sec id="s2_1">
<title>2.1 Study areas</title>
<p>Located in the Changping District of Beijing (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>), Xiaotangshan National Precision Agriculture Demonstration Base (40.10&#xb0;N, 116.26&#xb0;E; altitude 39&#xa0;m) has a typical climate of the northern winter wheat zone, with an average of 2506.5 hours of sunshine a year, an average annual temperature of 13.3&#xb0;C, and an average annual rainfall of 563.8&#xa0;mm. The base is used as a high-quality agricultural research area relevant to large irrigated areas and high winter wheat yields.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Geographical location and layout of the study area.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-1090970-g001.tif"/>
</fig>
</sec>
<sec id="s2_2">
<title>2.2 Datasets and processing</title>
<sec id="s2_2_1">
<title>2.2.1 Wheat yield data and auxiliary data</title>
<p>(1) Yield data: From 2020 to 2021, as part of the key project &#x2018;Remote sensing inversion of wheat vegetation parameters based on deep learning&#x2019;, a yield survey was conducted on winter wheat plots at Xiaotangshan. A total of 15709 dry weight yield data were collected. These data were to be used for training wheat yield estimation models; the values collected ranged from 1.39 to 6.75 t/ha and satisfied the amount of variation that was required. Measurements of the yield were also made at 39 sample points on the ground &#x2013; within the square area in the upper-right corner of the study area shown in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>. These data were used to select the hyperspectral bands.</p>
<p>(2) ASD spectral data: ASD (Analytical Spectral Devices) field spectroradiometers are mainly used to measure the reflectance and transmittance of surface sediments, soils, plants, water bodies, and artificial targets in the range 350&#x2013;2450 nm. Using an ASD spectroradiometer, we obtained spectral data for the 39 sample points on April 14, 2021; these data had a spectral resolution of 3 nm in the 350&#x2013;1000 nm interval and 8 nm in the 1000&#x2013;2450 nm interval. From these data, we selected the same 166 bands that are contained in ZY-1 02D data and selected the best vegetation index combination by performing a correlation analysis with the yield at the 39 sample points. The result was then transferred to the band selection of the hyperspectral data.</p>
<p>(3) Grouped data experiments: The data that had been acquired at the 39 sample points were divided into several groups based on the seeding density, irrigation rate, fertilization rate, and seeding method; experiments were then conducted on these different seeding groups of data. A correlation analysis between the different variables and the yield was performed in order to provide data on which the planting of wheat crops could be based.</p>
</sec>
<sec id="s2_2_2">
<title>2.2.2 Remote sensing data</title>
<p>(1) Sentinel-2 imagery: Sentinel-2 is an important optical remote sensing satellite of the European Space Agency&#x2019;s (ESA&#x2019;s) &#x2018;Copernicus&#x2019; satellite series. Sentinel-2 data are used for land monitoring and can provide images of vegetation, soil and water cover, inland waterways and coastal areas. The Sentinel-2 satellite carries a multispectral imager (MSI) that has a swath width of 290&#xa0;km and orbits at an altitude of 786&#xa0;km. The data cover 13 spectral bands and have ground resolutions of 10, 20, and 60&#xa0;m, respectively. Among all satellite data, only Sentinel-2 data contain three bands in the red-edge range, which means that these data are extremely useful for monitoring vegetation health. In this study, three visible bands, one near-infrared band, one shortwave infrared band, and three red-edge bands were used (see <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>). Winter wheat was sown in the study area on October 7, 2020 and harvested on June 16, 2021.&#xa0;A total of 99 Sentinel-2 images from the &#x2018;COPERNICUS/S2_SR&#x2019; dataset in the Google Earth Engine (GEE) that covered the period from sowing to harvest were used in this study.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Details of the data used in this study.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Sensor</th>
<th valign="middle" align="center">Band name</th>
<th valign="middle" align="center">Spectral range (nm)</th>
<th valign="middle" align="center">Band number</th>
<th valign="middle" align="center">Bandwidth (nm)</th>
<th valign="middle" align="center">Spatial resolution, raw/resampled (m)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Sentinel-2</td>
<td valign="middle" align="left">Blue</td>
<td valign="middle" align="center">458&#x2013;523</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">65</td>
<td valign="middle" align="center">10/30</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left">Green</td>
<td valign="middle" align="center">543&#x2013;578</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">35</td>
<td valign="middle" align="center">10/30</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left">Red</td>
<td valign="middle" align="center">650&#x2013;680</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">30</td>
<td valign="middle" align="center">10/30</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left">Red-edge 1</td>
<td valign="middle" align="center">698&#x2013;713</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">15</td>
<td valign="middle" align="center">20/30</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left">Red-edge 2</td>
<td valign="middle" align="center">733&#x2013;748</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">15</td>
<td valign="middle" align="center">20/30</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left">Red-edge 3</td>
<td valign="middle" align="center">773&#x2013;793</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">20</td>
<td valign="middle" align="center">20/30</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left">NIR</td>
<td valign="middle" align="center">785&#x2013;900</td>
<td valign="middle" align="center">8</td>
<td valign="middle" align="center">115</td>
<td valign="middle" align="center">10/30</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left">SWIR1</td>
<td valign="middle" align="center">1565&#x2013;1655</td>
<td valign="middle" align="center">11</td>
<td valign="middle" align="center">90</td>
<td valign="middle" align="center">20</td>
</tr>
<tr>
<td valign="middle" align="left">ZY-1 02D</td>
<td valign="middle" align="left">VNIR</td>
<td valign="middle" align="center">396&#x2013;1040</td>
<td valign="middle" align="center">76</td>
<td valign="middle" align="center">9</td>
<td valign="middle" align="center">30</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left">SWIR</td>
<td valign="middle" align="center">1006&#x2013;2501</td>
<td valign="middle" align="center">90</td>
<td valign="middle" align="center">17</td>
<td valign="middle" align="center">30</td>
</tr>
<tr>
<td valign="middle" align="left">ASD</td>
<td valign="middle" align="left">VNIR</td>
<td valign="middle" align="center">350&#x2013;1000</td>
<td valign="middle" align="center">217</td>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">&#x2014;</td>
</tr>
<tr>
<td valign="middle" align="left"/>
<td valign="middle" align="left">SWIR</td>
<td valign="middle" align="center">1000&#x2013;2450</td>
<td valign="middle" align="center">181</td>
<td valign="middle" align="center">8</td>
<td valign="middle" align="center">&#x2014;</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>(2) ZY-1 02D imagery: The ZY-1 02D satellite was successfully launched from the Taiyuan Satellite Launch Center on September 12, 2019 and carries a hyperspectral camera (Advanced HyperSpectral Imager, AHSI) with 166 bands. This instrument has a spatial resolution better than 30&#xa0;m, spectral resolutions of 9 and 17 nm in the visible&#x2013;near-infrared and shortwave infrared bands, respectively, a swath width of 60&#xa0;km, an operating cycle of 55 days, and bands whose wavelengths range from 396 to 2501 nm. Four ZY-1 02D hyperspectral images acquired on March 24, March 30, April 8, and May 1, 2021 were selected for use in this study.</p>
</sec>
</sec>
<sec id="s2_3">
<title>2.3 Methodology</title>
<sec id="s2_3_1">
<title>2.3.1 Feature selection and its importance</title>
<sec id="s2_3_1_1">
<title>2.3.1.1 Selection of the vegetation index</title>
<p>Using spectral information about the amount of chlorophyll and water absorbed or reflected by a crop in specific wavelength bands, information about parameters related to the growth of the crop can be obtained. From our own spectral measurements, we found that there was strong reflectance from the wheat ears at 850 nm (near infrared) and 1800&#x2013;1900 nm (shortwave infrared) and that the ratio of the near-infrared to red bands effectively reflected the grain quality of the crop and was well correlated with the yield. We thus selected the simple ratio vegetation index SR for use in this study. In addition, we selected the enhanced vegetation index EVI as another index related to vegetation greenness; this index is based on the blue, red, and near-infrared bands. At late maturity, the spectral properties of plants are strongly influenced by the water content and thickness of the leaves. Absorption bands close to 1.4 &#xb5;m, 1.9 &#xb5;m, and 2.6 &#xb5;m are formed by the absorption of water molecules, and distinct reflection peaks are located at 1.6 &#xb5;m and 2.2 &#xb5;m, between the absorption bands. The intensity of these two reflectance peaks is important for detecting the water content of plant leaves, and based on this spectral feature, we chose the vegetation index NDWI, proposed by <xref ref-type="bibr" rid="B16">Gao (1996)</xref>, to study the water content of wheat. The red-edge band is located between an absorption valley and a peak and covers the range from 690 to 730 nm; the leaf reflectance changes abruptly in this interval. The red-edge band is sensitive to changes in chlorophyll content and is the most obvious to use for detecting stress caused by disease in winter wheat (<xref ref-type="bibr" rid="B29">Jiang and Huete, 2010</xref>). We therefore also selected the red-edge position index REP, which is based on the red-edge band of Sentinel-2, for use in this study.</p>
<p>In summary, we selected a total of eight bands covering the visible (red, green, and blue), near-infrared, shortwave-infrared, and red-edge regions, and calculated the following four vegetation indices: Enhanced Vegetation Index (EVI), Normalized Difference Water Index (NDWI), Simple Ratio (SR), and Red Edge Position Index (REP) (<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>).</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>The different vegetation indices used in this study.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">VI</th>
<th valign="middle" align="center">Equation</th>
<th valign="middle" align="center">Reference</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">EVI(Enhanced Vegetation Index)</td>
<td valign="middle" align="left">
<inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:mn>2.5</mml:mn>
<mml:mo>*</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mn>8</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mn>4</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mn>8</mml:mn>
<mml:mo>+</mml:mo>
<mml:mn>6</mml:mn>
<mml:mo>*</mml:mo>
<mml:mi>B</mml:mi>
<mml:mn>4</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>7.5</mml:mn>
<mml:mo>*</mml:mo>
<mml:mi>B</mml:mi>
<mml:mn>2</mml:mn>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td valign="middle" align="left">
<xref ref-type="bibr" rid="B26">Huete et&#xa0;al. (2002)</xref>
</td>
</tr>
<tr>
<td valign="middle" align="left">SR(Simple Ratio)</td>
<td valign="middle" align="left">
<inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mn>8</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td valign="middle" align="left">
<xref ref-type="bibr" rid="B31">Jordan (1969)</xref>
</td>
</tr>
<tr>
<td valign="middle" align="left">NDWI(Normalized Difference Water Index)</td>
<td valign="middle" align="left">
<inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mn>8</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mn>11</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mn>8</mml:mn>
<mml:mo>+</mml:mo>
<mml:mi>B</mml:mi>
<mml:mn>11</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td valign="middle" align="left">
<xref ref-type="bibr" rid="B16">Gao (1996)</xref>
</td>
</tr>
<tr>
<td valign="middle" align="left">REP(Red Edge Position Index)</td>
<td valign="middle" align="left">
<inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:mn>705</mml:mn>
<mml:mo>+</mml:mo>
<mml:mn>35</mml:mn>
<mml:mo>*</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>0.5</mml:mn>
<mml:mo>*</mml:mo>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>B</mml:mi>
<mml:mn>4</mml:mn>
<mml:mo>+</mml:mo>
<mml:mi>B</mml:mi>
<mml:mn>7</mml:mn>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mn>5</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mn>6</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>B</mml:mi>
<mml:mn>5</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula>
</td>
<td valign="middle" align="left">
<xref ref-type="bibr" rid="B24">Horler et&#xa0;al. (1983)</xref>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>B2, B3, and B4: (visible) blue, green, and red bands; B5, B6, and B7: bands within the red edge; B8: near-infrared band (wide); B11: shortwave infrared band.</p>
</sec>
<sec id="s2_3_1_2">
<title>2.3.1.2 Data preprocessing and calculation of vegetation indices</title>
<p>We retrieved Sentinel-2 data from the Google Earth Engine (GEE) and filtered out all the images in which the cloud cover was greater than 30%. After that, using the GEE, we calculated the mean value of each selected vegetation index in one-month steps for the period October 2020 to June 2021 and constructed a sequence of the mean monthly values. The four ZY-1 02D scenes were first preprocessed in ENVI 5.3 &#x2013; the preprocessing steps included orthorectification, geometric correction, and atmospheric correction. The processed data were then uploaded to the GEE platform for the feature calculation. All bands of both types of imagery were resampled using nearest-neighbor interpolation to the spatial resolution required for our experiments.</p>
</sec>
<sec id="s2_3_1_3">
<title>2.3.1.3 Feature importance</title>
<p>In recent years, neural networks have been widely used, and they are usually considered black-box models with poor interpretability (<xref ref-type="bibr" rid="B40">Lu et&#xa0;al., 2017</xref>). Feature selection usually takes place at the data-processing stage. This means that parameters such as the number of features need to be set manually based on experience (<xref ref-type="bibr" rid="B51">Poria et&#xa0;al., 2015</xref>). This introduces a lot of uncertainty, which leads to a loss of learning and generalization ability. To avoid the problem, many researchers have tried using different approaches to incorporate the traditional feature selection process into the networks in order to understand their convolution processes (<xref ref-type="bibr" rid="B35">Krizhevsky et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B2">Alain and Bengio, 2018</xref>). Various methods of obtaining the feature importance have been proposed: these include Permutation Feature Importance, SHAP Feature Importance, and LOFO Feature Importance, which are universal and can be applied to any model (<xref ref-type="bibr" rid="B5">Breiman, 2001</xref>; <xref ref-type="bibr" rid="B15">Fisher et&#xa0;al., 2019</xref>). The principle on which the Permutation Feature Importance (PFI) method is based is that permuting the values of a feature destroys the relationship between that feature and the true results, so that the model prediction error increases after the replacement of the feature values; the larger the increase, the more important the feature. The PFI approach provides a global insight into the behavior of the LSTM yield-prediction model, and automatically takes into account all interactions with other features. In contrast to methods that remove certain features, PFI does not require the model to be retrained, thus saving time and computational resources.
In addition to this, the use of a subset of features seems intuitive; however, the reduced number of features is meaningless in terms of feature importance since we are interested in the importance of the fixed features of the model. In this study, the four vegetation index feature variables (SR, EVI, NDWI, and REP) were input to the proposed model for training, and the importance of each feature was calculated using the PFI method based on the LSTM neural network that we constructed. The steps used to obtain the importance of the neural network features in this study consisted of the following: (1) train the LSTM neural network model; (2) randomly shuffle one vegetation index feature at a time, so that it no longer corresponds to the yield, and feed it to the model for prediction to obtain the loss; and (3) record the loss corresponding to each shuffled feature column. Taking SR as an example, <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref> shows the flow of the PFI method.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Flowchart of the PFI method.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-1090970-g002.tif"/>
</fig>
</sec>
</sec>
<sec id="s2_3_2">
<title>2.3.2 Hyperspectral band selection</title>
<p>ZY-1 02D data consist of a total of 166 bands: 3 of these bands overlap when the VNIR and SWIR bands are partially spliced, giving 163 effective bands. A statistical approach, the band-by-band combination method, was used to select the bands among the 163 bands that were related to high wheat yields. In a recent study (<xref ref-type="bibr" rid="B71">Zhang et&#xa0;al., 2018</xref>), the band-by-band combination method that was used consisted of taking every possible pair of bands to construct vegetation indices based on specific mathematical algorithms, and the vegetation index with the highest correlation coefficient with the winter wheat LAI was selected as the best band combination. In this study, we calculated the values of three types of vegetation index &#x2013; difference, ratio, and normalized &#x2013; by arbitrarily combining pairs of bands of ASD data that consisted of the same 163 bands as ZY-1 02D data. An analysis of the correlation between the values of these indices and the winter wheat yield was then performed, and the indices that were most relevant to the winter wheat yield were determined. The above results were then used to calculate the values of three ZY-1 02D features:</p>
<p>
<italic>SSI</italic>(<italic>i</italic>,<italic>j</italic>)=<italic>R</italic>
<sub>
<italic>i</italic>
</sub>&#x2212;<italic>R</italic>
<sub>
<italic>j</italic>
</sub> (1)</p>
<disp-formula>
<label>(2)</label>
<mml:math display="block" id="im5">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>I</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula>
<label>(3)</label>
<mml:math display="block" id="M1">
<mml:mrow><mml:mi>N</mml:mi>
<mml:mi>D</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>I</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Here, <italic>i</italic> and <italic>j</italic> are labels representing any two bands; <italic>R<sub>i</sub>
</italic> and <italic>R<sub>j</sub>
</italic> represent the corresponding band values.</p>
</sec>
<sec id="s2_3_3">
<title>2.3.3 Establishment of the yield prediction models</title>
<p>All of the feature-yield data were randomly divided into two groups in the ratio 9:1, with 90% of the data used for training and 10% of the data used for testing. In order to allow a comprehensive evaluation of the experimental results to be made, three metrics were used: the mean absolute error (MAE), the root mean squared error (RMSE), and the coefficient of determination (R<sup>2</sup>). The model that had the largest value of R<sup>2</sup> and the smallest values of MAE and RMSE was considered to be the optimal one.</p>
<p>The MAE, the RMSE, and R<sup>2</sup> were calculated as follows:
<inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>h</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>m</mml:mi>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>m</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</inline-formula> (4)
</p>
<p>
<inline-formula>
<mml:math display="inline" id="im7">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>h</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>m</mml:mi>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>m</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula> (5)</p>
<disp-formula>
<label>(6)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>h</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>i</mml:mi>
</mml:munder>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>h</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>i</mml:mi>
</mml:munder>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mover accent="true">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="true">&#xaf;</mml:mo>
</mml:mover>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Here, <italic>y<sub>i</sub>
</italic> is the true value of yield, <italic>h(x<sub>i</sub>)</italic> is the value predicted by the yield estimation model, <italic>m</italic> is the number of sample points, <inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="true">&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> is the mean value of the yield, and <italic>i</italic> is the index of the sample point.</p>
<sec id="s2_3_3_1">
<label>2.3.3.1</label>
<title>LSTM time-series DL model</title>
<p>The LSTM model used a Recurrent Neural Network (RNN) architecture consisting of an input layer, one or more LSTM layers, and an output layer that could learn time-dependent information to incorporate the crop growth process (<xref ref-type="bibr" rid="B23">Hochreiter and Schmidhuber, 1997</xref>). The LSTM layers were composed of LSTM cells. Each cell contained three types of gates: the input gates determined what input information was retained, the forget gates determined how much of the previous information input was retained, and the output gates combined the previous output with the current input to determine the final output. In the neural network that was designed, the vegetation index time-series data were passed through two LSTM layers that consisted of 100 neurons, then through a ReLU activation function and a fully connected layer. A dropout rate of 0.3 and L2 regularization were applied to avoid overfitting and improve the generalization effect. We set the learning rate (lr) to 0.001, the batch size (batch_size) to 64, and the number of epochs (epoch) to 700 to further reduce the risk of overfitting. The calculation process of a basic LSTM unit is as follows:</p>
<disp-formula>
<mml:math display="block" id="M3">
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>b</mml:mi>
<mml:mi>i</mml:mi>
</mml:msup>
<mml:mo stretchy="false">)</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>b</mml:mi>
<mml:mi>f</mml:mi>
</mml:msup>
<mml:mo stretchy="false">)</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>tanh</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="false">)</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2297;</mml:mo>
<mml:msub>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2297;</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msup>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>b</mml:mi>
<mml:mi>o</mml:mi>
</mml:msup>
<mml:mo stretchy="false">)</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2297;</mml:mo>
<mml:mi>tanh</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula>
<p>
<italic>W<sup>ih</sup>
</italic>, <italic>W<sup>ix</sup>
</italic>, <italic>b<sup>i</sup>
</italic>, <italic>W<sup>fh</sup>
</italic>, <italic>W<sup>fx</sup>
</italic>, <italic>b<sup>f</sup>
</italic>, <italic>W<sup>gh</sup>
</italic>, <italic>W<sup>gx</sup>
</italic>, <italic>b<sup>g</sup>
</italic>, <italic>W<sup>oh</sup>
</italic>, <italic>W<sup>ox</sup>
</italic> and <italic>b<sup>o</sup></italic> are model parameters; <italic>g<sub>t</sub>
</italic> is the nonlinear transformation for better representing the input <italic>x<sub>t</sub>
</italic>; <italic>i<sub>t</sub>
</italic>, <italic>f<sub>t</sub>
</italic>, <italic>o<sub>t</sub>
</italic> are the input gate, forget gate, and output gate, respectively; and <italic>&#x3c3;</italic> and <italic>&#x2297;</italic> are the sigmoid function and the element-wise multiplication (i.e., Hadamard product) operation, respectively.</p>
</sec>
<sec id="s2_3_3_2">
<label>2.3.3.2</label>
<title>RF, GBDT, and SVR ML models</title>
<p>Given that the machine learning models (RF, GBDT, and SVR) could not learn time-series information as the LSTM recurrent neural network could, the data had to be converted from a 4&#xd7;9 matrix format into a 1&#xd7;36 vector format before being input.</p>
<p>A Random Forest (RF) is formed by integrating multiple decision trees that are trained on randomly selected samples (<xref ref-type="bibr" rid="B5">Breiman, 2001</xref>). In our study, 90% of the samples were used for training and the remaining out-of-bag (OOB) samples were used for error assessment. Optimal parameter tuning was performed; the best combination was found to be a setting of 150 for the number of trees and a value of 200 for the random state parameter. The core algorithm of RF is as follows:
<inline-formula>
<mml:math display="inline" id="im9">
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:munder>
<mml:munder>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
<mml:mo stretchy="true">&#xfe38;</mml:mo>
</mml:munder>
<mml:mrow>
<mml:mtext>A,s</mml:mtext>
</mml:mrow>
</mml:munder>
<mml:mo stretchy="false">[</mml:mo>
<mml:munder>
<mml:munder>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
<mml:mo stretchy="true">&#xfe38;</mml:mo>
</mml:munder>
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:munder>
<mml:msup>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:munder>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:munder>
<mml:munder>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
<mml:mo stretchy="true">&#xfe38;</mml:mo>
</mml:munder>
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:munder>
<mml:msup>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:munder>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
<p>Here, <italic>C<sub>1</sub>
</italic> is the mean output of the samples in data set <italic>D<sub>1</sub>
</italic>, <italic>C<sub>2</sub>
</italic> is the mean output of the samples in data set <italic>D<sub>2</sub>
</italic>, <italic>A</italic> is the splitting feature, <italic>s</italic> is the split point, and <italic>y<sub>i</sub>
</italic> is the output value of the i-th sample point.</p>
<p>A Gradient Boosting Decision Tree (GBDT) is an iterative decision tree algorithm that consists of multiple decision trees and which uses the accumulated conclusions of all the trees as the final result; the advantage of the GBDT method is its robustness to outliers. Parameter tuning was performed for the GBDT model, and the best combination was found to be: number of trees = 1250, subsample = 0.6, and learning rate = 0.1. The core algorithm of GBDT is as follows:</p>
<disp-formula>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>m</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Here, <italic>T(X,&#x3b8;)</italic> is a decision tree, <italic>&#x3b8;</italic> denotes the parameters of the decision tree, and <italic>m</italic> is the number of trees.</p>
<p>A Support Vector Regression (SVR) model is a tolerant regression model that creates an &#x2018;interval band&#x2019; with a spacing of &#x3f5; (the tolerance bias, an empirical value set by hand) on both sides of the linear function and which does not calculate the loss for all samples falling into the interval band. The model is obtained by minimizing the total loss and maximizing the interval. SVR is sensitive to the choice of hyperparameters. We selected the Gaussian kernel function (RBF) as the kernel function. As for the other models, the best parameter settings were found by experiment. It was found that the best combination was a value of 1 &#xd7; 10<sup>5</sup> for C and a value of 0.5 for gamma. The core algorithm of SVR is as follows:</p>
<disp-formula>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:munder>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>w</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mn>2</mml:mn>
</mml:mfrac>
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo>&#x2016;</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo>&#x2016;</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:mi>C</mml:mi>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>m</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mi>&#x3be;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mover>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3be;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mo>&#x2227;</mml:mo>
</mml:mover>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Here, <italic>w</italic> and <italic>b</italic> are the model parameters, <italic>m</italic> is the number of sample points, and <italic>&#x3be;</italic> is the slack variable.</p>
<p>All of the source code is available at <uri xlink:href="https://github.com/limitlesszang/yield_prediction">https://github.com/limitlesszang/yield_prediction</uri>.</p>
</sec>
</sec>
</sec>
</sec>
<sec id="s3" sec-type="results">
<title>3 Results and discussion</title>
<sec id="s3_1">
<title>3.1 Winter wheat yield predictions obtained using the different models</title>
<p>The ability of the four common vegetation indices to predict the winter wheat yield was evaluated using four methods (LSTM, RF, GBDT, and SVR); the results of these predictions are visualized in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>.&#xa0;A comparison showed that, of the four models, the LSTM made the best predictions, followed by the RF model. The spatial distribution of the yield predicted by the four&#xa0;models roughly matched the true yield distribution: in each case, the yield was high in the middle of all the plots and low near the edges, a pattern that may have been due to human activities and the presence of trees around the plots. Overall, it was shown that the LSTM, RF, GBDT, and SVR models could be used to make estimates of the winter wheat yield that also reflected the spatial distribution.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Predictions of winter wheat yields obtained using the <bold>(A)</bold> LSTM, <bold>(B)</bold> RF, <bold>(C)</bold> GBDT, and <bold>(D)</bold> SVR models.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-1090970-g003.tif"/>
</fig>
<p>All three metrics (R<sup>2</sup>, MAE, and RMSE) showed that the LSTM model produced the best yield estimates (see <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>), with a value of 0.93 for R<sup>2</sup>. The RF model also performed well, giving an R<sup>2</sup> of 0.886. The values of R<sup>2</sup> for the GBDT and SVR models were 0.839 and 0.573, respectively. The two tree-based models &#x2013; RF and GBDT &#x2013; were able to explain the yield change at least 10% better than the SVR. In contrast, although the SVR technique could effectively solve multicollinearity problems among independent variables, it only simulates the limited relationship between input variables/features and modeling targets (i.e., grain yield), and is unable to map highly non-linear and complex relationships between variables. As reported in many previous works, deep learning methods are generally considered to be superior when the number of training samples is sufficiently large (<xref ref-type="bibr" rid="B42">Maimaitijiang et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B33">Khaki et&#xa0;al., 2021</xref>). This is likely due to the fact that DL often outperforms popular machine learning methods when dealing with large, complex, nonlinear, and redundant datasets (<xref ref-type="bibr" rid="B37">LeCun et&#xa0;al., 2015</xref>). Our experiments verified that the network containing two LSTM layers could capture more than 90% of the yield information from the input features. Previous research has also demonstrated that the LSTM model performed best among several machine learning models in winter wheat yield prediction. <xref ref-type="bibr" rid="B64">Xie and Huang (2021)</xref> demonstrated that the accuracy of the LSTM model was significantly higher than that of the 1-D CNN model due to the better ability of the LSTM model to treat time-series satellite data. However, when the amount of data is limited, the RF model has the advantages of being insensitive to outliers, nonlinearity, serial autocorrelation, and high dimensionality.
For example, <xref ref-type="bibr" rid="B7">Cao et&#xa0;al. (2021)</xref> found that the performance of RF was not always worse than DL at both the county and field levels. What&#x2019;s more, although LSTM yielded superior performance over RF methods, the improvement in grain yield prediction accuracy was not substantial (see <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>); one reason could be the small variation in the measured gridded yield data. Future work will examine the ability of more advanced deep learning architectures (e.g., LSTM and its variants) at the county scale to extract better information for winter wheat yield prediction.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Comparison between the estimates of the winter wheat yield obtained using the <bold>(A)</bold> LSTM, <bold>(B)</bold> RF, <bold>(C)</bold> GBDT, and <bold>(D)</bold> SVR models.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-1090970-g004.tif"/>
</fig>
</sec>
<sec id="s3_2">
<title>3.2 Estimates of the winter wheat yield based on multispectral and hyperspectral data</title>
<p>The LSTM method was then used to produce estimates of the winter wheat yield based on 30-m ZY-1 02D data, 30-m Sentinel-2 data, and 10-m Sentinel-2 data. The modeling with the ZY-1 02D data used the vegetation indices NDSI, SSI, and RSI, which we designed ourselves, as features, whereas three conventional vegetation indices &#x2013; NDWI, SR, and EVI &#x2013; were used with the Sentinel-2 data. The time series of data acquired on 24 March, 30 March, 28 April, and 1 May 2021 was input into the LSTM model for training, and predictions of the yield were obtained for the entire study area (see <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5</bold>
</xref>). It can be seen that, for all three types of data, the yield distribution was correctly modeled and that 10-m Sentinel-2 data best reflect the actual distribution of the yield within the study area. The spatial resolution of the results based on the other two datasets is low; as a result, the corresponding yield distribution maps are coarse and do not reflect the differences in yield between adjacent grid cells.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>
<bold>(A)</bold> Details of the observed distribution of the winter wheat yield. Details of the modeled distribution based on <bold>(B)</bold> 10-m Sentinel-2, <bold>(C)</bold> 30-m ZY-1 02D, and <bold>(D)</bold> 30-m Sentinel-2 data. The modeled yield distribution based on <bold>(E)</bold> 10-m Sentinel-2, <bold>(F)</bold> 30-m ZY-1 02D, and <bold>(G)</bold> 30-m Sentinel-2 data.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-1090970-g005.tif"/>
</fig>
<p>Overall, the estimates based on the 10-m Sentinel-2 data were found to be the most accurate, followed by those based on the 30-m ZY-1 02D data; the estimates based on the 30-m Sentinel-2 were the least accurate (see <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref>). By comparing the results for the datasets with different spatial and spectral resolutions, it was found that the spatial resolution had a greater impact on the winter wheat yield estimates than the spectral resolution: this can be seen from a comparison of the results for the 10-m Sentinel-2 data and the 30-m ZY-1 02D data. The 10-m Sentinel-2 data, which was the dataset with the highest spatial resolution, performed best, capturing 91% of the yield variation. The observed data consisted of gridded data with a spatial resolution of 5&#xa0;m, and the satellite data with the spatial resolution that was closest to this produced the best estimates of the yield. A similar result was reported in a previous study, in which the accuracy of the hyperspectral PRISMA models was lower than that of the multispectral Sentinel-2 models (<xref ref-type="bibr" rid="B43">Marshall et&#xa0;al., 2022</xref>). However, multispectral bands provide coarser spectral information than hyperspectral bands (<xref ref-type="bibr" rid="B66">Yang et&#xa0;al., 2021</xref>). A comparison of the results based on the ZY-1 02D and Sentinel-2 data, which have the same spatial resolution, showed that the ZY-1 02D data, which has more spectral bands, performed better, indicating that the features most important to the yield still remained after the band-by-band combination and that the narrow bands could provide relevant and accurate information about the yield. Therefore, a hyperspectral (HS)&#x2013;multispectral (MS) fusion paradigm can be considered in order to obtain the advantages of both high spatial and high spectral resolution. Here, some of the advantages and limitations of applying hyperspectral imaging to estimates of agricultural yields are demonstrated. However, although the mathematical relevance of the hyperspectral band selection method is maximized, the computational volume is large and the physical meaning is not clear enough, resulting in low applicability (<xref ref-type="bibr" rid="B34">Kong et&#xa0;al., 2022</xref>). 
Further research should be targeted towards the implementation and evaluation of more applicable band selection methods for hyperspectral data, making the most effective use of hyperspectral band information.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Comparison between estimates of the winter wheat yield based on <bold>(A)</bold> 10-m Sentinel-2, <bold>(B)</bold> 30-m ZY-1 02D, and <bold>(C)</bold> 30-m Sentinel-2 data.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-1090970-g006.tif"/>
</fig>
</sec>
<sec id="s3_3">
<title>3.3 Effect of various spectral bands and vegetation indexes on yield estimation</title>
<p>In the PFI experiment that was conducted, the importance of the four features used in the modeling could be ranked as SR &gt; NDWI &gt; EVI &gt; REP. The shuffled vegetation index SR produced the largest loss of 0.4783, followed by a loss of 0.4492 for the NDWI, 0.2385 for the EVI, and 0.2371 for the REP. A larger loss value indicates a greater contribution to the results. The large SR contribution is due to the high correlation between the ratio of the red band to the NIR wavelength bands and the leaf area index, which is a good measure of the crop growth (<xref ref-type="bibr" rid="B31">Jordan, 1969</xref>). The NDWI indicates the amount of biostructural water contained in a crop, so the large contribution made by the NDWI indicates that water has a great influence on the accumulation of organic matter in a crop (<xref ref-type="bibr" rid="B16">Gao, 1996</xref>). The EVI, which is a greenness vegetation index, had less effect on the results, which may be related to the instability of the blue band due to residual atmospheric effects. The poor performance of the REP vegetation index in the modeling may be due to the fact that the spatial resolution of the red-edge bands in Sentinel-2 is 20&#xa0;m, which does not match the spatial resolution of the yield data. However, the vegetation indices we selected were based on empirical knowledge of spectral information; other vegetation indices, such as the Green Leaf Area Index (<xref ref-type="bibr" rid="B13">Duchemin et&#xa0;al., 2008</xref>) and the Crop Water Stress Index (<xref ref-type="bibr" rid="B17">Ghaemi et&#xa0;al., 2016</xref>), should also be considered. We can combine empirical and statistical methods in the selection of vegetation indices.</p>
<p>In most previous studies, each feature was input to a model individually to retrain the model (<xref ref-type="bibr" rid="B6">Cai et&#xa0;al., 2019</xref>). This seems an intuitive approach; however, it is not appropriate if we are interested in the feature importance of the model where all features are trained together instead of one by one. <xref ref-type="bibr" rid="B72">Zhang et&#xa0;al. (2021)</xref> evaluated six typical VIs separately for their abilities to predict maize yield using the three approaches. Compared with the feature importance ranking in <xref ref-type="bibr" rid="B72">Zhang et&#xa0;al. (2021)</xref>, the advantage of the PFI method that we used in this study is that it outputs the performance of each feature when all of the features are input to the model together, and all interactions with other features are automatically considered. We analyzed the spectral information in combination with the environmental stresses of the crop, and the ranking results reflected the most important factors for wheat growth, so as to provide a reference for practical agricultural management: in conducting winter wheat farming, we need to focus on natural conditions such as tillage density, which is related to the leaf area index, and the amount of irrigation, which is closely related to the water within the winter wheat plant. However, besides PFI, more and more methods have recently been proposed to help users interpret the predictions of complex models, such as a unified framework for interpreting predictions named SHapley Additive exPlanations (<xref ref-type="bibr" rid="B39">Lundberg and Lee, 2017</xref>), and further research should be targeted towards comparing multiple feature importance ranking methods.</p>
<p>From the analysis of the correlation between the different vegetation indices and the yield based on the band-by-band combination method, we determined which bands had the strongest positive and negative correlations with the three different vegetation indices (see <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>). Of the 13203 combinations calculated for the NDSI, the largest correlation with the yield was for the wavelength range 516&#x2013;765 nm. This lies in the visible and NIR region and was negatively correlated with the yield with a correlation coefficient of &#x2013;0.7413. For the SSI, the best combination was 671&#x2013;679 nm, which was negatively correlated with the yield with a correlation coefficient of &#x2013;0.7559. For the RSI, the strongest correlation was for the combination 1779&#x2013;2216 nm, which was positively correlated with the yield with a correlation coefficient of 0.7539. The correlation between the band combinations and the yield is consistent with the results of <xref ref-type="bibr" rid="B43">Marshall et&#xa0;al. (2022)</xref>. These three customized vegetation indices were then adopted as three features for input to the hyperspectral data model. The correlation coefficients shown in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref> all pass the significance test at P &lt; 0.001. Importantly, from the values of the correlation between the vegetation indices and the yield (see <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref>), it was found that the combinations of visible and NIR bands were negatively correlated with the yield, whereas there was a positive correlation between the shortwave infrared narrowband combinations and the yield. As can be seen from <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>, the difference between the most positively correlated shortwave infrared narrowband combination and the most negatively correlated visible&#x2013;NIR band combination is almost negligible (between 0.5% and 5%), which indicates that both combinations can provide equally important information for yield estimates. The combinations of bands around the maximum value in <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref> can almost play the same role as the chosen bands (see <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>) and can also be used to build the yield estimation model when the requirements are not too strict. This conclusion confirms the results reported in previous studies that chose the visible and NIR bands to compute VIs (<xref ref-type="bibr" rid="B30">Jin et&#xa0;al., 2017</xref>) for yield prediction of various crops. <xref ref-type="bibr" rid="B34">Kong et&#xa0;al. (2022)</xref> used the band-by-band combination method in the range 450&#x2013;950 nm to construct new vegetation indices and analyzed their correlation with LAI. In our work, we expanded the band range to the full spectrum following the recommendation of <xref ref-type="bibr" rid="B43">Marshall et&#xa0;al. (2022)</xref> to make comprehensive use of hyperspectral information.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Band combinations giving the highest correlations with the customized vegetation indices.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left"/>
<th valign="middle" align="center">Band combination (nm)</th>
<th valign="middle" align="center">Correlation coefficient</th>
<th valign="middle" align="center">Band combination (nm)</th>
<th valign="middle" align="center">Correlation coefficient</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">NDSI</td>
<td valign="middle" align="center">1375&#x2013;1896</td>
<td valign="middle" align="center">0.6946***</td>
<td valign="middle" align="center">516&#x2013;765</td>
<td valign="middle" align="center">-0.7413***</td>
</tr>
<tr>
<td valign="middle" align="left">SSI</td>
<td valign="middle" align="center">1408&#x2013;1930</td>
<td valign="middle" align="center">0.7506***</td>
<td valign="middle" align="center">671&#x2013;679</td>
<td valign="middle" align="center">-0.7559***</td>
</tr>
<tr>
<td valign="middle" align="left">RSI</td>
<td valign="middle" align="center">1779&#x2013;2216</td>
<td valign="middle" align="center">0.7539***</td>
<td valign="middle" align="center">516&#x2013;765</td>
<td valign="bottom" align="center">-0.7413***</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>***means the value is significant at the 0.001 level</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>The high degree of correlation between the visible&#x2013;NIR band combinations and the yield can be explained by the correlation between the crop growth and the chlorophyll content (<xref ref-type="bibr" rid="B1">Acito et&#xa0;al., 2022</xref>), which has also been demonstrated in previous studies (<xref ref-type="bibr" rid="B72">Zhang et&#xa0;al., 2021</xref>). However, in most studies, only bands in the range 500 to 900 nm have been used and most other useful bands have been neglected (<xref ref-type="bibr" rid="B71">Zhang et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B72">Zhang et&#xa0;al., 2021</xref>). In this study, we also found that the shortwave infrared bands between 1000 and 2500 nm had a highly positive correlation with the yield (see <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref>), suggesting that combinations of narrow shortwave infrared bands can provide equally important information to that provided by visible&#x2013;NIR bands for crop yield estimation and that the use of the two types of information might achieve even better results. In future studies, experiments using the two types of bands should be performed to determine the quantitative relationship with the winter wheat yield.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Heatmap showing the correlation between <bold>(A)</bold> the NDSIs, <bold>(B)</bold> RSIs, and <bold>(C)</bold> SSIs and the yield. The green triangles represent the selected band combinations (thickened in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>); in each case, the green circle represents the other band combination listed in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-1090970-g007.tif"/>
</fig>
</sec>
<sec id="s3_4">
<title>3.4 Analysis of the models and other factors affecting the yield</title>
<p>The results described above (see <xref ref-type="fig" rid="f3">
<bold>Figures&#xa0;3</bold>
</xref>, <xref ref-type="fig" rid="f4">
<bold>4</bold>
</xref>) show that the LSTM model produced significantly better estimates of the winter wheat yield than the other three models; these results are consistent with those found by <xref ref-type="bibr" rid="B72">Zhang et&#xa0;al. (2021)</xref> and <xref ref-type="bibr" rid="B38">Lin et&#xa0;al. (2020)</xref>. Compared with machine learning models, deep learning with complex neural network structures has the advantage that it processes high-dimensional data that reflects the growth and development of crops (<xref ref-type="bibr" rid="B46">Mu et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B60">Wang et&#xa0;al., 2020</xref>). The LSTM can learn more time-dependent information (<xref ref-type="bibr" rid="B23">Hochreiter and Schmidhuber, 1997</xref>). The data input to the models consisted of series of monthly data that can be used to explore various types of changes in crop growth, including the yield and other related crop parameters (<xref ref-type="bibr" rid="B20">Haider et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B59">Tian et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B61">Wang et&#xa0;al., 2022</xref>). The two tree models (RF and GBDT) proved to be better than the SVR model at estimating the yield, which also confirms the results reported in a previous study (<xref ref-type="bibr" rid="B38">Lin et&#xa0;al., 2020</xref>). The LSTM neural network provides an effective tool for building new data-driven models for regional yield estimation. Neural network modeling transforms raw input variables into high-level representations through nonlinear activation and squashing functions, which weakens the traceability and interpretability of the LSTM model (<xref ref-type="bibr" rid="B59">Tian et&#xa0;al., 2021</xref>). <xref ref-type="bibr" rid="B67">You et&#xa0;al. (2017)</xref> added a DGP to a Long Short-Term Memory (LSTM) network, outperforming all the competing approaches. 
Future work will examine the ability of more advanced deep learning architectures (e.g., LSTM and its variants) and more kinds of data (e.g., remote sensing data and climate information) to extract better multimodal information for grain yield prediction. The introduction of an attention mechanism to capture and interpret the contribution of each time node in the time-series data to the models can be considered; in combination with knowledge of the crop phenological period, this could be used to make estimates of pre-production early yields. In subsequent studies, the use of transfer learning methods to improve the scalability of the model could also be tried; this would be similar to a method of predicting winter wheat FVC using deep transfer learning (<xref ref-type="bibr" rid="B68">Yu et&#xa0;al., 2022</xref>).</p>
<p>Experiments were then performed in which the sowing, cultivation, and irrigation practices were varied. It was found that the winter wheat accumulated the most organic matter when the seeding rate was set at 225 kg/ha (0.8 times the conventional sowing rate), rotational tillage was adopted, variable amounts of fertilizer were used, and the amount of irrigation was set at 60&#xa0;mm (see <xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8</bold>
</xref>). It was also found that a high sowing density leads to a lower yield, probably due to the intense competition among plants for water, fertilizer, and light making the plants less biologically productive and causing lodging (<xref ref-type="bibr" rid="B11">Chen et&#xa0;al., 2022</xref>). <xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8B</bold>
</xref> shows that the choice of rotational tillage as the tillage practice can maximize soil fertility: some studies have shown that rotational tillage practices can increase the soil porosity and improve the nutrient quality, thus increasing crop yields (<xref ref-type="bibr" rid="B47">Nie et&#xa0;al., 2015</xref>). From <xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8C</bold>
</xref> it can be seen that the use of variable fertilization promotes yield improvement because it meets the nitrogen demand of winter wheat throughout the growing season and allows the crop to maintain a more reasonable canopy structure for photosynthesis even after flowering (<xref ref-type="bibr" rid="B28">Jiang et&#xa0;al., 2015</xref>). <xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8D</bold>
</xref> shows that the greater the amount of irrigation, the more water is absorbed by the crop due to osmotic pressure regulation; this increases the soil water storage and improves the drought tolerance of the wheat and the yield. The above management variables are closely related to the crop growth process and directly affect the yield. Currently, as chemical fertilizer prices are rising sharply and many places are experiencing water shortages, advanced studies that will lead to the application of precise amounts of fertilizer and irrigation so that planting costs and environmental pollution can be reduced and high yields of wheat achieved are research priorities (<xref ref-type="bibr" rid="B9">Carberry et&#xa0;al., 2013</xref>). Our study quantitatively explored the effect of environmental conditions on the winter wheat yield, and the results provide data that are important to the cultivation of winter wheat in northern China.</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>The winter wheat yield plotted against different planting management variables: <bold>(A)</bold> seeding rate, <bold>(B)</bold> farming method (Subsoiling-Tillage-Rotatillage-Rototilling), <bold>(C)</bold> the rate of fertilizer application, and <bold>(D)</bold> the amount of irrigation.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-13-1090970-g008.tif"/>
</fig>
<p>Meanwhile, from the above experimental results, it can be seen that crop management statistics have a strong correlation with the crop yield and can be used to indicate yield changes. Management statistics models are included in crop environment models, which can be used in crop yield estimation by establishing a correlation between the crop management statistics and crop yields (<xref ref-type="bibr" rid="B18">Guarin and Asseng, 2022</xref>). The most commonly used statistical management model is the systemic integrated factor forecasting method developed by <xref ref-type="bibr" rid="B10">Chen (1992)</xref>. This forecasting method predicts the annual grain yield by building a systematic model between statistical factors (irrigation, fertilizer usage, and mechanical inputs) and the crop yield. Besides management statistics models, crop environment models (<xref ref-type="bibr" rid="B36">Launay and Guerif, 2005</xref>) also include agrometeorological models and agronomic yield estimation models &#x2013; the former use integral regression models based on meteorological factors and yields (<xref ref-type="bibr" rid="B48">O'Neal et&#xa0;al., 2002</xref>), and the latter mainly establish relationships between crop growth conditions and crop yield components, thus allowing them to predict crop yields. Predictions of wheat yields based on management statistics, meteorological data, and crop growth conditions can be highly accurate; however, these models do not apply to large areas and the values of the parameters are difficult to determine. Using satellite remote sensing, crop information can be acquired repeatedly over large areas at a relatively low cost. 
The combination of remote sensing data with crop growth dynamics models to predict yields has shown promise, and several studies (<xref ref-type="bibr" rid="B8">Cao et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B72">Zhang et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B4">Beyene et&#xa0;al., 2022</xref>) have shown that combining remote sensing and other factors can improve the accuracy of yield estimates. Previous studies demonstrate the tremendous potential of remote sensing data-based crop yield prediction when employing a multimodal data fusion and deep neural network approach. <xref ref-type="bibr" rid="B42">Maimaitijiang et&#xa0;al. (2020)</xref> verified that multimodal data fusion yielded superior performance for yield prediction over single sensor data, regardless of modeling methods. Therefore, in addition to being adaptable to different remote sensing data-VIs, within-field, multi-field, and regional applications require grain yield models to cope with variation and heterogeneity in space caused by differences in soil, irrigation, fertilization and other field conditions that affect plant growth (<xref ref-type="bibr" rid="B42">Maimaitijiang et&#xa0;al., 2020</xref>). For example, <xref ref-type="bibr" rid="B57">Su et&#xa0;al. (2017)</xref> integrated geographical data from the weather station in China and the SVR method to estimate crop growth at various stages. In subsequent research, we plan to collect multiple types of data, including meteorological data and crop management statistics, and combine these with remote sensing data to produce more accurate winter wheat yield estimates.</p>
<p>However, there remain challenges to fully understanding changes in winter wheat yields that arise from a lack of understanding of the mechanisms involved or a lack of data. If fused or integrated data with a high temporal, spatial, and spectral resolution (<xref ref-type="bibr" rid="B8">Cao et&#xa0;al., 2020</xref>) can be obtained, transferring our proposed model to a larger study area can be considered. In this study, we found that an increase in either the spectral or spatial resolution leads to an increase in the estimation accuracy (<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref>). <xref ref-type="bibr" rid="B1">Acito et&#xa0;al. (2022)</xref> dealt with the problem of improving the spatial resolution of hyperspectral data from the PRISMA mission and provided a superresolved image with a spatial resolution of 10&#xa0;m and the same spectral resolution as the PRISMA hyperspectral sensor. In the future, following the work by <xref ref-type="bibr" rid="B1">Acito et&#xa0;al. (2022)</xref>, we also hope to use fused ZY-1 02D and Sentinel-2 data for yield estimation to explore how temporally, spectrally, and spatially rich data perform, in a similar way to how the fusion of ZY-1 02D and multispectral data has been used for land classification (<xref ref-type="bibr" rid="B56">Sun et&#xa0;al., 2020a</xref>). Compared with yield estimation models at the county scale (<xref ref-type="bibr" rid="B55">Sun et&#xa0;al., 2020b</xref>), the yield estimation models that we developed in this study all apply at the pixel scale and may be less applicable at the larger scale of experimental fields. In subsequent studies, we will collect yield data from a large number of counties and cities, compare agricultural fields in their natural state with small experimental fields under human management, and explore the performance of the yield estimation models over spatially heterogeneous large plots. Furthermore, the approach can be tested for different crop types at different development stages and environmental conditions to evaluate its robustness.</p>
</sec>
</sec>
<sec id="s4" sec-type="conclusions">
<title>4 Conclusion</title>
<p>In this paper, based on Sentinel-2 and ZY-1 02D remote sensing imagery and using the LSTM, RF, GBDT, and SVR machine learning methods, we aimed to find the most suitable model, data source, and combination of spectral bands for making estimates of winter wheat yields. It was found that, of these four models, the LSTM model outperformed the SVR, RF, and GBDT models in learning the temporal relationship between the satellite data and the winter wheat yield, giving a value of R<sup>2</sup> of 0.93. After band selection, the 30-m ZY-1 02D hyperspectral data produced better results than the 30-m multispectral Sentinel-2 data and captured 5% more of the yield variation. However, the most accurate yield estimates were obtained using the data with the highest resolution &#x2013; the 10-m Sentinel-2 data &#x2013; for which R<sup>2</sup> was 0.91. In addition, it was found that the greenness vegetation index, SR, had the greatest effect on the yield estimates, followed by the water index, NDWI. For the hyperspectral data, the combinations of visible and NIR bands were usually negatively correlated with the yield, whereas the linear combinations of narrow shortwave infrared bands were mostly positively correlated with the yield. Our results also show the strong correlation between crop management statistics and yield and suggest the combination of management statistics data and remote sensing data.</p>
<p>In future work, we will consider the application of the modeling to a larger study area and evaluate the performance of fused Sentinel-2 and ZY-1 02D data. Approaches that incorporate transfer learning will also be considered.</p>
</sec>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s6" sec-type="author-contributions">
<title>Author contributions</title>
<p>The experiments were mainly conceived and designed by BZ and DP. YL and CX processed the satellite data. EC, LZ, LY, CL, XL, YC, HY, HW, RY, JH and SY performed the experiments. BZ, DP and EC analyzed the data. The algorithm development was mainly accomplished by BZ, DP and EC. EC wrote the manuscript and DP made very significant revisions. BZ helped perform the analysis with constructive discussions. All authors contributed to the article and approved the submitted version.</p>
</sec>
</body>
<back>
<sec id="s7" sec-type="funding-information">
<title>Funding</title>
<p>The authors acknowledge the support of the National Natural Science Foundation of China under Grants 42030111 and 42071329 and the CCF-AFSG Research Fund.</p>
</sec>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>Authors XL and YC are employed by Aerospace ShuWei High Tech. Co., Ltd.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s9" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Acito</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Diani</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Corsini</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>PRISMA spatial resolution enhancement by fusion with sentinel-2 data</article-title>,&#x201d; in <conf-name>IEEE J. Sel. Top. Appl. Earth Obs. Remote Sens</conf-name>. <volume>15</volume>. <fpage>62</fpage>&#x2013;<lpage>79</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JSTARS.2021.3132135</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alain</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Bengio</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Understanding intermediate layers using linear classifier probes</article-title>. <source>Mach. Learn</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1610.01644</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Betbeder</surname> <given-names>J.</given-names>
</name>
<name>
<surname>R&#xe9;my</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Baup</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Assimilation of LAI and dry biomass data from optical and SAR images into an agro-meteorological model to estimate soybean yield</article-title>,&#x201d; in <conf-name>IEEE J. Sel. Top. Appl. Earth Obs. Remote Sens</conf-name>. <volume>9</volume>. <fpage>1</fpage>&#x2013;<lpage>14</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JSTARS.2016.2541169</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Beyene</surname> <given-names>A. N.</given-names>
</name>
<name>
<surname>Zeng</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Gebremicael</surname> <given-names>T. G.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Coupling remote sensing and crop growth model to estimate national wheat yield in Ethiopia</article-title>. <source>Big Earth Data.</source> <volume>6</volume> (<issue>1</issue>), <fpage>18</fpage>&#x2013;<lpage>35</lpage>. doi: <pub-id pub-id-type="doi">10.1080/20964471.2020.1837529</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Breiman</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2001</year>). <article-title>Random forests</article-title>. <source>Mach. Learn.</source> <volume>45</volume>, <fpage>5</fpage>&#x2013;<lpage>32</lpage>. doi: <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cai</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Guan</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Lobell</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Potgieter</surname> <given-names>A. B.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Peng</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Integrating satellite and climate data to predict wheat yield in Australia using machine learning approaches</article-title>. <source>Agric. For. Meteorol.</source> <volume>274</volume>, <fpage>144</fpage>&#x2013;<lpage>159</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.agrformet.2019.03.010</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cao</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Luo</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Wheat yield predictions at a county and field scale with deep learning, machine learning, and google earth engine</article-title>. <source>Eur. J. Agron.</source> <volume>123</volume>, <fpage>126204</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.eja.2020.126204</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cao</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Tao</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Luo</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Identifying the contributions of multi-source data for winter wheat yield prediction in China</article-title>. <source>Remote Sens.</source> <volume>12</volume> (<issue>5</issue>), <elocation-id>750</elocation-id>. doi: <pub-id pub-id-type="doi">10.3390/rs12050750</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Carberry</surname> <given-names>P. S.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Twomlow</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Holzworth</surname> <given-names>D. P.</given-names>
</name>
<name>
<surname>Dimes</surname> <given-names>J. P.</given-names>
</name>
<name>
<surname>McClelland</surname> <given-names>T.</given-names>
</name>
<etal/>
</person-group>. (<year>2013</year>). <article-title>Scope for improved eco-efficiency varies among diverse cropping systems</article-title>. <source>PNAS.</source> <volume>110</volume> (<issue>21</issue>), <fpage>8381</fpage>&#x2013;<lpage>8386</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1208050110</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>1992</year>). <article-title>National grain production forecast research</article-title>. <source>Bull. Chin. Acad. Sci.</source> <volume>4</volume>, <fpage>330</fpage>&#x2013;<lpage>333</lpage>.</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Effects of planting densities on grain filling and grain yield of uniformly sown winter wheat</article-title>. <source>Xinjiang Agric. Sci.</source> <volume>59</volume> (<issue>6</issue>), <fpage>1338</fpage>&#x2013;<lpage>1346</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.6048/j.issn.1001-4330.2022.06.005</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Cunha</surname> <given-names>R. L. F.</given-names>
</name>
<name>
<surname>Silva</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Estimating crop yields with remote sensing and deep learning</article-title>,&#x201d; in <conf-name>2020 IEEE Latin American GRSS &amp; ISPRS Remote Sensing Conference (LAGIRS)</conf-name> (<publisher-loc>Piscataway, NJ</publisher-loc>: <publisher-name>IEEE</publisher-name>). <fpage>59</fpage>&#x2013;<lpage>64</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5194/isprs-annals-IV-3-W2-2020-59-2020</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Duchemin</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Maisongrande</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Boulet</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Benhadj</surname> <given-names>I.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>A simple algorithm for yield estimates: Evaluation for semi-arid irrigated winter wheat monitored with green leaf area index</article-title>. <source>Environ. Model. Software</source> <volume>23</volume>, <fpage>876</fpage>&#x2013;<lpage>892</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.envsoft.2007.10.003</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Feng</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Processing in crop yield estimating by remote sensing in China</article-title>. <source>World Sci-Tech R&amp;D.</source> <volume>28</volume> (<issue>3</issue>), <fpage>32</fpage>&#x2013;<lpage>36</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3969/j.issn.1006-6055.2006.03.006</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fisher</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Rudin</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Dominici</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>All models are wrong, but many are useful: Learning a variable&#x2019;s importance by studying an entire class of prediction models simultaneously</article-title>. <source>J. Mach. Learn. Res.</source> <volume>20</volume>, <fpage>1</fpage>&#x2013;<lpage>81</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1801.01489</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname> <given-names>B. C.</given-names>
</name>
</person-group> (<year>1996</year>). <article-title>NDWI a normalized difference water index for remote sensing of vegetation liquid water from space</article-title>. <source>Remote Sens. Environ.</source> <volume>58</volume>, <fpage>257</fpage>&#x2013;<lpage>266</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0034-4257(96)00067-3</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ghaemi</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Moazed</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Rafie Rafiee</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Broomand Nasab</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Determining CWSI to estimate eggplant evapotranspiration and yield under greenhouse and outdoor conditions</article-title>. <source>Iran Agric. Res.</source> <volume>34</volume>, <fpage>49</fpage>&#x2013;<lpage>60</lpage>.
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guarin</surname> <given-names>J. R.</given-names>
</name>
<name>
<surname>Asseng</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Improving wheat production and breeding strategies using crop models</article-title>. <source>Wheat Improvement</source>, <fpage>573</fpage>&#x2013;<lpage>591</lpage>. doi: <pub-id pub-id-type="doi">10.1007/978-3-030-90673-3_31</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Nie</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Identify urban area from remote sensing image using deep learning method</article-title>. <source>IGARSS</source>, <fpage>7407</fpage>&#x2013;<lpage>7410</lpage>. doi: <pub-id pub-id-type="doi">10.1109/IGARSS.2019.8898874</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Haider</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Naqvi</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Akram</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Umar</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Shahzad</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Sial</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>LSTM neural network based forecasting model for wheat production in Pakistan</article-title>. <source>Agronomy.</source> <volume>9</volume> (<issue>2</issue>), <elocation-id>72</elocation-id>. doi: <pub-id pub-id-type="doi">10.3390/agronomy9020072</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Tansey</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Tian</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Integrating an attention-based deep learning framework and the SAFY-V model for winter wheat yield estimation using time series SAR and optical data</article-title>. <source>Comput. Electron Agric.</source> <volume>201</volume>, <fpage>107334</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2022.107334</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>He</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Estimation of winter wheat yield based on the NOAA-NDVI data</article-title>. <source>J. Arid Land Resour. Environ.</source> <volume>27</volume> (<issue>5</issue>), <fpage>46</fpage>&#x2013;<lpage>52</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.13448/j.cnki.jalre.2013.05.008</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hochreiter</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Schmidhuber</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>1997</year>). <article-title>Long short-term memory</article-title>. <source>Neural Comput.</source> <volume>9</volume>, <fpage>1735</fpage>&#x2013;<lpage>1780</lpage>. doi: <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Horler</surname> <given-names>D. N. H.</given-names>
</name>
<name>
<surname>Dockray</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Barber</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>1983</year>). <article-title>The red edge of plant leaf reflectance</article-title>. <source>Int. J. Remote Sens.</source> <volume>4</volume> (<issue>2</issue>), <fpage>273</fpage>&#x2013;<lpage>288</lpage>. doi: <pub-id pub-id-type="doi">10.1080/01431168308948546</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Developing a dual-stream deep-learning neural network model for improving county-level winter wheat yield estimates in China</article-title>. <source>Remote Sens.</source> <volume>14</volume> (<issue>20</issue>), <elocation-id>5280</elocation-id>. doi: <pub-id pub-id-type="doi">10.3390/rs14205280</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huete</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Didan</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Miura</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Rodriguez</surname> <given-names>E. P.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Ferreira</surname> <given-names>L. G.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Overview of the radiometric and biophysical performance of the MODIS vegetation indices</article-title>. <source>Remote Sens. Environ.</source> <volume>83</volume>, <fpage>195</fpage>&#x2013;<lpage>213</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0034-4257(02)00096-2</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>J&#xe4;germeyr</surname> <given-names>J.</given-names>
</name>
<name>
<surname>M&#xfc;ller</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Ruane</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Elliott</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Balkovic</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Castillo</surname> <given-names>O.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Climate impacts on global agriculture emerge earlier in new generation of climate and crop models</article-title>. <source>Nat. Food.</source> <volume>2</volume>, <fpage>873</fpage>&#x2013;<lpage>885</lpage>. doi: <pub-id pub-id-type="doi">10.1038/s43016-021-00400-y</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Guan</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Effects of variable nitrogen application on character of light radiation and yield in canopy of winter wheat after anthesis</article-title>. <source>Southwest China J. Agric. Sci.</source> <volume>28</volume> (<issue>1</issue>), <fpage>255</fpage>&#x2013;<lpage>259</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.16213/j.cnki.scjas.2015.01.048</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Huete</surname> <given-names>A. R.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Linearization of NDVI based on its relationship with vegetation fraction</article-title>. <source>Photogramm. Eng. Rem. S.</source> <volume>76</volume> (<issue>8</issue>), <fpage>965</fpage>&#x2013;<lpage>975</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.14358/PERS.76.8.965</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jin</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Azzari</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Lobell</surname> <given-names>D. B.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Improving the accuracy of satellite-based high-resolution yield estimation: A test of multiple scalable approaches</article-title>. <source>Agric. For. Meteorol.</source> <volume>247</volume>, <fpage>207</fpage>&#x2013;<lpage>220</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.agrformet.2017.08.001</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jordan</surname> <given-names>C. F.</given-names>
</name>
</person-group> (<year>1969</year>). <article-title>Derivation of leaf-area index from quality of light on the forest floor</article-title>. <source>Ecology.</source> <volume>50</volume> (<issue>4</issue>), <fpage>663</fpage>&#x2013;<lpage>666</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2307/1936256</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kamir</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Waldner</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Hochman</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Estimating wheat yields in Australia using climate records, satellite image time series and machine learning methods</article-title>. <source>ISPRS J. Photogramm. Remote Sens.</source> <volume>160</volume>, <fpage>124</fpage>&#x2013;<lpage>135</lpage>.</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khaki</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Pham</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Simultaneous corn and soybean yield prediction from remote sensing data using deep transfer learning</article-title>. <source>Sci. Rep.</source> <volume>11</volume>, <fpage>11132</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-021-89779-z</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kong</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Leaf area index estimation based on UAV hyperspectral band selection</article-title>. <source>Spectrosc Spect Anal.</source> <volume>42</volume> (<issue>3</issue>), <fpage>933</fpage>&#x2013;<lpage>939</lpage>.</citation>
</ref>
<ref id="B35">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Krizhevsky</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Sutskever</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Hinton</surname> <given-names>G. E.</given-names>
</name>
</person-group> (<year>2012</year>). &#x201c;<article-title>ImageNet classification with deep convolutional neural networks</article-title>,&#x201d; in <source>NIPS</source> (<publisher-loc>Lake Tahoe, Nevada, USA</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>1097</fpage>&#x2013;<lpage>1105</lpage>.</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Launay</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Guerif</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Assimilating remote sensing data into a crop model to improve predictive performance for spatial applications</article-title>. <source>Agric. Ecosyst. Environ.</source> <volume>111</volume>, <fpage>321</fpage>&#x2013;<lpage>339</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.agee.2005.06.005</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>LeCun</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Bengio</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Hinton</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Deep learning</article-title>. <source>Nature.</source> <volume>521</volume>, <fpage>436</fpage>&#x2013;<lpage>444</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nature14539</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhong</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>DeepCropNet: a deep spatial-temporal learning framework for county-level corn yield estimation</article-title>. <source>Environ. Res. Lett.</source> <volume>15</volume> (<issue>3</issue>), <elocation-id>034016</elocation-id>. doi: <pub-id pub-id-type="doi">10.1088/1748-9326/ab66cb</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lundberg</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>A unified approach to interpreting model predictions</article-title>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1705.07874</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Convolution neural network feature importance analysis and feature selection enhanced model</article-title>. <source>J. Software</source> <volume>28</volume> (<issue>11</issue>), <fpage>2879</fpage>&#x2013;<lpage>2890</lpage>.</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yin</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Estimation of nitrogen content based on the hyperspectral vegetation indexes of interannual and multi-temporal in cotton</article-title>. <source>Agronomy</source> <volume>12</volume> (<issue>6</issue>), <elocation-id>1319</elocation-id>. doi: <pub-id pub-id-type="doi">10.3390/agronomy12061319</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maimaitijiang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Sagan</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Sidike</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Hartling</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Esposito</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Fritschi</surname> <given-names>F. B.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Soybean yield prediction from UAV using multimodal data fusion and deep learning</article-title>. <source>Remote Sens. Environ.</source> <volume>237</volume>, <fpage>111599</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.rse.2019.111599</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marshall</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Belgiu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Boschetti</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Pepe</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Stein</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Nelson</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Field-level crop yield estimation with PRISMA and sentinel-2</article-title>. <source>ISPRS J. Photogramm. Remote Sens.</source> <volume>187</volume>, <fpage>191</fpage>&#x2013;<lpage>210</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.isprsjprs.2022.03.008</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mawani</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Coronavirus disease (COVID-19); lessons learnt from international response and advice to the Georgia government</article-title>. <source>Innovation (Camb).</source> <volume>1</volume> (<issue>2</issue>), <fpage>100025</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.xinn.2020.100025</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mitchell</surname> <given-names>R. N.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Chang'E-5 reveals the moon's secrets to a longer life</article-title>. <source>Innovation (Camb).</source> <volume>2</volume> (<issue>4</issue>), <fpage>100177</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.xinn.2021.100177</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Dang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Winter wheat yield estimation from multitemporal remote sensing images based on convolutional neural networks</article-title>,&#x201d; in <source>2019 10th International Workshop on the Analysis of Multitemporal Remote Sensing Images (MultiTemp)</source>. <fpage>1</fpage>&#x2013;<lpage>4</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/Multi-Temp.2019.8866918</pub-id>
</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nie</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Niu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Ning</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Effects of rotational tillage on tilth soil structure and crop yield and quality in maize-wheat cropping system</article-title>. <source>Acta Agronomica Sinica.</source> <volume>41</volume> (<issue>3</issue>), <elocation-id>468</elocation-id>. doi: <pub-id pub-id-type="doi">10.3724/SP.J.1006.2015.00468</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>O'Neal</surname> <given-names>M. R.</given-names>
</name>
<name>
<surname>Engel</surname> <given-names>B. A.</given-names>
</name>
<name>
<surname>Ess</surname> <given-names>D. R.</given-names>
</name>
<name>
<surname>Frankenberger</surname> <given-names>J. R.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Neural network prediction of maize yield using alternative data coding algorithms</article-title>. <source>Biosyst. Eng.</source> <volume>83</volume>, <fpage>31</fpage>&#x2013;<lpage>45</lpage>. doi: <pub-id pub-id-type="doi">10.1006/bioe.2002.0098</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>F.</given-names>
</name>
<etal/>
</person-group>. (<year>2014</year>). <article-title>Modelling paddy rice yield using MODIS data</article-title>. <source>Agric. For. Meteorol.</source> <volume>184</volume>, <fpage>107</fpage>&#x2013;<lpage>116</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.agrformet.2013.09.006</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>County level rice yield estimation based on combination of Terra and Aqua MODIS EVIs</article-title>. <source>Chin. J. Rice Science.</source> <volume>24</volume> (<issue>5</issue>), <fpage>516</fpage>&#x2013;<lpage>522</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3969/j.issn.1001-7216.2010.05.012</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Poria</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Cambria</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Gelbukh</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Deep convolutional neural network textual features and multiple kernel learning for utterance-level multimodal sentiment analysis</article-title>,&#x201d; in <source>EMNLP</source> (<publisher-loc>Lisbon, Portugal</publisher-loc>), <fpage>2539</fpage>&#x2013;<lpage>2544</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18653/v1/D15-1303</pub-id>
</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qiao</surname> <given-names>M.</given-names>
</name>
<name>
<surname>He</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Cheng</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Luo</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Crop yield prediction from multi-spectral, multi-temporal remotely sensed imagery using recurrent 3D convolutional neural networks</article-title>. <source>Int. J. Appl. Earth Obs Geoinf.</source> <volume>102</volume>, <elocation-id>102436</elocation-id>. doi: <pub-id pub-id-type="doi">10.1016/j.jag.2021.102436</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sellami</surname> <given-names>M. H.</given-names>
</name>
<name>
<surname>Albrizio</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Colovic</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Hamze</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Cantore</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Todorovic</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Selection of hyperspectral vegetation indices for monitoring yield and physiological response in sweet maize under different water and nitrogen availability</article-title>. <source>Agronomy</source> <volume>12</volume> (<issue>2</issue>), <fpage>489</fpage>. doi: <pub-id pub-id-type="doi">10.3390/agronomy12020489</pub-id>
</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sharma</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Rai</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Krishnan</surname> <given-names>N. C.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Wheat crop yield prediction using deep LSTM model</article-title>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2011.01498</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Lai</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Di</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Tao</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>b). &#x201c;<article-title>Multilevel deep learning network for county-level corn yield estimation in the US corn belt</article-title>,&#x201d; in <conf-name>IEEE J. Sel. Top. Appl. Earth Obs. Remote Sens</conf-name>. <volume>13</volume>. <fpage>5048</fpage>&#x2013;<lpage>5060</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JSTARS.2020.3019046</pub-id>
</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Xiao</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Meng</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2020</year>a). <article-title>Classification of coastal wetlands based on hyperspectral and multispectral fusion data of ZY-1-02D satellite</article-title>. <source>Spacecraft Engineering.</source> <volume>29</volume> (<issue>6</issue>), <fpage>162</fpage>&#x2013;<lpage>168</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3969/j.issn.1673-8748.2020.06.024</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Su</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Yan</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Support vector machine-based open crop model (SBOCM): Case of rice production in China</article-title>. <source>Saudi J. Biol. Sci.</source> <volume>24</volume>, <fpage>537</fpage>&#x2013;<lpage>547</lpage>.</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Thorp</surname> <given-names>K. R.</given-names>
</name>
<name>
<surname>DeJonge</surname> <given-names>K. C.</given-names>
</name>
<name>
<surname>Kaleita</surname> <given-names>A. L.</given-names>
</name>
<name>
<surname>Batchelor</surname> <given-names>W. D.</given-names>
</name>
<name>
<surname>Paz</surname> <given-names>J. O.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Methodology for the use of DSSAT models for precision agriculture decision support</article-title>. <source>Comput. Electron Agric.</source> <volume>64</volume>, <fpage>276</fpage>&#x2013;<lpage>285</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2008.05.022</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tian</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Tansey</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>An LSTM neural network for improving wheat yield estimates by integrating remote sensing data and meteorological data in the Guanzhong Plain, PR China</article-title>. <source>Agric. For. Meteorol.</source> <volume>310</volume>, <elocation-id>108629</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.agrformet.2021.108629</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Yin</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Winter wheat yield prediction at county level and uncertainty analysis in main wheat-producing regions of China with deep learning approaches</article-title>. <source>Remote Sens.</source> <volume>12</volume> (<issue>11</issue>), <elocation-id>1744</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs12111744</pub-id>
</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Si</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Winter wheat yield prediction using an LSTM model from MODIS LAI products</article-title>. <source>Agriculture</source> <volume>12</volume>, <fpage>1707</fpage>. doi: <pub-id pub-id-type="doi">10.3390/agriculture12101707</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weiss</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Jacob</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Duveiller</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Remote sensing for agricultural applications: A meta-review</article-title>. <source>Remote Sens Environ.</source> <volume>236</volume>, <elocation-id>111402</elocation-id>. doi: <pub-id pub-id-type="doi">10.1016/j.rse.2019.111402</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiao</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Development of winter wheat yield estimation models based on hyperspectral vegetation</article-title>. <source>Chin. J. Ecol.</source> <volume>41</volume> (<issue>7</issue>), <fpage>1433</fpage>&#x2013;<lpage>1440</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.13292/j.1000-4890.202207.019</pub-id>
</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xie</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Integration of a crop growth model and deep learning methods to improve satellite-based yield estimation of winter wheat in Henan Province, China</article-title>. <source>Remote Sens.</source> <volume>13</volume>, <fpage>4372</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs13214372</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xing</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>A comparative study on estimation model for leaf area index of vegetation in marshes in Honghe National Nature Reserve based on hyperspectral and multispectral vegetation indices</article-title>. <source>Wetland Sci.</source> <volume>11</volume> (<issue>3</issue>), <fpage>313</fpage>&#x2013;<lpage>319</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3969/j.issn.1672-5948.2013.03.002</pub-id>
</citation>
</ref>
<ref id="B66">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Nigon</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Hao</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Dias Paiao</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Fern&#xe1;ndez</surname> <given-names>F. G.</given-names>
</name>
<name>
<surname>Mulla</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Estimation of corn yield based on hyperspectral imagery and convolutional neural network</article-title>. <source>Comput. Electron Agric.</source> <volume>184</volume>, <fpage>106092</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.compag.2021.106092</pub-id>
</citation>
</ref>
<ref id="B67">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>You</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Low</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Lobell</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Ermon</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Deep Gaussian process for crop yield prediction based on remote sensing data</article-title>. <source>Proc. AAAI Conf. Artif. Intell.</source> <volume>31</volume>, <fpage>4559</fpage>&#x2013;<lpage>4565</lpage>. doi: <pub-id pub-id-type="doi">10.1609/aaai.v31i1.11172</pub-id>
</citation>
</ref>
<ref id="B68">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Yu</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>A deep transfer learning method for estimating fractional vegetation cover of Sentinel-2 multispectral images</article-title>,&#x201d; in <conf-name>IEEE Geosci. Remote Sens. Lett</conf-name>. <volume>19</volume>. <fpage>1</fpage>&#x2013;<lpage>5</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/lgrs.2021.3125429</pub-id>
</citation>
</ref>
<ref id="B69">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Peng</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Benediktsson</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Zou</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Remotely sensed big data: Evolution in model development for information extraction</article-title>. <source>Proc. IEEE.</source> <volume>107</volume> (<issue>12</issue>), <fpage>2294</fpage>&#x2013;<lpage>2301</lpage>. doi: <pub-id pub-id-type="doi">10.1109/JPROC.2019.2948454</pub-id>
</citation>
</ref>
<ref id="B70">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Chanussot</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Hong</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Yao</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). &#x201c;<article-title>Progress and challenges in intelligent remote sensing satellite systems</article-title>,&#x201d; in <conf-name>IEEE J. Sel. Top. Appl. Earth Obs. Remote Sens</conf-name>. <volume>15</volume>. <fpage>1814</fpage>&#x2013;<lpage>1822</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/jstars.2022.3148139</pub-id>
</citation>
</ref>
<ref id="B71">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Tang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Remote sensing inversion of leaf area index of winter wheat based on random forest algorithm</article-title>. <source>Scientia Agricultura Sinica.</source> <volume>51</volume> (<issue>5</issue>), <fpage>855</fpage>&#x2013;<lpage>867</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3864/j.issn.0578-1752.2018.05.005</pub-id>
</citation>
</ref>
<ref id="B72">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Luo</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Integrating satellite-derived climatic and vegetation indices to predict smallholder maize yield using deep learning</article-title>. <source>Agric. For. Meteorol.</source> <volume>311</volume>, <elocation-id>108666</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.agrformet.2021.108666</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>