<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2026.1746869</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Near-infrared prediction of tannin content in walnut kernels using wavelet transform combined with interpretable machine learning models</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Xia</surname><given-names>Qiuhao</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x2020;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3281240/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Luo</surname><given-names>Langqin</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x2020;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Yerzati</surname><given-names>Yerhazi</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Ahmed</surname><given-names>Mian Muhammad</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Chen</surname><given-names>Yonghao</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname><given-names>Shiwei</given-names></name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Qin</surname><given-names>Jiangnan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Chen</surname><given-names>Liping</given-names></name>
<xref ref-type="aff" rid="aff7"><sup>7</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Jin</surname><given-names>Qiang</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Guo</surname><given-names>Zhongzhong</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zhang</surname><given-names>Rui</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>College of Horticulture and Forestry, Tarim University</institution>, <city>Alar</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>Efficient and High-Quality Cultivation and Deep Processing Technology of Characteristic Fruit Trees in Southern Xinjiang, National Local Joint Engineering Laboratory</institution>, <city>Alar</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff3"><label>3</label><institution>Xinjiang Production and Construction Corps, Southern Xinjiang Characteristic Forest and Fruit Technology Innovation Center</institution>, <city>Alar</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff4"><label>4</label><institution>College of Life Science and Technology, Tarim University</institution>, <city>Alar</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff5"><label>5</label><institution>Beijing Academy of Agricultural and Forestry Sciences Forestry Fruit Tree Research Institute</institution>, <city>Beijing</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff6"><label>6</label><institution>School of Forestry and Landscape Architecture, Xinjiang Agricultural University</institution>, <city>Urumqi</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff7"><label>7</label><institution>School of Information Engineering, Tarim University</institution>, <city>Alar</city>,&#xa0;<country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Rui Zhang, <email xlink:href="mailto:zhrgsh@163.com">zhrgsh@163.com</email>; Zhongzhong Guo, <email xlink:href="mailto:742560026@qq.com">742560026@qq.com</email></corresp>
<fn fn-type="equal" id="fn003">
<label>&#x2020;</label>
<p>These authors have contributed equally to this work</p></fn>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-06">
<day>06</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="corrected" iso-8601-date="2026-02-27">
<day>27</day>
<month>02</month>
<year>2026</year></pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>17</volume>
<elocation-id>1746869</elocation-id>
<history>
<date date-type="received">
<day>17</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>16</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>05</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Xia, Luo, Yerzati, Ahmed, Chen, Wang, Qin, Chen, Jin, Guo and Zhang.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Xia, Luo, Yerzati, Ahmed, Chen, Wang, Qin, Chen, Jin, Guo and Zhang</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-06">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Tannin content is a key factor influencing the taste of walnuts and serves as an important index for evaluating walnut quality. Rapid and accurate detection of tannin levels in walnut kernels is therefore significant for quality assessment and management. This study aims to develop an efficient method for predicting tannin content in walnut kernels using near-infrared (NIR) spectroscopy combined with machine learning techniques.</p>
</sec>
<sec>
<title>Methods</title>
<p>A total of 180 samples of &#x2018;Wen 185&#x2019; walnut kernels were used as the research objects. The NIR reflectance spectra of the samples were measured within the range of 4000&#x2013;10000 cm&#x207b;&#xb9;. The spectral data were processed using mathematical transformations and continuous wavelet transform (CWT), both separately and in combination. Pearson correlation analysis was applied to extract characteristic bands related to tannin content. Based on these features, a random forest (RF) model was constructed to quantitatively predict tannin content. Additionally, the SHAP algorithm was employed to interpret and visualize the machine learning model.</p>
</sec>
<sec>
<title>Results</title>
<p>The results indicated that within the spectral range of 4000&#x2013;10000 cm&#x207b;&#xb9;, the NIR reflectance of walnut kernels increased with tannin content under different orchard management modes. Both first-order differential transformation and CWT, as well as their combination, significantly enhanced the correlation between spectral data and tannin content. The combination of first-order differential transformation and CWT notably improved the model's prediction performance. The optimal prediction model was achieved using the feature lg&#x2019;(1/R)_CWT_28, with training set metrics of R&#xb2; = 0.880, RMSE = 1.188, RPD = 2.904, and validation set metrics of R&#xb2; = 0.831, RMSE = 1.620, RPD = 2.459.</p>
</sec>
<sec>
<title>Discussion</title>
<p>The study demonstrates that combining mathematical transformations with wavelet transform can effectively improve the prediction accuracy of models for tannin content in walnut kernels. The RF model based on processed spectral data showed strong performance, indicating its potential for rapid and non-destructive tannin quantification. The use of SHAP algorithm further enhances model interpretability. These findings provide a valuable reference for the accurate prediction of tannin content in walnut kernels and may support quality control in walnut production and processing.</p>
</sec>
</abstract>
<kwd-group>
<kwd>continuous wavelet transform (CWT)</kwd>
<kwd>near-infrared</kwd>
<kwd>random forest (RF)</kwd>
<kwd>Shapley additive explanations (SHAP)</kwd>
<kwd>tannins</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by the National Natural Science Foundation of China Project (No. 32160689), Tianshan Talent Training Program (No. 2022TSYCCX0120), Tarim University President&#x2019;s Fund Major Project Cultivation Project (No. TDZKZD202403), Walnut Full Industry Chain Innovation R&amp;D and Promotion Team &#x2013; &#x201c;Three Station Chain Cooperation&#x201d; Walnut Full Industry Chain Industry University Research Development Practice (No. TDZKCX202101), Southern Xinjiang Key Industry Innovation and Development Support Plan (No. 2022DB022), Guiding Science and Technology Program Project of the Xinjiang Production and Construction Corps (No. 2024ZD113), Tarim University President&#x2019;s Fund Populus euphratica Talent (PhD) Project (No. TDZKBS202419), and the Guiding Plan Projects of the Science and Technology Bureau of Xinjiang Production and Construction Corps (No. 2023ZD102).</funding-statement>
</funding-group>
<counts>
<fig-count count="10"/>
<table-count count="1"/>
<equation-count count="3"/>
<ref-count count="36"/>
<page-count count="14"/>
<word-count count="6524"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Sustainable and Intelligent Phytoprotection</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Walnut (<italic>Juglans regia</italic> L.) is a significant woody oil and economic tree species in China, valued for its drought tolerance and the high nutritional quality of its fruit. In particular, the southern Xinjiang region has seen extensive cultivation of walnuts, which have become a crucial component of local economic development and a primary source of income for farmers (<xref ref-type="bibr" rid="B23">Pei and Lu, 2011</xref>; <xref ref-type="bibr" rid="B24">Qu, 1980</xref>). Currently, however, most walnut products in China are either consumed directly or subjected to minimal processing. The astringency of the fruit notably affects its taste, with tannin compounds being the principal contributors to this astringency (<xref ref-type="bibr" rid="B17">Liu et&#xa0;al., 2023</xref>). Walnut tannin is a highly polymerized polyphenolic compound that interacts with salivary proteins, resulting in astringency. Research indicates that walnut tannin possesses antioxidant and antibacterial properties and may also play a role in the prevention of chronic diseases (<xref ref-type="bibr" rid="B29">Xiao et&#xa0;al., 2006</xref>; <xref ref-type="bibr" rid="B1">Aoki et&#xa0;al., 2006</xref>; <xref ref-type="bibr" rid="B13">Li et&#xa0;al., 2009</xref>).</p>
<p>Traditional methods for detecting tannins in walnuts typically include EDTA titration, the phosphomolybdic acid-sodium tungstate colorimetric method, capillary gas chromatography, and potassium ferrous hexachloride (III) spectrophotometry (<xref ref-type="bibr" rid="B9">Huang and Ni, 2022</xref>; <xref ref-type="bibr" rid="B3">Cai, 1997</xref>; <xref ref-type="bibr" rid="B7">Hernes and Hedges, 2000</xref>; <xref ref-type="bibr" rid="B30">Yang and Qu, 1989</xref>). However, these techniques are often costly, exhibit low time efficiency, and pose challenges for large-scale rapid detection. Furthermore, the use of chemical reagents during testing can endanger the health of testers, while the disposal of chemical waste can contribute to environmental pollution (<xref ref-type="bibr" rid="B20">Ma et&#xa0;al., 2022</xref>).</p>
<p>With the rapid advancement of spectroscopic technologies, near-infrared (NIR) spectroscopy has evolved from a well-established diagnostic method into a continuously innovating tool with expanding applications and significant practical value. NIR spectroscopy enables real-time, non-destructive, and dynamic monitoring of crop quality at specific spatial and temporal scales, offering considerable advantages over traditional chemical or sensory methods. As such, developing a rapid, universal, and efficient approach to predict tannin content in Juglans regia &#x2018;Wen 185&#x2019; walnuts grown in southern Xinjiang has become increasingly important for rapid quality assessment and classification. <xref ref-type="bibr" rid="B34">Zhang et&#xa0;al. (2011)</xref> established a calibration model for soluble tannin content in astringent persimmons using visible and near-infrared diffuse reflectance (Vis/NIR) spectroscopy. By applying an improved partial least squares regression (PLSR) algorithm combined with first derivative and scatter correction preprocessing, the model demonstrated superior predictive performance, highlighting the utility of Vis/NIR spectroscopy for internal quality assessment. Similarly, <xref ref-type="bibr" rid="B4">Cheng (2020)</xref> employed NIR hyperspectral imaging integrated with chemometric methods, machine learning, and deep learning techniques to rapidly classify wine grape varieties, determine their geographic origins, and predict tannin levels at different maturation stages. <xref ref-type="bibr" rid="B10">Jensen et&#xa0;al. (2008)</xref> utilized Fourier transform mid-infrared (FT-MIR) spectroscopy for the rapid quantification of various wine constituents, including tannins. However, due to overlapping spectral responses from other compounds, accurate quantification of tannins remains challenging. Their study explored four variable selection methods to identify key spectral regions relevant to tannin determination using PLSR. 
In another study, <xref ref-type="bibr" rid="B32">Ying et&#xa0;al. (2006)</xref> applied wavelet transform (WT) to denoise NIR spectra of 90 apple samples, exploiting the multiscale differences in the evolution of wavelet modulus maxima between singular signals and random noise, and successfully predicted sugar content via stepwise regression. NIR spectroscopy has thus been widely applied in predicting tannin content in crops such as grapes, apples, persimmons, and sorghum. However, few studies have specifically addressed tannin quantification in walnut kernels. Existing research has predominantly focused on optimizing model performance, while relatively little attention has been given to enhancing model interpretability.</p>
<p>In this experiment, spectral information from walnut kernels was collected within the wavenumber range of 4000&#x2013;10000 cm<sup>-1</sup>. Various spectral processing methods, including mathematical transformation, wavelet transformation, and their combinations, were investigated to identify the most suitable pretreatment method for detecting tannin content in walnut kernels. Building on this foundation, a prediction model for tannin content was developed using random forests. The SHAP algorithm was applied to ascertain feature significance and facilitate internal model visualization, enabling swift tannin content detection in walnut kernels.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Plant materials and instrumentation</title>
<p>The experimental material for this study was the &#x2018;Wen 185&#x2019; walnut selected from the walnut forest farm in Wensu County, Aksu Prefecture, Xinjiang. A total of 180 walnut samples were collected from 9 walnut orchards with varying management levels: 3 high-yield, 3 medium-yield, and 3 low-yield orchards. The walnut trees in these orchards are spaced 5 meters by 6 meters apart and are all 10 years old. Following harvest at ripeness, the green skins were removed and the walnuts were dried in a well-ventilated environment until their moisture content reached approximately 6%. Subsequently, the walnuts were shelled, and the kernels were extracted, crushed for 3 minutes using an FW-80 high-speed universal crusher, and thoroughly mixed. The crushed walnut kernels were then sealed in plastic bags and stored at 4 &#xb0;C for subsequent spectral scanning and determination of tannin content.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Acquisition and processing of raw spectral data from walnut kernels</title>
<p>NIR spectral data were collected using a Fourier-transform near-infrared spectrometer (Antaris II, Thermo Fisher Scientific, USA). The instrument was operated at a resolution of 8 cm<sup>-1</sup> with a gain setting of 2, using the built-in background as reference. Each spectrum was obtained as the average of 32 scans. Prior to spectral acquisition, walnut samples were equilibrated under controlled environmental conditions (25&#xb0;C and 40% relative humidity) for 24 hours to ensure consistency with the instrument&#x2019;s ambient environment, thereby minimizing spectral variability.</p>
<p>The instrument was preheated for 60 minutes before measurement. Spectra were recorded over the wavenumber range of 4000&#x2013;10000 cm<sup>-1</sup>. Ground walnut kernel powder was uniformly packed into quartz sample cups (30 mm diameter, 5 mm height, 1 mm wall thickness), with the sample surface leveled and aligned with the rim of the cup. Each sample was scanned three times, resulting in a total of 540 spectra for 180 samples. The final representative spectrum for each sample was obtained by averaging its three replicate scans. After each measurement, the sample cups were sequentially rinsed with tap water, distilled water, and then wiped clean with ethanol to ensure cleanliness and prevent cross-contamination.</p>
<sec id="s2_2_1">
<label>2.2.1</label>
<title>Outlier detection and removal</title>
<p>Outlier removal was performed using the Monte Carlo method (<xref ref-type="bibr" rid="B22">Meng et&#xa0;al., 2022</xref>), which is effective for identifying data points that deviate significantly from the distribution of the dataset. Such outliers may result from instrumental noise, measurement errors, or data entry mistakes. Eliminating these anomalous points is essential to enhance the accuracy and robustness of subsequent model development.</p>
</sec>
<sec id="s2_2_2">
<label>2.2.2</label>
<title>Correlation analysis</title>
<p>Selection of informative spectral features is a critical step in improving the sensitivity of NIR data to tannin content. In the preliminary phase of this study, various feature selection strategies were evaluated, including Pearson correlation analysis. The comparison indicated that spectral bands selected based on a significance threshold of p &lt; 0.01 yielded superior modeling performance. Consequently, Pearson correlation analysis was conducted using MATLAB R2023a (MathWorks, USA) to assess the strength of association between each spectral band and tannin content. Spectral bands exhibiting statistically significant correlations (p &lt; 0.01) were retained for model construction (<xref ref-type="bibr" rid="B21">Mao et&#xa0;al., 2023</xref>). The Pearson correlation coefficient (r), ranging from -1 to 1, quantifies the degree of linear association between variables, with larger absolute values indicating stronger correlations.</p>
</sec>
<sec id="s2_2_3">
<label>2.2.3</label>
<title>Traditional mathematical transformations</title>
<p>To evaluate the impact of different preprocessing methods on spectral feature extraction and avoid information omission, this study used a total of 11 mathematical transformations, including reciprocal transformation, logarithmic transformation, and their derivatives, for horizontal comparison. Among them, reciprocal transformation aims to compress high reflection areas to enhance low value signals, while logarithmic transformation is used to reduce dynamic range and improve spectral response linearity. Both are commonly used benchmark methods to verify the effectiveness of spectral preprocessing.</p>
</sec>
<sec id="s2_2_4">
<label>2.2.4</label>
<title>Wavelet transform processing</title>
<p>Continuous Wavelet Transform (CWT) is a time-frequency analytical method that decomposes spectral reflectance into components of different frequencies and scales, allowing the identification of subtle spectral variations across multiple resolutions. This study employed various mother wavelets&#x2014;including Bior, Morlet, Haar, and Gabor functions (<xref ref-type="bibr" rid="B14">Li X. et&#xa0;al., 2024</xref>)&#x2014;to convolve with the spectral data, thereby generating wavelet coefficients corresponding to different scales and frequency domains (<xref ref-type="bibr" rid="B5">Guan et&#xa0;al., 2024</xref>). The multiscale decomposition enabled by CWT improves feature resolution by capturing localized changes in spectral patterns while suppressing random noise, ultimately enhancing data interpretability and model performance (<xref ref-type="bibr" rid="B16">Lin et&#xa0;al., 2021</xref>). The wavelet decomposition is mathematically expressed as:</p>
<disp-formula id="eq1"><label>(1)</label>
<mml:math display="block" id="M1"><mml:mrow><mml:msub><mml:mi>&#x3c9;</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:mrow><mml:msubsup><mml:mo>&#x222b;</mml:mo><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>&#x221e;</mml:mi></mml:mrow><mml:mrow><mml:mo>+</mml:mo><mml:mi>&#x221e;</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>&#x3c8;</mml:mi><mml:mrow><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:mrow></mml:mstyle></mml:mrow></mml:math>
</disp-formula>
<p>In the formula, <italic>&#x3c9;<sub>ij</sub></italic> and <italic>v<sub>ij</sub></italic> represent the wavelet coefficient and reflectance of the <italic>j</italic>-th band of the <italic>i</italic>-th tannin sample, respectively; <italic>a</italic> is the scale factor, ranging from 2<sup>1</sup> to 2<sup>10</sup>; <italic>b</italic> is the translation factor; and <italic>&#x3c8;<sub>a,b</sub></italic>(<italic>j</italic>) denotes the wavelet basis function, defined as:</p>
<disp-formula id="eq2"><label>(2)</label>
<mml:math display="block" id="M2"><mml:mrow><mml:msub><mml:mi>&#x3c8;</mml:mi><mml:mrow><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msqrt><mml:mi>a</mml:mi></mml:msqrt></mml:mrow></mml:mfrac><mml:mi>&#x3c8;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi>j</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mi>a</mml:mi></mml:mfrac></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>In addition, CWT encompasses a variety of wavelet basis functions, each of which may yield different decomposition outcomes. The selection of an appropriate wavelet function and optimal decomposition scale is therefore critical for effective spectral preprocessing. To select the optimal wavelet basis function, this study preliminarily compared various commonly used wavelets, including Daubechies (db4), Symlets (sym8), Morlet (morl), Mexican hat (mexh), and Gaussian function (gaus4). Based on the comprehensive performance of feature band separation and noise suppression in preliminary experiments, the gaus4 wavelet was ultimately selected for subsequent analysis (<xref ref-type="bibr" rid="B14">Li et&#xa0;al., 2024</xref>).</p>
</sec>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Determination of tannin content in walnut kernels</title>
<p>Tannin content was determined according to the Chinese agricultural industry standard NY/T 1600-2008: Determination of tannin content in fruits, vegetables, and their products&#x2014;Spectrophotometric method (<xref ref-type="bibr" rid="B11">Li et&#xa0;al., 1999</xref>). Precisely 1.00 g of ground walnut kernel sample was weighed and placed in a 100 mL volumetric flask. The sample was extracted using a boiling water bath for 30 minutes. After extraction, the mixture was cooled to room temperature and diluted to the mark with distilled water. The extract was centrifuged, and 2 mL of the supernatant was transferred into a 50 mL volumetric flask. Then, 1 mL of a sodium tungstate&#x2013;sodium molybdate reagent and sodium carbonate solution was added, and the mixture was shaken thoroughly. After standing at room temperature for 2 hours, the absorbance of the solution was measured at 765 nm using a UV&#x2013;Vis spectrophotometer. The tannin content was calculated using the following equation:</p>
<disp-formula id="eq3"><label>(3)</label>
<mml:math display="block" id="M3"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>C</mml:mi><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>V</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>&#xd7;</mml:mo><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>M</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mn>1000</mml:mn></mml:mrow></mml:mfrac></mml:mrow></mml:math>
</disp-formula>
<p>In the formula, <italic>X</italic><sub>1</sub> represents the tannin content in the sample (mg/g); <italic>C</italic> represents the gallic acid content obtained from the standard curve (mg); <italic>V</italic><sub>1</sub> represents the volume of the sample determination solution (mL); <italic>M</italic> represents the mass of the walnut sample taken (g); <italic>N</italic> represents the dilution factor; 1000 represents the conversion coefficient.</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Model construction and performance evaluation</title>
<p>With the rapid development of machine learning algorithms, numerous advanced modeling techniques have been applied to the prediction of fruit quality traits, often outperforming traditional statistical methods (<xref ref-type="bibr" rid="B25">Tan et&#xa0;al., 2023</xref>). Random forest (RF) (<xref ref-type="bibr" rid="B2">Breiman, 2001</xref>), an ensemble learning algorithm composed of multiple decision trees, exhibits robustness to multicollinearity and performs well with imbalanced or incomplete datasets (<xref ref-type="bibr" rid="B6">Guo et&#xa0;al., 2025</xref>). In this study, the dataset was randomly split into a training set and a validation set at a 6:4 ratio. Model performance was evaluated using the coefficient of determination (R<sup>2</sup>), root mean square error (RMSE), and relative percent deviation (RPD). A model with R<sup>2</sup> approaching 1 and low RMSE indicates strong predictive capability. An RPD value between 1.4 and 2.0 suggests moderate reliability suitable for estimation, while RPD &gt; 2.0 indicates a robust predictive model (<xref ref-type="bibr" rid="B14">Li et&#xa0;al., 2024</xref>).</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>SHAP-based feature importance analysis</title>
<p>Due to the &#x201c;black-box&#x201d; nature of many machine learning models, their internal decision-making processes are often opaque and difficult to interpret (<xref ref-type="bibr" rid="B31">Ye et&#xa0;al., 2024</xref>). The SHAP (Shapley additive explanations) algorithm addresses this issue by applying Shapley values&#x2014;originating from cooperative game theory&#x2014;to decompose the output of a model into contributions from each input feature (<xref ref-type="bibr" rid="B12">Li et&#xa0;al., 2025</xref>). This allows for a more transparent understanding of the model&#x2019;s predictions and facilitates interpretability in complex systems. The experimental flowchart is shown in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Experimental flowchart.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1746869-g001.tif">
<alt-text content-type="machine-generated">The experimental flowchart outlines the complete research methodology, starting from the collection of walnut samples and acquisition of their near-infrared (NIR) spectra, followed by data preprocessing (including outlier removal and spectral transformations), feature selection using correlation analysis and Continuous Wavelet Transform (CWT), the construction of a predictive Random Forest (RF) model, and concluding with model interpretation using the SHAP algorithm to explain feature importance.</alt-text>
</graphic></fig>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>Software and implementation</title>
<p>All data preprocessing, spectral feature selection, and model development were performed using MATLAB<sup>&#xae;</sup> (Version R2023a; MathWorks, 2023) and Python. Chemical and spectral mean values were calculated using Microsoft Excel<sup>&#xae;</sup> (Version 2016; Microsoft Corporation, 2016). All visualizations and figures were generated using Origin<sup>&#xae;</sup> (Version 2021; OriginLab Corporation, 2021). For model interpretation, the SHAP Python package (<xref ref-type="bibr" rid="B19">Lundberg and Lee, 2017</xref>) was employed, which is based on the Shapley additive explanations framework.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results and analysis</title>
<sec id="s3_1">
<label>3.1</label>
<title>Analysis of tannin content in walnut kernels and removal of outliers</title>
<p>The tannin content in walnut kernels can be determined using a specific formula (<xref ref-type="disp-formula" rid="eq3">Equation 3</xref>). The results indicate significant variability in tannin content among samples, demonstrating notable distinctions and representativeness. A comparison of orchard sample data under various management models is presented in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2a</bold></xref>, revealing distinct differences. The average tannin content differs among orchards managed under different models, facilitating model development. <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2b</bold></xref> illustrates the near-infrared spectral range of 4000&#x2013;10000 cm<sup>-1</sup>. The spectral curves under different management modes exhibit similar overall shapes, running roughly parallel to each other, with spectral reflectance increasing as wavelength increases. The absorption features observed in the spectra, particularly in the regions around 4000&#x2013;5000 cm<sup>-1</sup> and 7000&#x2013;9000 cm<sup>-1</sup>, are likely associated with the characteristic vibrational modes of tannin molecules. Tannins, as polyphenolic compounds, contain abundant hydroxyl (-OH) groups, whose overtone and combination bands typically appear in the NIR region. Specifically, the first overtone of O-H stretching vibrations often occurs around 7000 cm<sup>-1</sup>, while combination bands involving O-H bending and stretching vibrations may contribute to the absorption features in the 4000&#x2013;5000 cm<sup>-1</sup> range.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Tannin content data and reflectance curves under different management modes. <bold>(a)</bold> Tannin content data under different management modes; <bold>(b)</bold> Reflectance curves of tannin content in walnut kernels under different management modes. The absorption features in the spectra, particularly around 4000&#x2013;5000 cm<sup>-1</sup> and 7000&#x2013;9000 cm<sup>-1</sup>, are associated with characteristic vibrational modes of tannin molecules, primarily involving O-H overtone and combination bands.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1746869-g002.tif">
<alt-text content-type="machine-generated">Panel (a) presents a comparative analysis of tannin content measured in walnut kernel samples collected from orchards under different management modes (high, medium, and low yield), highlighting the variability used for modeling. Panel (b) displays the raw NIR reflectance spectra (4000-10000 cm&#x207b;&#xb9;) of all samples, showing the overall spectral shape and key absorption regions potentially associated with tannin's molecular vibrations.</alt-text>
</graphic></fig>
<p>Outliers in the dataset were identified and removed using the Monte Carlo simulation method. A total of 2000 iterations were performed. In each iteration, 60% of the samples were randomly selected as the training set, and the remaining 40% were used for validation. The training data were preprocessed using mean centering (&#x201c;center&#x201d;), and a partial least squares (PLS) regression model was constructed using 20 latent variables. The resulting regression coefficients were applied to the validation samples to obtain predicted values, and prediction errors were calculated accordingly. As shown in <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>, samples that fell outside the dashed boundary lines were identified as outliers. These samples exhibited either a mean prediction error greater than the overall mean or a standard deviation exceeding the global standard deviation. The criteria for outlier elimination were set as: standard deviation &gt; 2 and mean &gt; 10. Based on these thresholds, nine samples&#x2014;No. 30, 42, 57, 69, 101, 128, 131, 133, and 161&#x2014;were identified as outliers and removed from the dataset. After eliminating these nine samples, the remaining 171 samples were retained for subsequent modeling. This outlier removal step led to a significant improvement in model performance, enhancing the reliability and stability of the predictive results.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Removal of outliers in data.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1746869-g003.tif">
<alt-text content-type="machine-generated">This figure illustrates the process of outlier detection and removal using the Monte Carlo simulation method. Data points falling outside the defined dashed boundary lines, which indicate samples with high prediction error or high standard deviation relative to the model, were identified as statistical outliers and excluded to enhance the dataset's quality and subsequent model robustness.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Spectral feature analysis of mathematical transformations</title>
<p>After applying 11 mathematical transformations to the raw reflectance spectra (R), including reciprocal (1/R), logarithmic (log R), reciprocal-logarithmic (log(1/R)), as well as first- and second-order derivatives, significant differences in spectral reflectance patterns were observed (<xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>). As shown in <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4a</bold></xref>, the original spectra are relatively smooth, lacking prominent peaks or absorption valleys, making it difficult to extract subtle features associated with walnut kernel tannin content. <xref ref-type="fig" rid="f4"><bold>Figures&#xa0;4b-d</bold></xref> display the results of the 1/R, log R, and log(1/R) transformations. Although the spectral shapes remain smooth, there are still no clearly distinguishable peaks or valleys. These transformations are designed to compress high reflectance values or balance the distribution of reflectance intensities, thereby enhancing blended spectral features. However, they do not effectively amplify small differences between adjacent wavelengths, limiting their ability to highlight features relevant to tannin concentration. In contrast, <xref ref-type="fig" rid="f4"><bold>Figures&#xa0;4e-h</bold></xref> demonstrate the effect of the first derivative transformation, which significantly improves spectral sensitivity to tannins. This is achieved by computing the rate of change between adjacent wavelengths, resulting in sharper spectral variation and an increased number of peaks and valleys. The first derivative effectively emphasizes local variations, inflection points, and abrupt changes, while suppressing low-frequency noise and reducing the impact of spectral overlap. 
However, this method is also more susceptible to high-frequency noise, which may reduce model robustness (<xref ref-type="bibr" rid="B14">Li et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B18">Liu et&#xa0;al., 2024</xref>). <xref ref-type="fig" rid="f4"><bold>Figures&#xa0;4i-l</bold></xref> illustrate the spectral changes after applying the second derivative transformation. Compared to the first derivative, the number of spectral peaks and valleys is further increased. This occurs because the second derivative reflects the curvature rate of change in the spectral profile, which smooths flatter regions and removes weaker spectral information, thereby retaining only the most prominent features. While this transformation enhances key features, it also amplifies noise and may cause loss of useful bands, leading to reduced model stability (<xref ref-type="bibr" rid="B27">Wang et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B36">Zhong et&#xa0;al., 2023</xref>). Overall, although both derivative methods improve feature extraction, excessive noise sensitivity in the second derivative makes the first derivative more suitable for further modeling.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>The original spectral reflectance (R) and the average reflectance of spectra processed by 11 mathematical transformations. <bold>(a)</bold> R; <bold>(b)</bold> 1/R; <bold>(c)</bold> lgR; <bold>(d)</bold> lg(1/R); <bold>(e)</bold> R&#x2019;; <bold>(f)</bold> (1/R)&#x2019;; <bold>(g)</bold> lg&#x2019;R; <bold>(h)</bold> lg&#x2019;(1/R); <bold>(i)</bold> R&#x2019;&#x2019;; <bold>(j)</bold> (1/R)&#x2019;&#x2019;; <bold>(k)</bold> lg&#x2019;&#x2019;R; <bold>(l)</bold> lg&#x2019;&#x2019;(1/R).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1746869-g004.tif">
<alt-text content-type="machine-generated">This composite figure shows the average spectral reflectance curves resulting from 11 different mathematical transformations applied to the original NIR data (R), including reciprocal (1/R), logarithmic (lgR, lg(1/R)), and their first and second derivatives. It visually compares the effects of these preprocessing techniques on the spectral profiles.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Extraction of characteristic wavelengths</title>
<p>The large number of wavelength variables in the original spectral data may introduce redundancy, which can impair modeling efficiency and increase computational load during subsequent analysis. Therefore, effective selection of characteristic wavelengths is essential for building efficient and accurate predictive models. Pearson correlation analysis was performed using MATLAB R2023a to assess the relationships between the measured tannin content and spectral reflectance values from both the raw spectra (R) and its 11 mathematically transformed forms. Spectral variables that passed a significance threshold of <italic>P</italic> &lt; 0.01 and had correlation coefficients exceeding the critical value (|<italic>r</italic>| &gt; 0.123) were retained as characteristic wavelengths. As shown in <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>, the correlation coefficient (<italic>r</italic>) reflects the strength and direction of the relationship between individual wavelengths and measured tannin content. The color gradient in the figure indicates the magnitude of correlation: darker tones represent stronger positive or negative associations, while lighter tones indicate weaker correlations.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Correlation between Spectral Characteristic Bands and Tannin Content in Walnut Kernel.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1746869-g005.tif">
<alt-text content-type="machine-generated">A heatmap visualization of the Pearson correlation coefficients (r) between each individual NIR wavelength and the measured tannin content. The color gradient (from dark to light) represents the strength and direction (positive or negative) of the linear relationship, aiding in the identification of spectrally informative regions correlated with tannin levels.</alt-text>
</graphic></fig>
<p><xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref> summarizes the number of characteristic wavelengths identified under the original spectrum and various mathematically transformed spectra, along with the maximum, minimum, and mean values of their positive and negative Pearson correlation coefficients. Compared to the original reflectance spectrum (R), transformations such as 1/R, log R (lgR), and log(1/R) increased the number of selected wavelengths but did not yield significant improvements in correlation strength. In contrast, derivative-based preprocessing significantly enhanced the correlation between spectral variables and tannin content in walnut kernels. Specifically, under first derivative transformations, spectra such as R&#x2032;, (1/R)&#x2032;, lg&#x2032;R, and lg&#x2032;(1/R) produced a moderate number of characteristic wavelengths but showed substantially improved correlations, with maximum positive <italic>r</italic> values of 0.399, 0.392, 0.424, and 0.386, respectively. Second derivative transformations also improved correlation levels, particularly for lg&#x2033;(1/R) and lg&#x2033;R, which reached correlation values as high as &#xb1;0.400. Taking both the number of selected wavelengths and the correlation strength into consideration, the mean absolute correlation coefficient (|<italic>r</italic>|) was used as the evaluation criterion. As a result, R&#x2032;, lg&#x2032;R, and lg&#x2032;(1/R) were selected as the most effective mathematical preprocessing methods for subsequent model development.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Statistics of the number of characteristic bands (NFB) and correlation values.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Transformation</th>
<th valign="middle" align="center">NFB</th>
<th valign="middle" align="center">Positive correlation extremum</th>
<th valign="middle" align="center">Negative correlation extremum</th>
<th valign="middle" align="center">Mean |<italic>r</italic>|</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">R</td>
<td valign="middle" align="center">33</td>
<td valign="middle" align="center">0.106</td>
<td valign="middle" align="center">-0.127</td>
<td valign="middle" align="center">0.041</td>
</tr>
<tr>
<td valign="middle" align="center">1/R</td>
<td valign="middle" align="center">155</td>
<td valign="middle" align="center">0.162</td>
<td valign="middle" align="center">-0.120</td>
<td valign="middle" align="center">0.056</td>
</tr>
<tr>
<td valign="middle" align="center">lgR</td>
<td valign="middle" align="center">79</td>
<td valign="middle" align="center">0.113</td>
<td valign="middle" align="center">-0.145</td>
<td valign="middle" align="center">0.048</td>
</tr>
<tr>
<td valign="middle" align="center">lg(1/R)</td>
<td valign="middle" align="center">79</td>
<td valign="middle" align="center">0.145</td>
<td valign="middle" align="center">-0.113</td>
<td valign="middle" align="center">0.048</td>
</tr>
<tr>
<td valign="middle" align="center">R&#x2019;</td>
<td valign="middle" align="center">981</td>
<td valign="middle" align="center">0.399</td>
<td valign="middle" align="center">-0.367</td>
<td valign="middle" align="center">0.172</td>
</tr>
<tr>
<td valign="middle" align="center">(1/R)&#x2019;</td>
<td valign="middle" align="center">877</td>
<td valign="middle" align="center">0.392</td>
<td valign="middle" align="center">-0.435</td>
<td valign="middle" align="center">0.160</td>
</tr>
<tr>
<td valign="middle" align="center">lg&#x2019;R</td>
<td valign="middle" align="center">1115</td>
<td valign="middle" align="center">0.424</td>
<td valign="middle" align="center">-0.386</td>
<td valign="middle" align="center">0.193</td>
</tr>
<tr>
<td valign="middle" align="center">lg&#x2019;(1/R)</td>
<td valign="middle" align="center">1115</td>
<td valign="middle" align="center">0.386</td>
<td valign="middle" align="center">-0.424</td>
<td valign="middle" align="center">0.193</td>
</tr>
<tr>
<td valign="middle" align="center">R&#x2019;&#x2019;</td>
<td valign="middle" align="center">607</td>
<td valign="middle" align="center">0.395</td>
<td valign="middle" align="center">-0.440</td>
<td valign="middle" align="center">0.120</td>
</tr>
<tr>
<td valign="middle" align="center">(1/R)&#x2019;&#x2019;</td>
<td valign="middle" align="center">339</td>
<td valign="middle" align="center">0.324</td>
<td valign="middle" align="center">-0.381</td>
<td valign="middle" align="center">0.085</td>
</tr>
<tr>
<td valign="middle" align="center">lg&#x2019;&#x2019;R</td>
<td valign="middle" align="center">671</td>
<td valign="middle" align="center">0.400</td>
<td valign="middle" align="center">-0.348</td>
<td valign="middle" align="center">0.117</td>
</tr>
<tr>
<td valign="middle" align="center">lg&#x2019;&#x2019;(1/R)</td>
<td valign="middle" align="center">671</td>
<td valign="middle" align="center">0.348</td>
<td valign="middle" align="center">-0.400</td>
<td valign="middle" align="center">0.117</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Research indicates that the wavelet transform offers significant advantages over traditional mathematical transformations in spectral processing. To investigate these advantages, continuous wavelet transform (CWT) analyses were conducted on R, R&#x2019;, lg&#x2019;R, and lg&#x2019;(1/R) (<xref ref-type="disp-formula" rid="eq1">Equations 1</xref> and <xref ref-type="disp-formula" rid="eq2">2</xref>), denoted as R_CWT, R&#x2019;_CWT, lg&#x2019;R_CWT, and lg&#x2019;(1/R)_CWT, respectively. The correlation coefficient (<italic>r</italic>) was used to represent the correlation between the wavelet coefficients and tannin levels in walnut kernels. As illustrated in <xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref>, CWT processing resulted in an overall increase in the correlation between the spectral data and walnut tannin. The correlation exhibited a trend of initially increasing and then decreasing from scale 2<sup>1</sup> to 2<sup>10</sup>. Notably, at scale 2<sup>4</sup>, R_CWT and lg&#x2019;R_CWT reached their maximum correlation values of 0.446 and 0.430, respectively. The maximum value for R&#x2019;_CWT at scale 2<sup>5</sup> was 0.448, while the maximum <italic>r</italic> for lg&#x2019;(1/R)_CWT at scale 2<sup>7</sup> was 0.388. As the decomposition scale increased, the number of characteristic bands across the four CWT treatments generally exhibited an upward trend (<xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref>).</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Correlation between Wavelet Coefficients and Tannins under Four Different Continuous Wavelet Transform Processing Methods. <bold>(a)</bold> R_CWT; <bold>(b)</bold> R&#x2019;_CWT; <bold>(c)</bold> lg&#x2019;R_CWT; <bold>(d)</bold> lg&#x2019;(1/R)_CWT.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1746869-g006.tif">
<alt-text content-type="machine-generated">This set of four subplots depicts the correlation between wavelet coefficients (derived via CWT at scales 2&#xb9; to 2&#xb9;&#x2070;) and tannin content for four differently preprocessed spectral inputs: R_CWT, R'_CWT, lg'R_CWT, and lg'(1/R)_CWT. It demonstrates how CWT processing enhances and modulates the correlation across different decomposition scales.</alt-text>
</graphic></fig>
<p>As shown in <xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7</bold></xref>, the mean absolute Pearson correlation coefficient (|<italic>r</italic>|) for R_CWT and R&#x2032;_CWT reached their respective maximum values at scale 2<sup>9</sup>, with values of 0.178 and 0.208. For lg&#x2032;R_CWT and lg&#x2032;(1/R)_CWT, the highest mean |<italic>r</italic>| values (both 0.240) were observed at scale 2<sup>8</sup>. Considering both the number of characteristic wavelengths and the strength of their correlations, scale 2<sup>9</sup> was selected as the optimal decomposition scale for R_CWT and R&#x2032;_CWT, while scale 2<sup>8</sup> was determined to be optimal for lg&#x2032;R_CWT and lg&#x2032;(1/R)_CWT. In subsequent tannin prediction modeling, wavelet coefficients extracted from these four spectral forms at their respective optimal scales will be used as independent variables for model construction.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Extreme positive and negative correlations between wavelet coefficients and tannins, as well as the number of characteristic bands, under four different CWT processing methods. <bold>(a)</bold> R_CWT; <bold>(b)</bold> R&#x2019;_CWT; <bold>(c)</bold> lg&#x2019;R_CWT; <bold>(d)</bold> lg&#x2019;(1/R)_CWT.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1746869-g007.tif">
<alt-text content-type="machine-generated">The figure presents detailed statistics for the four CWT methods, plotting the maximum positive and negative correlation values, as well as the number of selected characteristic bands, across the ten decomposition scales. This analysis was crucial for determining the optimal scale (e.g., 2&#x2078; or 2&#x2079;) for each CWT-preprocessed dataset to maximize feature relevance.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Sample set partitioning</title>
<p>Prior to model construction, the full dataset was randomly divided into training and validation sets at a ratio of 6:4. A total of 102 samples were randomly assigned to the training set for model development, while the remaining 69 samples were used as the validation set for performance evaluation. <xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8</bold></xref> presents violin plots of tannin content for the three datasets. The tannin content across all samples ranged from 4.73 to 20.17 mg/g. The distribution patterns of tannin content were generally consistent among the full dataset, training set, and validation set. The outer contours reflect kernel density estimates, with wider sections indicating larger sample concentrations. The figure also illustrates the distribution of standard deviation, median, and coefficient of variation (CV) across the datasets. The mean tannin content of the full dataset was 13.17 mg/g, while the training and validation sets had mean values of 13.06 mg/g and 13.33 mg/g, respectively. The mean, median, and standard deviation of tannin content were comparable across the three sets. Moreover, the full dataset exhibited CV and mean values intermediate between those of the training and validation sets, indicating that the partitioning was statistically balanced and representative. These results support the suitability of the sample division for constructing a robust and generalizable prediction model for walnut kernel tannin content.</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Descriptive statistical characteristics of tannins in each sample set.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1746869-g008.tif">
<alt-text content-type="machine-generated">Panel (a) uses violin plots to compare the distribution of tannin content across the full dataset, the training set, and the validation set, confirming their statistical similarity. Panel (b) provides key descriptive statistics (mean, median, standard deviation, coefficient of variation) for these sets, validating that the 6:4 random split produced balanced and representative subsets for modeling.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Model construction and accuracy evaluation</title>
<p>In this study, random forest (RF) models were developed to predict tannin content in walnut kernels using both full-spectrum data and selected characteristic wavelengths derived from various spectral preprocessing techniques. The spectral variables served as independent variables, while measured tannin content was used as the dependent variable. To optimize RF model performance, key hyperparameters were systematically tuned. Specifically, the number of decision trees was set to 200, the minimum leaf size was fixed at 10, Bayesian optimization was performed with 50 iterations, and five-fold cross-validation was used to ensure predictive accuracy and model generalizability. The modeling results are presented in <xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9</bold></xref>. The results showed notable differences in model performance between full-spectrum and characteristic wavelength inputs under different spectral preprocessing strategies. Overall, models based on selected characteristic wavelengths outperformed those using full-spectrum data, particularly in terms of the coefficient of determination (R<sup>2</sup>). Most models based on characteristic wavelengths achieved R<sup>2</sup> values above 0.70, whereas nearly half of the full-spectrum models showed signs of overfitting. This indicates that selecting informative wavelengths can effectively eliminate irrelevant spectral information, thereby enhancing model performance. Moreover, under the same preprocessing method, the difference in R<sup>2</sup> values between the training and validation sets was smaller when using characteristic wavelengths, further demonstrating improved model stability. Although full-spectrum models exhibited slightly better RMSE and RPD values in the training set, their RPD values differed substantially between training and validation sets&#x2014;with a maximum difference of 0.928&#x2014;suggesting weaker generalization capability. 
In contrast, the RPD values of characteristic wavelength models in the validation set were typically above 2.2, indicating higher predictive accuracy and better robustness across different preprocessing methods. Nearly half of the full-spectrum models had RPD values below 1.4, further underscoring their limited stability. In summary, feature wavelength selection reduced spectral redundancy and improved model robustness. While full-spectrum models performed slightly better in certain metrics, models based on characteristic wavelengths offered superior comprehensive performance in terms of accuracy, stability, and computational efficiency, making them more suitable for tannin estimation. Notably, models constructed using first derivative spectra combined with continuous wavelet transform (CWT) achieved the highest prediction accuracy and stability. This highlights that integrating first derivative preprocessing with CWT significantly enhances the model&#x2019;s ability to predict tannin content and offers a reliable strategy for improving spectral model performance.</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Results of Training and Validation Sets for Walnut Tannin Estimation Model. <bold>(a)</bold> R full band; <bold>(b)</bold> R&#x2019; full band; <bold>(c)</bold> lg&#x2019;R full band; <bold>(d)</bold> lg&#x2019;(1/R) full band; <bold>(e)</bold> R_CWT_2<sup>9</sup> full band; <bold>(f)</bold> R&#x2019;_CWT_2<sup>9</sup> full band; <bold>(g)</bold> lg&#x2019;R_CWT_2<sup>8</sup> full band; <bold>(h)</bold> lg&#x2019;(1/R)_CWT_2<sup>8</sup> full band; <bold>(i)</bold> R characteristic band; <bold>(j)</bold> R&#x2019; characteristic band; <bold>(k)</bold> lg&#x2019;R characteristic band; <bold>(l)</bold> lg&#x2019;(1/R) characteristic band; <bold>(m)</bold> R_CWT_2<sup>9</sup> characteristic band; <bold>(n)</bold> R&#x2019;_CWT_2<sup>9</sup> characteristic band; <bold>(o)</bold> lg&#x2019;R_CWT_2<sup>8</sup> characteristic band; <bold>(p)</bold> lg&#x2019;(1/R)_CWT_2<sup>8</sup> characteristic band.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1746869-g009.tif">
<alt-text content-type="machine-generated">A comprehensive bar chart comparing the performance (R&#xb2;, RMSE, RPD) of Random Forest models built using full-spectrum data versus models using only selected characteristic wavelengths, across multiple spectral preprocessing methods. It clearly demonstrates the superior accuracy and stability of models based on feature-wavelength selection, with the combined first-derivative and CWT method yielding the best results.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_6">
<label>3.6</label>
<title>SHAP-based interpretation of the RF model</title>
<p>Due to the inherent &#x201c;black-box&#x201d; nature of machine learning algorithms, assessing the influence of input features plays a crucial role in model interpretation and optimization (<xref ref-type="bibr" rid="B31">Ye et&#xa0;al., 2024</xref>). To identify the most influential spectral variables and explain their contributions to the model&#x2019;s predictions, the SHAP algorithm was employed. Visualization of model interpretability was performed using the SHAP library in Python. Specifically, SHAP values were used to evaluate the contribution of each selected wavelength in the best-performing RF prediction model. As shown in <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10A</bold></xref>, the top 10 most important features are visualized in a SHAP beeswarm plot. The features are ranked in ascending order based on their cumulative contribution to the model. It is evident that the most influential spectral regions are located within the 4000-4999 cm<sup>-1</sup> and 7000-8999 cm<sup>-1</sup> ranges. The horizontal axis represents SHAP values, while the vertical axis displays the features ranked by their overall impact. <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10B</bold></xref> presents a bar plot of the mean absolute SHAP values for all features, indicating the average contribution of each variable across all predictions. Features with higher SHAP values contributed more significantly to the model&#x2019;s output. <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10C</bold></xref> shows waterfall plots for two randomly selected samples, illustrating the contribution of individual features to the prediction result. Positive SHAP values indicate an increase in the predicted tannin content, whereas negative values suggest a decrease. The vertical axis ranks the features by cumulative SHAP impact. Blue bars represent features that reduced the prediction, while red bars represent features that increased the prediction.</p>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>Feature interpretation of the random forest model using the SHAP algorithm. <bold>(A)</bold> Beeswarm plot of the top 10 most important features, with red data points indicating high SHAP values and blue data points indicating low SHAP values. <bold>(B)</bold> SHAP summary chart representing the mean SHAP value of each feature. <bold>(C)</bold> SHAP waterfall plots for 2 randomly selected samples; a: lg&#x2019;(1/R)_CWT_2<sup>8</sup> full band; b: lg&#x2019;(1/R)_CWT_2<sup>8</sup> characteristic band.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1746869-g010.tif">
<alt-text content-type="machine-generated">This figure utilizes SHAP (Shapley Additive Explanations) to interpret the best-performing RF model. Panel (A) is a beeswarm plot ranking the top 10 most influential features/wavelengths. Panel (B) is a summary bar plot of mean absolute SHAP values for all features. Panel (C) shows waterfall plots for two random samples, detailing how each feature contributed to pushing the final prediction above or below the baseline value.</alt-text>
</graphic></fig>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>This study investigated the effects of various spectral preprocessing techniques&#x2014;including mathematical transformations, continuous wavelet transform (CWT), and their combinations&#x2014;on enhancing spectral sensitivity and improving the predictive accuracy for tannin content. The results demonstrated that reciprocal, logarithmic, and reciprocal-logarithmic transformations did not significantly improve the correlation between spectral variables and tannin content. In contrast, the first derivative transformation markedly enhanced correlations by amplifying subtle variations within the spectral data. These findings align with previous reports by <xref ref-type="bibr" rid="B6">Guo et&#xa0;al. (2025)</xref>, <xref ref-type="bibr" rid="B14">Li et&#xa0;al. (2024)</xref>, and <xref ref-type="bibr" rid="B35">Zhang et&#xa0;al. (2023)</xref>, confirming the efficacy of first derivative processing in spectral pretreatment.</p>
<p>However, compared to wavelet-based methods, conventional mathematical transformations were less effective in suppressing high-frequency noise and managing complex background interference. CWT demonstrated distinct advantages in dimensionality reduction, noise suppression, and feature enhancement. Decomposing the original spectra via CWT led to notable improvements in both model sensitivity and stability, outperforming traditional mathematical preprocessing. This observation is consistent with the work of <xref ref-type="bibr" rid="B8">Hu et&#xa0;al. (2025)</xref>, who demonstrated that CWT effectively extracted spectral features related to rice leaf SPAD values using a BPNN model based on bior3.3 wavelets. Similarly, <xref ref-type="bibr" rid="B26">Wang et&#xa0;al. (2022)</xref> reported that small-scale CWT significantly improved the estimation of nitrogen content in tea leaves, reducing the number of input variables by 99.34% and increasing model accuracy by 11% compared to conventional preprocessing methods.</p>
<p>The characteristic wavelengths identified in this study, notably those within the 4000-5000 cm<sup>-1</sup> and 7000-9000 cm<sup>-1</sup> ranges, correspond to known NIR absorption regions for phenolic compounds. The former region is often associated with combination bands involving O-H and C-O vibrations, while the latter is typically linked to the first overtone of O-H stretching. These assignments are consistent with the chemical structure of tannins, which are rich in hydroxyl and aromatic moieties. The strong correlation between these spectral features and tannin content underscores the physicochemical plausibility of the selected wavelengths and supports the robustness of the developed prediction model.</p>
<p>Overall, the integration of wavelet decomposition, particularly CWT, enhanced both model accuracy and robustness. Among the preprocessing strategies, the combination of the first derivative and CWT proved especially effective. This synergy likely stems from the first derivative&#x2019;s capacity to capture fine-scale spectral variations, which are subsequently refined and enhanced through the multi-resolution decomposition provided by CWT (<xref ref-type="bibr" rid="B33">Yumiti and Wang, 2022</xref>). Consequently, this combined approach yielded superior predictive performance compared to standalone mathematical or wavelet transformations. Furthermore, CWT exhibited high computational efficiency and sensitivity in detecting abrupt changes and localized features within high-dimensional spectral data. By preserving both low- and high-frequency information, CWT contributed to improved modeling performance relative to conventional spectral transformation algorithms, as also evidenced in prior studies (<xref ref-type="bibr" rid="B33">Yumiti and Wang, 2022</xref>; <xref ref-type="bibr" rid="B28">Wang et&#xa0;al., 2014</xref>). In the data preprocessing stage, outlier detection and feature selection were based on all samples, and the training and validation sets were not strictly separated. Although this practice is common in spectral studies with limited samples (<xref ref-type="bibr" rid="B10">Jensen et&#xa0;al., 2008</xref>; <xref ref-type="bibr" rid="B32">Ying et&#xa0;al., 2006</xref>), it may in theory introduce a risk of information leakage, resulting in optimistic model validation results. Future research can adopt more rigorous nested cross-validation or a completely independent validation set design to further improve the generalization ability and reliability of the model.</p>
<p>While the random forest (RF) algorithm was selected for this study due to its established robustness, interpretability, and strong performance in similar spectral applications, we acknowledge that other machine learning approaches&#x2014;such as gradient boosting, support vector machines, and deep learning architectures&#x2014;may offer distinct advantages for spectral modeling. Future comparative studies that systematically incorporate a broader range of algorithms could further refine prediction accuracy for tannin content and provide deeper insights for model selection in this field.</p>
<p>In summary, CWT outperformed traditional mathematical transformations in strengthening the correlation between spectral data and tannin content and in enhancing model accuracy. Moreover, combining mathematical transformations with wavelet processing optimized the spectral pretreatment pipeline, leading to improved predictive performance. The application of diverse preprocessing methods for NIR-based tannin estimation establishes a solid foundation for monitoring walnut tannins and opens new avenues for remote sensing-based quantitative trait analysis in agriculture. Future research could integrate optimized spectral transformation techniques with advanced machine learning algorithms and satellite remote sensing data to enable regional-scale monitoring of walnut tannin content, thereby advancing the application of remote sensing technologies in fruit crop research.</p>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusion</title>
<p>This study used &#x2018;Wen 185&#x2019; walnut kernels as the research material and applied multiple preprocessing strategies&#x2014;including mathematical transformations, continuous wavelet transform (CWT), and their combination&#x2014;to enhance spectral data. Based on these preprocessing results, random forest (RF) models were constructed to quantitatively predict tannin content. The first derivative transformation, CWT, and the combination of first derivative with CWT all improved the correlation between spectral data and measured tannin content. Among them, the combination of first derivative and CWT yielded the best model performance. The most effective prediction model was constructed using the characteristic wavelengths of lg&#x2032;(1/R)_CWT at scale 2<sup>8</sup>, achieving R&#xb2; values of 0.880 and 0.831 for the training and validation sets, respectively; RMSE values of 1.188 and 1.620; and RPD values of 2.904 and 2.459. These results indicate strong predictive accuracy and robustness. Furthermore, the SHAP algorithm was employed to visualize feature importance and model interpretability. The analysis confirmed that the RF model effectively captured the key wavelengths contributing to tannin prediction, offering a reliable, interpretable approach for estimating tannin content in walnuts.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material. Further inquiries can be directed to the corresponding author.</p></sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>QX: Software, Methodology, Writing &#x2013; original draft. LL: Data curation, Writing &#x2013; review &amp; editing. YY: Visualization, Investigation, Writing &#x2013; review &amp; editing. MA: Supervision, Writing &#x2013; review &amp; editing. YC: Resources, Supervision, Writing &#x2013; review &amp; editing, Conceptualization. SW: Resources, Writing &#x2013; review &amp; editing, Conceptualization, Supervision. JQ: Resources, Conceptualization, Supervision, Writing &#x2013; review &amp; editing. LC: Supervision, Writing &#x2013; review &amp; editing, Resources. QJ: Writing &#x2013; review &amp; editing, Resources, Supervision. ZG: Supervision, Writing &#x2013; review &amp; editing, Conceptualization, Resources, Funding acquisition. RZ: Writing &#x2013; review &amp; editing.</p></sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s10" sec-type="correction-statement">
<title>Correction note</title>
<p>A correction has been made to this article. Details can be found at: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fpls.2026.1806058">10.3389/fpls.2026.1806058</ext-link>.</p></sec>
<sec id="s11" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s12" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Aoki</surname> <given-names>H.</given-names></name>
<name><surname>Kimura</surname> <given-names>K.</given-names></name>
<name><surname>Igarashi</surname> <given-names>K.</given-names></name>
<etal/>
</person-group>. (<year>2006</year>). 
<article-title>Soy protein suppresses gene expression of acetyl-CoA carboxylase alpha from promoter PI in rat liver</article-title>. <source>Biosci. Biotechnol. Biochem.</source> <volume>70</volume>, <fpage>843</fpage>&#x2013;<lpage>849</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1271/bbb.70.843</pub-id>, PMID: <pub-id pub-id-type="pmid">16636450</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Breiman</surname> <given-names>L.</given-names></name>
</person-group> (<year>2001</year>). 
<article-title>Random forests</article-title>. <source>Mach. Learn.</source> <volume>45</volume>, <fpage>5</fpage>&#x2013;<lpage>32</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>, PMID: <pub-id pub-id-type="pmid">40797221</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Cai</surname> <given-names>J. J.</given-names></name>
</person-group> (<year>1997</year>). 
<article-title>Determination of tannins in fruits using o-phenanthroline colorimetry</article-title>. <source>Tianjin. Chem. Ind.</source> <volume>11</volume>, <fpage>40</fpage>&#x2013;<lpage>41</lpage>. CNKI:SUN:TJHG.0.1997-03-013.
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Cheng</surname> <given-names>Y. L.</given-names></name>
</person-group> (<year>2020</year>). <source>Classification and tannin content detection of wine grapes based on near-infrared hyperspectral imaging</source> (
<publisher-name>Northwest A&amp;F University</publisher-name>). doi:&#xa0;<pub-id pub-id-type="doi">10.27409/d.cnki.gxbnu.2020.001300</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Guan</surname> <given-names>C.</given-names></name>
<name><surname>Liu</surname> <given-names>M. Y.</given-names></name>
<name><surname>Man</surname> <given-names>W. D.</given-names></name>
<name><surname>Zhang</surname> <given-names>Y. B.</given-names></name>
<name><surname>Zhang</surname> <given-names>Q. W</given-names></name>
</person-group>. (<year>2024</year>). 
<article-title>Estimation of Spartina alterniflora leaf chlorophyll content based on continuous wavelet and random forest algorithm</article-title>. <source>Spectrosc. Spectr. Anal.</source> <volume>44</volume>, <fpage>2993</fpage>&#x2013;<lpage>3000</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3964/j.issn.1000-0593(2024)10-2993-08</pub-id>, PMID: <pub-id pub-id-type="pmid">41747345</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Guo</surname> <given-names>Y. P.</given-names></name>
<name><surname>Wang</surname> <given-names>X. M.</given-names></name>
<name><surname>Zhao</surname> <given-names>F.</given-names></name>
<name><surname>Li</surname> <given-names>P. P</given-names></name>
</person-group>. (<year>2025</year>). 
<article-title>Hyperspectral inversion of soil salinity in oasis tillage layer based on optimal mathematics and wavelet transform</article-title>. <source>Trans. Chin. Soc. Agric. Eng.</source> <volume>41</volume>, <fpage>83</fpage>&#x2013;<lpage>93</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.11975/j.issn.1002-6819.202407184</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hernes</surname> <given-names>P. J.</given-names></name>
<name><surname>Hedges</surname> <given-names>J. I.</given-names></name>
</person-group> (<year>2000</year>). 
<article-title>Determination of condensed tannin monomers in environmental samples by capillary gas chromatography of acid depolymerization extracts</article-title>. <source>Anal. Chem.</source> <volume>72</volume>, <fpage>5115</fpage>&#x2013;<lpage>5124</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/ac991301y</pub-id>, PMID: <pub-id pub-id-type="pmid">11055736</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hu</surname> <given-names>W. R.</given-names></name>
<name><surname>Gao</surname> <given-names>Q. W.</given-names></name>
<name><surname>Yang</surname> <given-names>H. B.</given-names></name>
<name><surname>Gao</surname> <given-names>Z. Q</given-names></name>
</person-group>. (<year>2025</year>). 
<article-title>Estimation of SPAD value in rice based on CWT and BP neural network</article-title>. <source>Shandong. Agric. Sci.</source> <volume>57</volume>, <fpage>154</fpage>&#x2013;<lpage>162</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.14083/j.issn.1001-4942.2025.04.019</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Huang</surname> <given-names>C. F.</given-names></name>
<name><surname>Ni</surname> <given-names>Y. N.</given-names></name>
</person-group> (<year>2002</year>). 
<article-title>Determination of tannin in food by spectrophotometry</article-title>. <source>J. Nanchang. Univ. (Nat. Sci).</source> <volume>26</volume>, <fpage>243</fpage>&#x2013;<lpage>246</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3969/j.issn.1006-0464.2002.03.012</pub-id>, PMID: <pub-id pub-id-type="pmid">35900448</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jensen</surname> <given-names>J. S.</given-names></name>
<name><surname>Egebo</surname> <given-names>M.</given-names></name>
<name><surname>Meyer</surname> <given-names>A. S.</given-names></name>
</person-group> (<year>2008</year>). 
<article-title>Identification of spectral regions for quantification of red wine tannins with Fourier transform mid-infrared spectroscopy</article-title>. <source>J. Agric. Food Chem.</source> <volume>56</volume>, <fpage>3493</fpage>&#x2013;<lpage>3499</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/jf703573f</pub-id>, PMID: <pub-id pub-id-type="pmid">18442247</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>H.</given-names></name>
<name><surname>Ainival</surname> <given-names>A.</given-names></name>
<name><surname>Ahmat</surname> <given-names>M.</given-names></name>
</person-group> (<year>1999</year>). 
<article-title>Rapid determination method of oil iodine value</article-title>. <source>Fine. Chem.</source>, <fpage>26</fpage>&#x2013;<lpage>28</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3321/j.issn:1003-5214.1999.03.009</pub-id>, PMID: <pub-id pub-id-type="pmid">30704229</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>Q.</given-names></name>
<name><surname>Chen</surname> <given-names>S.</given-names></name>
<name><surname>Han</surname> <given-names>J.</given-names></name>
<name><surname>Li</surname> <given-names>B.</given-names></name>
<name><surname>Wu</surname> <given-names>L.</given-names></name>
<name><surname>Li</surname> <given-names>J</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Unraveling almonds deterioration using whole-cell biosensor coupled with machine learning approaches and SHAP interpretation</article-title>. <source>Food Chem.</source> <volume>484</volume>, <elocation-id>144392</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.foodchem.2025.144392</pub-id>, PMID: <pub-id pub-id-type="pmid">40286707</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>M.</given-names></name>
<name><surname>Liu</surname> <given-names>Y.</given-names></name>
<name><surname>Sun</surname> <given-names>C.</given-names></name>
<name><surname>Meng</surname> <given-names>Y. N.</given-names></name>
<name><surname>Yang</surname> <given-names>K. Q.</given-names></name>
<name><surname>Hou</surname> <given-names>L. Q</given-names></name>
<etal/>
</person-group>. (<year>2009</year>). 
<article-title>Progress in research on nutritional value of walnut</article-title>. <source>Cereal Oil J. China</source> <volume>24</volume>, <fpage>166</fpage>&#x2013;<lpage>170</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.20048/j.cnki.issn.1003-0174.2009.06.036</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>X.</given-names></name>
<name><surname>Zhang</surname> <given-names>Y. B.</given-names></name>
<name><surname>Liu</surname> <given-names>M. Y.</given-names></name>
<name><surname>Man</surname> <given-names>W. D.</given-names></name>
<name><surname>Kong</surname> <given-names>D. K.</given-names></name>
<name><surname>Song</surname> <given-names>L. J</given-names></name>
<etal/>
</person-group> (<year>2024</year>). 
<article-title>Prediction of nitrogen content in wolfberry leaves using hyperspectral reflectance</article-title>. <source>Ningxia. J. Agric. For. Sci. Technol.</source> <volume>65</volume>, <fpage>48</fpage>&#x2013;<lpage>54</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3969/j.issn.1002-204x.2024.06.011</pub-id>, PMID: <pub-id pub-id-type="pmid">35900448</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>Y. M.</given-names></name>
<name><surname>Zhang</surname> <given-names>L. G.</given-names></name>
<name><surname>Zhang</surname> <given-names>P. C.</given-names></name>
</person-group>. (<year>2024</year>). 
<article-title>Prediction of nitrogen content in wolfberry leaves using hyperspectral reflectance</article-title>. <source>Ningxia. J. Agric. For. Sci. Technol.</source> <volume>65</volume>, <fpage>48</fpage>&#x2013;<lpage>54</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3969/j.issn.1002-204x.2024.06.011</pub-id>, PMID: <pub-id pub-id-type="pmid">35900448</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lin</surname> <given-names>D.</given-names></name>
<name><surname>Li</surname> <given-names>G.</given-names></name>
<name><surname>Zhu</surname> <given-names>Y.</given-names></name>
<name><surname>Liu</surname> <given-names>H.</given-names></name>
<name><surname>Jiao</surname> <given-names>Q</given-names></name>
</person-group>. (<year>2021</year>). 
<article-title>Predicting copper content in chicory leaves using hyperspectral data with continuous wavelet transforms and partial least squares</article-title>. <source>Comput. Electron. Agric.</source> <volume>187</volume>, <elocation-id>106293</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2021.106293</pub-id>, PMID: <pub-id pub-id-type="pmid">41743167</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>J.</given-names></name>
<name><surname>Li</surname> <given-names>Y.</given-names></name>
<name><surname>Liu</surname> <given-names>J. L.</given-names></name>
<name><surname>Wang</surname> <given-names>Y. J.</given-names></name>
<name><surname>Yu</surname> <given-names>Q. X</given-names></name>
</person-group>. (<year>2023</year>). 
<article-title>Preliminary establishment of an astringency evaluation system of walnut based on tannin content</article-title>. <source>Anhui. Agric. Sci. Bull.</source> <volume>51</volume>, <fpage>190</fpage>&#x2013;<lpage>192</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3969/j.issn.0517-6611.2023.09.045</pub-id>, PMID: <pub-id pub-id-type="pmid">35900448</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>T.</given-names></name>
<name><surname>Wang</surname> <given-names>W. Q.</given-names></name>
<name><surname>Li</surname> <given-names>Z. M.</given-names></name>
<name><surname>Qi</surname> <given-names>Y.</given-names></name>
<name><surname>Guo</surname> <given-names>Z. H.</given-names></name>
<name><surname>Xu</surname> <given-names>T. Y</given-names></name>
<etal/>
</person-group>. 
<article-title>Prediction of nitrogen content in rice leaves based on DWT-DE transformation and AHAELM algorithm</article-title>. <source>Trans. Chin. Soc. Agric. Mach.</source>, <fpage>1</fpage>&#x2013;<lpage>11</lpage>.
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lundberg</surname> <given-names>S.</given-names></name>
<name><surname>Lee</surname> <given-names>S.-I.</given-names></name>
</person-group> (<year>2017</year>). 
<article-title>A unified approach to interpreting model predictions</article-title>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1705.07874</pub-id>, PMID: <pub-id pub-id-type="pmid">41363103</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ma</surname> <given-names>X. T.</given-names></name>
<name><surname>Luo</surname> <given-names>H. P.</given-names></name>
<name><surname>Gao</surname> <given-names>F.</given-names></name>
<name><surname>Wang</surname> <given-names>C. X</given-names></name>
</person-group>. (<year>2022</year>). 
<article-title>Research and application of near-infrared spectroscopy in apple detection</article-title>. <source>J. Food Saf. Qual.</source> <volume>13</volume>, <fpage>4219</fpage>&#x2013;<lpage>4227</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.19812/j.cnki.jfsq11-5956/ts.2022.13.048</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mao</surname> <given-names>J. H.</given-names></name>
<name><surname>Zhao</surname> <given-names>H. Q.</given-names></name>
<name><surname>Jin</surname> <given-names>Q.</given-names></name>
<name><surname>Wang</surname> <given-names>X. F.</given-names></name>
<name><surname>Miao</surname> <given-names>Q. F.</given-names></name>
<name><surname>Wang</surname> <given-names>P</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Comparison of hyperspectral inversion methods for heavy metal content in soil of lead-zinc tailings area in Hebei</article-title>. <source>Trans. Chin. Soc. Agric. Eng.</source> <volume>39</volume>, <fpage>144</fpage>&#x2013;<lpage>156</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.11975/j.issn.1002-6819.202307092</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Meng</surname> <given-names>L.</given-names></name>
<name><surname>Zhang</surname> <given-names>J.</given-names></name>
<name><surname>Yang</surname> <given-names>T.</given-names></name>
<name><surname>Wu</surname> <given-names>L. G</given-names></name>
</person-group>. (<year>2022</year>). 
<article-title>Visualization of chlorophyll content in tomato leaves based on hyperspectral imaging</article-title>. <source>Hubei. Agric. Sci.</source> <volume>61</volume>, <fpage>171</fpage>&#x2013;<lpage>177</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.14088/j.cnki.issn0439-8114.2022.14.031</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Pei</surname> <given-names>D.</given-names></name>
<name><surname>Lu</surname> <given-names>X. Z.</given-names></name>
</person-group> (<year>2011</year>). <source>Chinese Walnut Germplasm Resources</source> (<publisher-loc>Beijing</publisher-loc>: 
<publisher-name>China Forestry Publishing House</publisher-name>).
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Qu</surname> <given-names>Z. Z.</given-names></name>
</person-group> (<year>1980</year>). <source>Pomology: Special Lectures on Fruit Tree Cultivation</source> Vol. <volume>321</volume> (<publisher-loc>Beijing</publisher-loc>: 
<publisher-name>Agricultural Publishing House</publisher-name>).
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Tan</surname> <given-names>Y. X.</given-names></name>
<name><surname>Tian</surname> <given-names>Y. C.</given-names></name>
<name><surname>Huang</surname> <given-names>Z. M.</given-names></name>
<name><surname>Zhang</surname> <given-names>Q.</given-names></name>
<name><surname>Tao</surname> <given-names>J.</given-names></name>
<name><surname>Liu</surname> <given-names>H. X</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Aboveground biomass inversion of Kandelia obovata mangrove in Maowei Sea, Beibu Gulf based on XGBoost algorithm</article-title>. <source>Acta Ecol. Sin.</source> <volume>43</volume>, <fpage>4674</fpage>&#x2013;<lpage>4688</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5846/stxb202201140141</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>F.</given-names></name>
<name><surname>Chen</surname> <given-names>L. Y.</given-names></name>
<name><surname>Duan</surname> <given-names>D. D.</given-names></name>
<name><surname>Cao</surname> <given-names>Q.</given-names></name>
<name><surname>Zhao</surname> <given-names>Y.</given-names></name>
<name><surname>Lan</surname> <given-names>W</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>Hyperspectral monitoring of total nitrogen content in fresh tea leaves based on wavelet analysis</article-title>. <source>Spectrosc. Spectr. Anal.</source> <volume>42</volume>, <fpage>3235</fpage>&#x2013;<lpage>3242</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3964/j.issn.1000-0593(2022)10-3235-08</pub-id>, PMID: <pub-id pub-id-type="pmid">41747333</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>Y. C.</given-names></name>
<name><surname>Li</surname> <given-names>X. F.</given-names></name>
<name><surname>Li</surname> <given-names>L. J.</given-names></name>
<name><surname>Li</surname> <given-names>N.</given-names></name>
<name><surname>Jiang</surname> <given-names>Q. N.</given-names></name>
<name><surname>Gu</surname> <given-names>X. H</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Quantitative inversion of chlorophyll content in pitaya stems and leaves based on discrete wavelet&#x2013;differential transform algorithm</article-title>. <source>Spectrosc. Spectr. Anal.</source> <volume>43</volume>, <fpage>549</fpage>&#x2013;<lpage>556</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3964/j.issn.1000-0593(2023)02-0549-08</pub-id>, PMID: <pub-id pub-id-type="pmid">41747344</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>Y. C.</given-names></name>
<name><surname>Yang</surname> <given-names>G. J.</given-names></name>
<name><surname>Zhu</surname> <given-names>J. S.</given-names></name>
<name><surname>Gu</surname> <given-names>X. H.</given-names></name>
<name><surname>Xu</surname> <given-names>P.</given-names></name>
<name><surname>Liao</surname> <given-names>Q. H</given-names></name>
<etal/>
</person-group>. (<year>2014</year>). 
<article-title>Estimation of organic matter content in northern meadow soils based on wavelet transform and PLS coupled model</article-title>. <source>Spectrosc. Spectr. Anal.</source> <volume>34</volume>, <fpage>1922</fpage>&#x2013;<lpage>1926</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3964/j.issn.1000-0593(2014)07-1922-05</pub-id>, PMID: <pub-id pub-id-type="pmid">41747314</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Xiao</surname> <given-names>C.</given-names></name>
<name><surname>Wood</surname> <given-names>C.</given-names></name>
<name><surname>Huang</surname> <given-names>W. X.</given-names></name>
<name><surname>L'Abb&#xe9;</surname> <given-names>M.</given-names></name>
<name><surname>Sarwar</surname> <given-names>G.</given-names></name>
<name><surname>Cooke</surname> <given-names>G</given-names></name>
<etal/>
</person-group>. (<year>2006</year>). 
<article-title>Tissue-specific regulation of acetyl-CoA carboxylase gene expression by dietary soy protein isolate in rats</article-title>. <source>Br. J. Nutr.</source> <volume>95</volume>, <fpage>1048</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1079/BJN20061776</pub-id>, PMID: <pub-id pub-id-type="pmid">16768825</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yang</surname> <given-names>W.</given-names></name>
<name><surname>Qu</surname> <given-names>X. J.</given-names></name>
</person-group> (<year>1989</year>). 
<article-title>Determination of tannin in hops by potassium ferricyanide absorbance method</article-title>. <source>J. Shandong. Agric. Univ.</source> <volume>20</volume>, <fpage>36</fpage>&#x2013;<lpage>40</lpage>. CNKI:SUN:SCHO.0.1989-02-006.
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ye</surname> <given-names>M.</given-names></name>
<name><surname>Zhu</surname> <given-names>L.</given-names></name>
<name><surname>Liu</surname> <given-names>X. D.</given-names></name>
<name><surname>Huang</surname> <given-names>Y.</given-names></name>
<name><surname>Chen</surname> <given-names>P. P.</given-names></name>
<name><surname>Li</surname> <given-names>H</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Hyperspectral inversion of soil organic matter content based on CWT, SHAP, and XGBoost</article-title>. <source>Environ. Sci.</source> <volume>45</volume>, <fpage>2280</fpage>&#x2013;<lpage>2291</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.13227/j.hjkx.202304100</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ying</surname> <given-names>Y. B.</given-names></name>
<name><surname>Liu</surname> <given-names>Y. D.</given-names></name>
<name><surname>Fu</surname> <given-names>X. P.</given-names></name>
</person-group> (<year>2006</year>). 
<article-title>Sugar content prediction of apple using near-infrared spectroscopy treated by wavelet transform</article-title>. <source>Spectrosc. Spectr. Anal.</source> <volume>26</volume>, <fpage>63</fpage>&#x2013;<lpage>66</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S1003-6326(06)60040-X</pub-id>, PMID: <pub-id pub-id-type="pmid">41737640</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yumiti</surname> <given-names>M.</given-names></name>
<name><surname>Wang</surname> <given-names>X. M.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Estimation of soil organic matter content based on continuous wavelet transform</article-title>. <source>Spectrosc. Spectr. Anal.</source> <volume>42</volume>, <fpage>1278</fpage>&#x2013;<lpage>1284</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3964/j.issn.1000-0593(2022)04-1278-07</pub-id>, PMID: <pub-id pub-id-type="pmid">41747333</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>P.</given-names></name>
<name><surname>Li</surname> <given-names>J. K.</given-names></name>
<name><surname>Meng</surname> <given-names>X. J.</given-names></name>
<name><surname>Zhang</surname> <given-names>P.</given-names></name>
<name><surname>Feng</surname> <given-names>X. Y.</given-names></name>
<name><surname>Wang</surname> <given-names>B. G</given-names></name>
<etal/>
</person-group>. (<year>2011</year>). 
<article-title>Research on nondestructive measurement of soluble tannin content in astringent persimmon using Vis&#x2013;NIR diffuse reflectance spectroscopy</article-title>. <source>Spectrosc. Spectr. Anal.</source> <volume>31</volume>, <fpage>951</fpage>&#x2013;<lpage>954</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3964/j.issn.1000-0593(2011)04-0951-04</pub-id>, PMID: <pub-id pub-id-type="pmid">41747314</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>X. Q.</given-names></name>
<name><surname>Li</surname> <given-names>Z. W.</given-names></name>
<name><surname>Zheng</surname> <given-names>D. C.</given-names></name>
<name><surname>Song</surname> <given-names>H. Y.</given-names></name>
<name><surname>Wang</surname> <given-names>G. L</given-names></name>
</person-group>. (<year>2023</year>). 
<article-title>Prediction of brown soil organic matter based on visible&#x2013;near-infrared hyperspectral stacking generalization model</article-title>. <source>Spectrosc. Spectr. Anal.</source> <volume>43</volume>, <fpage>903</fpage>&#x2013;<lpage>910</lpage>.
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhong</surname> <given-names>L.</given-names></name>
<name><surname>Qian</surname> <given-names>J. W.</given-names></name>
<name><surname>Chu</surname> <given-names>X. Y.</given-names></name>
<name><surname>Qian</surname> <given-names>Z. H.</given-names></name>
<name><surname>Wang</surname> <given-names>M.</given-names></name>
<name><surname>Li</surname> <given-names>J. L</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Monitoring heavy metal pollution in wheat soil using hyperspectral remote sensing</article-title>. <source>Trans. Chin. Soc. Agric. Eng.</source> <volume>39</volume>, <fpage>265</fpage>&#x2013;<lpage>270</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.11975/j.issn.1002-6819.202207160</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1665650">Zheli Wang</ext-link>, Hebei University of Economics and Business, China</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/743896">Jaime Cuevas</ext-link>, University of Quintana Roo, Mexico</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3296032">Zhu Zhou</ext-link>, Zhejiang Agriculture and Forestry University, China</p></fn>
</fn-group>
</back>
</article>