<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2026.1756218</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Enabling rapid and accurate grade discrimination of flue-cured tobacco: a near-infrared hyperspectral and machine learning approach</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Zou</surname><given-names>Jiang</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x2020;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3297451/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Gao</surname><given-names>Hongbo</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x2020;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname><given-names>Duo</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Chen</surname><given-names>Yunquan</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3298001/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Deng</surname><given-names>Shiyou</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Shi</surname><given-names>Nuo</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2846889/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Yang</surname><given-names>Shengjie</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Huang</surname><given-names>Chunlin</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Zi</surname><given-names>Dingchun</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3020203/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Du</surname><given-names>Yu</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Bai</surname><given-names>Yuxiang</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname><given-names>Na</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname><given-names>Ge</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1344774/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Liu</surname><given-names>Zhengling</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zhang</surname><given-names>Junhua</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2064091/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zhou</surname><given-names>Peng</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3085709/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Kunming University of Science and Technology</institution>, <city>Kunming</city>, <state>Yunnan</state>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>Yunnan Agricultural University</institution>, <city>Kunming</city>, <state>Yunnan</state>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff3"><label>3</label><institution>Kunming Branch of Yunnan Tobacco Company</institution>, <city>Kunming</city>, <state>Yunnan</state>,&#xa0;<country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Zhengling Liu, <email xlink:href="mailto:542811538@qq.com">542811538@qq.com</email>; Junhua Zhang, <email xlink:href="mailto:zhangjh@kust.edu.cn">zhangjh@kust.edu.cn</email>; Peng Zhou, <email xlink:href="mailto:zhoupeng@ynau.edu.cn">zhoupeng@ynau.edu.cn</email></corresp>
<fn fn-type="equal" id="fn003">
<p>&#x2020;These authors have contributed equally to this work</p></fn>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-24">
<day>24</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>17</volume>
<elocation-id>1756218</elocation-id>
<history>
<date date-type="received">
<day>28</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>31</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>28</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Zou, Gao, Wang, Chen, Deng, Shi, Yang, Huang, Zi, Du, Bai, Wang, Wang, Liu, Zhang and Zhou.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Zou, Gao, Wang, Chen, Deng, Shi, Yang, Huang, Zi, Du, Bai, Wang, Wang, Liu, Zhang and Zhou</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-24">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>To address the inefficiency and subjectivity of manual grading, this study established a machine learning model based on near-infrared hyperspectral data (950&#x2013;1650 nm) for the accurate classification of first-roasted tobacco grades. Multivariate statistical analysis uncovered the intrinsic correlations among grade, spectral data, and chemical composition, thereby laying a theoretical foundation for hyperspectral-based grading technology. Three preprocessing methods (namely, multiplicative scatter correction (MSC), standard normal variate transformation, and Savitzky&#x2013;Golay convolutional smoothing) and four classification models (namely, random forest, backpropagation neural network, extreme learning machine, and partial least squares&#x2013;discriminant analysis (PLS-DA)) were employed. Moreover, characteristic bands were selected through the successive projections algorithm (SPA) and competitive adaptive reweighted sampling to investigate how the number of characteristic bands affects the grade classification accuracy. The results showed that grade exhibited highly significant correlations with nicotine, reducing sugars, total sugars, and the sugar-nicotine ratio, and that spectra exhibited highly significant correlations with nicotine. The classification accuracy of full-band MSC preprocessing combined with the PLS-DA model reached 98.5%, while the classification accuracy reached 94.0% when using 70% of the full bands selected using the SPA. In conclusion, near-infrared hyperspectroscopy combined with machine learning not only offers high efficiency, accuracy, and non-destructiveness in the grading of first-roasted tobacco leaves but also provides a theoretical basis for industrial hyperspectral grading by elucidating the correlations among spectrum, chemical composition, and grade. 
This method avoids the subjectivity of manual grading and offers key technical support to advance the intelligence and automation of first-roasted tobacco leaf grading in the tobacco industry.</p>
</abstract>
<kwd-group>
<kwd>characteristic bands</kwd>
<kwd>chemical analysis</kwd>
<kwd>machine learning</kwd>
<kwd>Mantel test correlation analysis</kwd>
<kwd>near-infrared hyperspectroscopy</kwd>
<kwd>tobacco leaf grading</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This research was supported by the Science and Technology Plan Project of Kunming Branch of Yunnan Provincial Tobacco Company (KMYC202303).</funding-statement>
</funding-group>
<counts>
<fig-count count="10"/>
<table-count count="4"/>
<equation-count count="6"/>
<ref-count count="103"/>
<page-count count="18"/>
<word-count count="11208"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Technical Advances in Plant Science</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Tobacco is one of the most important cash crops worldwide, with an annual production exceeding 6.66 million tons. China alone produces more than 2 million tons annually, making it one of the world&#x2019;s leading tobacco producers (<xref ref-type="bibr" rid="B14">Food and Agriculture Organization of the United Nations, 2025</xref>). In China, according to GB 2635&#x2013;1992, first-roasted tobacco is classified into 42 grades based on maturity, leaf structure, oil content, color intensity, and other appearance traits. The grade of a first-roasted tobacco leaf determines its purchase price and industrial application, which directly influences the economic efficiency of the tobacco industry and the income of tobacco farmers (<xref ref-type="bibr" rid="B46">Marzan and Ruiz, 2019</xref>). There is no obvious difference in the appearance of similar grades of first-roasted tobacco, yet there is a substantial difference in their purchase price. For example, according to the 2024 purchase prices issued by the China Tobacco Monopoly Bureau, grade B1F was priced at 45.9 yuan/kg, while grade B2F was priced at 38.3 yuan/kg. Due to differences in the physical properties and intrinsic chemical compositions of various tobacco grades, it is necessary to classify raw materials by grade to meet the requirements of different tobacco products (<xref ref-type="bibr" rid="B42">Lu et&#xa0;al., 2021b</xref>). High-grade tobacco has higher mechanical strength, more balanced chemical composition, and better flavor, making it suitable for use as the primary raw material. In contrast, low-grade tobacco is mostly used as a filler. The misclassification of first-roasted tobacco grades may result in fluctuations or a decline in cigarette quality. Therefore, the accurate assessment of first-roasted tobacco leaf grades is of great importance.</p>
<p>Traditional tobacco grading mainly relies on sensory evaluation. Grading personnel require extensive training to master the relevant skills, and skilled evaluators are relatively scarce. Moreover, the accuracy of grading can be influenced by the experience of the personnel, environmental conditions, physical factors, and other variables. Online, real-time, and rapid tobacco grading during tobacco acquisition and processing is essential for adapting to large-scale production in the tobacco industry while reducing the labor intensity for workers (<xref ref-type="bibr" rid="B40">Lu et&#xa0;al., 2023a</xref>; <xref ref-type="bibr" rid="B89">Xin et&#xa0;al., 2023</xref>). Thus, the development of grading strategies for first-roasted tobacco leaves that do not rely on sensory evaluation has become a prominent research topic in the tobacco industry. The first method establishes decision rules based on fuzzy mathematics. For example, <xref ref-type="bibr" rid="B99">Zhang and Zhang, 2011</xref> applied digital image processing with fuzzy set theory for the automatic classification of tobacco leaves. The second approach employs computer vision techniques supported by machine learning algorithms, including both traditional machine learning and deep learning methods (<xref ref-type="bibr" rid="B38">Lou and Zhang, 2018</xref>; <xref ref-type="bibr" rid="B20">Harjoko et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B46">Marzan and Ruiz, 2019</xref>; <xref ref-type="bibr" rid="B33">Li et&#xa0;al., 2021</xref>). <xref ref-type="bibr" rid="B66">Setiawan and Purnama, 2020</xref> used the DarkNet19 algorithm to classify tobacco leaf images as healthy, curly, or hollow. However, both methods have certain limitations. Decision rule methods based on fuzzy mathematics require a class recognition model established using a large number of sample images. 
The mathematical derivation of this method is complex, and the image acquisition process is susceptible to interference, resulting in low classification accuracy and efficiency. Meanwhile, computer vision techniques based on machine learning rely on high-quality, accurately labeled image data. The data acquisition equipment is susceptible to environmental conditions, leading to high annotation costs. Moreover, due to the minimal differences in appearance between tobacco leaves from adjacent producing areas and of similar grades, computer vision technology faces considerable challenges in achieving accurate classification. Together, these issues limit the practical applicability of both methods (<xref ref-type="bibr" rid="B55">Niu et&#xa0;al., 2022</xref>).</p>
<p>Spectroscopic techniques are more accurate and environmentally adaptable than fuzzy-mathematics-based and machine vision techniques. Furthermore, using spectroscopic techniques, we can obtain spectral curves that reflect the structural characteristics of the tobacco leaf as well as chemical indices and internal structural information closely related to leaf quality (<xref ref-type="bibr" rid="B96">Zhang et&#xa0;al., 2023</xref>), providing more comprehensive information about the samples. Due to the advantages of machine learning in generalization capability, computational speed, and the handling of high-dimensional data, the combination of spectroscopy with machine learning has yielded encouraging results in tobacco classification (<xref ref-type="bibr" rid="B51">Moreira et&#xa0;al., 2009</xref>; <xref ref-type="bibr" rid="B32">Li et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B58">Qin et&#xa0;al., 2023</xref>). However, traditional spectroscopic methods (e.g., multispectral technology and near-infrared spectroscopy) have a limited number of bands, and interference with a key band can compromise the entire analysis. When the sample composition or morphology is highly similar, traditional spectroscopy struggles to differentiate samples due to the smaller number of bands and low-dimensional features. Hyperspectral methods avoid these drawbacks due to their extremely high spectral resolution and continuous spectral band coverage, making them a superior alternative in various applications (<xref ref-type="bibr" rid="B22">Hu et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B87">Wu et&#xa0;al., 2025</xref>).</p>
<p>Hyperspectral techniques, which integrate image and spectral information, have been applied in tobacco research (including maturity detection, chemical composition analysis, and pest and disease monitoring) (<xref ref-type="bibr" rid="B41">Lu et&#xa0;al., 2023b</xref>; <xref ref-type="bibr" rid="B91">Yang et&#xa0;al., 2025</xref>), and near-infrared hyperspectral technology, in particular, finds extensive application throughout the entire tobacco industry chain for quality control&#x2014;enabling rapid screening of basic leaf indices during procurement to replace manual labor and improve efficiency, facilitating real-time monitoring of key chemical components in cut tobacco and lamina during processing to ensure consistency, and supporting field-based monitoring of tobacco growth to inform breeding and cultivation optimization (<xref ref-type="bibr" rid="B45">Marcelo et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B9">Chen et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B96">Zhang et&#xa0;al., 2023</xref>). However, this technology also faces several well-recognized complexities and challenges, including but not limited to the substantial storage demands due to high-dimensional data, high costs associated with data processing and computation, as well as issues related to model complexity and interpretability (<xref ref-type="bibr" rid="B43">Luo et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B83">Wang et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B56">Panda et&#xa0;al., 2025</xref>). <xref ref-type="bibr" rid="B12">Dante and Sahu, 2018</xref> applied linear discriminant analysis to visible&#x2013;near-infrared hyperspectroscopy (400&#x2013;1000 nm) to model roasted and white-ribbed tobacco grades, representing an initial application of hyperspectral techniques in tobacco grade classification. 
<xref ref-type="bibr" rid="B85">Wei et&#xa0;al., 2024</xref> employed a one-dimensional convolutional neural network (1D-CNN) model in combination with the least angle regression (LAR) algorithm to classify tobacco leaves into 10 grades. The essence of tobacco leaf grades is the external expression of the suitability and balance of their chemical compositions. Because the visible (400&#x2013;750 nm) and near-infrared (750&#x2013;1000 nm) ranges within the visible&#x2013;near-infrared band exhibit weak correlation, spectral analysis may fail to fully capture the complex chemical features of tobacco leaves. This imposes certain limitations on the application of the visible&#x2013;near-infrared band in tobacco leaf grade classification. The near-infrared band is more stable than the visible&#x2013;near-infrared band and provides more comprehensive information on the chemical composition of tobacco. Therefore, the use of near-infrared hyperspectroscopy to classify tobacco grades and explore the relationship among tobacco grades, hyperspectral data, and chemical composition is highly valuable for classifying tobacco grades.</p>
<p>Compared to basic preprocessing methods (e.g., standardization or centering) that mainly adjust data scale, the preprocessing methods adopted in this study include Multiplicative Scatter Correction (MSC), Standard Normal Variate (SNV) transformation, and Savitzky&#x2013;Golay (SG) convolutional smoothing. These techniques effectively mitigate spectral interference, preserve intrinsic nonlinear relationships in hyperspectral data, and generate preprocessed spectra with improved compatibility for subsequent classification modeling (<xref ref-type="bibr" rid="B64">Roger et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B92">Yang et&#xa0;al., 2024</xref>). Unlike deep learning models such as CNNs or Transformers, which usually demand large annotated datasets and significant computational resources, classifiers including Random Forest (RF), Backpropagation Neural Network (BPNN), Extreme Learning Machine (ELM), and Partial Least Squares-Discriminant Analysis (PLS-DA) are more practical for limited-sample scenarios and cost-sensitive industrial applications (<xref ref-type="bibr" rid="B98">Zhang et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B41">Lu et&#xa0;al., 2023b</xref>). Thus, this study employs these four models (covering both linear and nonlinear approaches) to comparatively evaluate their performance and applicability in first-roasted tobacco leaf grading. Compared to Principal Component Analysis (PCA), Genetic Algorithm (GA), and stepwise regression, Competitive Adaptive Reweighted Sampling (CARS) and Successive Projections Algorithm (SPA) offer feature selection strategies better suited to first-roasted tobacco leaves&#x2019; spectral characteristics&#x2014;key information resides in narrow reflectance bands. Thus, CARS and SPA are adopted to reduce data redundancy and enhance classification model robustness (<xref ref-type="bibr" rid="B102">Zhu et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B25">Huang et&#xa0;al., 2021</xref>).</p>
<p>Herein, multivariate statistical analysis was used to explore correlations among tobacco grades, spectral data, and chemical compositions. Then, using near-infrared hyperspectral data of first-roasted tobacco leaves, three preprocessing methods&#x2014;MSC, SG, SNV&#x2014;and four classification models (RF, ELM, BPNN, PLS-DA) were applied for tobacco grade classification to identify the optimal industrial model. Finally, characteristic wavelength selection algorithms examined model performance across different bands, balancing simplicity and predictive accuracy. Overall, this study provides a feasible strategy for lightweight deployment and industrial application of hyperspectral technology in tobacco leaf grading and is expected to enhance the economic benefits of the tobacco industry and lay a solid theoretical foundation for its practical industrial application.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Reagents and materials</title>
<p>Nicotine standards (CAS:54-11-5, purity &#x2265; 98%) were purchased from Aladdin Biochemical Technology Co., Ltd. (Shanghai, China). D-glucose (CAS:50-99-7, purity &#x2265; 99.5%), sodium chloride (NaCl, CAS:7647-14-5, purity &#x2265; 99.5%), and acetic acid (HAc, analytical grade) were obtained from Sinopharm Chemical Reagent Co., Ltd. (Shanghai, China). The potassium single-element standard solution (GBW(E)080125) was purchased from the National Institute of Metrology, China (NIM).</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Tobacco sample preparation</title>
<p>First-roasted tobacco leaves are primarily processed products obtained by curing harvested mature fresh leaves with artificial heat. In this study, such samples were collected from eight counties of Kunming City, Yunnan Province, during 2023 and 2024 [All samples were provided under the Research on the Science and Technology Plan Project of Kunming Branch of Yunnan Provincial Tobacco Company (KMYC202303)]. All samples were flue-cured tobacco of the species <italic>Nicotiana tabacum</italic> L. (genus <italic>Nicotiana</italic> L., family Solanaceae). In accordance with the Chinese National Standard GB 2635&#x2013;1992, the tobacco leaf grades were classified based on leaf position, grade tier, and color (<xref ref-type="bibr" rid="B71">Standardization Administration of China, 1992</xref>). The lugs, cutters, and leaf terms correspond to the lower (X), middle (C), and upper (B) positions of leaves on the stalk, respectively. Each part is further divided into three or four grades (denoted as 1, 2, 3, and 4). The color categories include eight orange (F) grades and one lemon (L) grade. Groups are then formed by combining the leaf position, grade, and color. For example, C1F indicates orange cutters grade one. All samples were graded by professional graders from China Tobacco Corporation, Kunming City, based on the above criteria. The dataset included nine major acquisition grades, comprising a total of 1,784 samples (see Appendix 1 for details).</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Chemical constituent measurement</title>
<p>The K<sub>2</sub>O content was determined according to the Chinese Tobacco Industry Standard YC/T 173-2003: Tobacco and Tobacco Products&#x2013;Determination of Potassium&#x2013;Flame Photometry (<xref ref-type="bibr" rid="B72">State Tobacco Monopoly Administration, 2003</xref>). The chloride ion (Cl<sup>&#x2212;</sup>) content was determined according to YC/T 162-2011: Tobacco and Tobacco Products&#x2013;Determination of Chloride&#x2013;Continuous Flow Method (<xref ref-type="bibr" rid="B73">State Tobacco Monopoly Administration, 2011</xref>). The total sugar (TS) and reducing sugar (RS) contents were determined according to YC/T 159-2019: Tobacco and Tobacco Products&#x2013;Determination of Water-Soluble Sugars&#x2013;Continuous Flow Method (<xref ref-type="bibr" rid="B74">State Tobacco Monopoly Administration, 2019</xref>). The&#xa0;nicotine (Nic) content was determined according to YC/T 468-2021: Tobacco and Tobacco Products&#x2013;Determination of Total&#xa0;Alkaloids&#x2013;Continuous Flow (Potassium Thiocyanate) Method (<xref ref-type="bibr" rid="B75">State Tobacco Monopoly Administration, 2021</xref>). After determining the contents of Nic, TS, K, and Cl, the sugar-nicotine ratio (S/N = TS/Nic) and the potassium&#x2013;chloride ratio (K/Cl = K<sub>2</sub>O/Cl<sup>&#x2212;</sup>) were calculated.</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Multivariate statistical analysis</title>
<p>Principal component analysis (PCA) is commonly used to reduce the dimensionality of data, and the Mantel test is used to assess the correlation between matrices, with its significance evaluated via random permutations (<xref ref-type="bibr" rid="B53">Nekola and White, 1999</xref>; <xref ref-type="bibr" rid="B21">Hu&#xa0;et&#xa0;al., 2024</xref>). PCA transforms the original high-dimensional variables into a set of linearly uncorrelated low-dimensional variables, called principal components (PCs), using orthogonal transformation to extract the main source of variance. The data&#xa0;dimensionality is reduced and used as a basis for clustering algorithms (<xref ref-type="bibr" rid="B57">Patra et&#xa0;al., 2022</xref>). The Mantel test is a nonparametric statistical method that determines significance by calculating the correlation coefficients between two matrices, randomly permuting the rows and columns of one matrix to generate a random distribution, and comparing the observed correlation with this distribution. The Mantel test is used to assess the correlation between two distance matrices, offering the capability to handle inter-matrix relationships that cannot be evaluated using traditional correlation coefficients (<xref ref-type="bibr" rid="B10">Crabot et&#xa0;al., 2019</xref>). In this study, dimensionality reduction and clustering analyses of chemical constituents and hyperspectral data were performed using PCA, and the correlations among various chemical components, as well as the relationships between grade, spectral data, and chemical composition, were analyzed using the Mantel test.</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Hyperspectral acquisition</title>
<sec id="s2_5_1">
<label>2.5.1</label>
<title>Hyperspectral imaging</title>
<p>The hyperspectral data used in the experiment were collected using the GTM-900Pro tobacco comprehensive test bench (Chuangheyi Electronic Technology Development Co., Ltd., Shanghai, China). The GTM-900Pro consists of three modules: a sample visual inspection module, a near-infrared spectrometer (950&#x2013;1650 nm, 351 bands), and the integrated industrial electronic control module (<xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>).</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Schematic of the structure of the hyperspectral imaging system.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1756218-g001.tif">
<alt-text content-type="machine-generated">Diagram of a laboratory setup with numbered components: one is a camera, two is a near infrared spectrograph, three is a scanning platform, four is a light source, and five is a computer.</alt-text>
</graphic></fig>
<p>1. Camera 2. Near infrared spectrograph 3. Scanning platform 4. Light source 5. Computer.</p>
</sec>
<sec id="s2_5_2">
<label>2.5.2</label>
<title>Acquisition of hyperspectral images</title>
<p>During data acquisition, the ambient lighting was kept constant to avoid interference from external light sources. To minimize the adverse effects of temperature and humidity fluctuations on spectral data acquisition, the room temperature was strictly maintained at 25 &#xb1; 1 &#xb0;C throughout the entire data acquisition process. Tobacco samples were placed horizontally on the inspection stage, and black and white reference plate corrections were applied to the raw hyperspectral images to minimize noise caused by factors such as illumination and camera current variations. A standard reflective whiteboard was placed vertically in front of the imaging lens, and a single frame of whiteboard data corresponding to the current slit was captured for calibration. By attaching a lens cover to the lens, the black and white reference correction can be performed by acquiring the corresponding dark frame data. The calculations are detailed in <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>:</p>
<disp-formula id="eq1"><label>(1)</label>
<mml:math display="block" id="M1"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:mi>R</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mi>I</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>w</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>I</mml:mi><mml:mrow><mml:mi>d</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mi>I</mml:mi><mml:mrow><mml:mi>w</mml:mi><mml:mi>h</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>I</mml:mi><mml:mrow><mml:mi>d</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im1"><mml:mi>R</mml:mi></mml:math></inline-formula> indicates the corrected sample image, <inline-formula>
<mml:math display="inline" id="im2"><mml:mrow><mml:msub><mml:mi>I</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>w</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> denotes the original image of the sample, <inline-formula>
<mml:math display="inline" id="im3"><mml:mrow><mml:msub><mml:mi>I</mml:mi><mml:mrow><mml:mi>d</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> signifies a blackboard correction image, and <inline-formula>
<mml:math display="inline" id="im4"><mml:mrow><mml:msub><mml:mi>I</mml:mi><mml:mrow><mml:mi>w</mml:mi><mml:mi>h</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> represents a whiteboard correction image.</p>
</sec>
<sec id="s2_5_3">
<label>2.5.3</label>
<title>Spectral data extraction</title>
<p>During spectral data acquisition, the instrument measures the transmittance at 81 points of each sample through 81 holes arranged in a 9&#xd7;9 matrix and calculates the average of these measurements to represent the spectral data of the tobacco piece. In traditional machine learning, a 70:30 or 80:20 split between training and test sets is commonly used. A 70:30 division has the advantage of not requiring an independent validation set, providing more stable evaluation results that can reduce the risk of overfitting. Therefore, in this experiment, 70% of the samples were randomly selected from each grade as the training set, and 30% as the test set (<xref ref-type="bibr" rid="B81">Toleva, 2021</xref>). The sample set in this study comprised flue-cured tobacco leaves collected from eight counties/districts in Kunming over two consecutive years (2023 and 2024), thus capturing geographical and interannual variability to a certain extent. In addition, an independent test set with 535 samples was established, which was completely independent of the training data and had a consistent grade distribution with the latter. Therefore, no additional external validation is required to confirm the credibility of the developed model (<xref ref-type="bibr" rid="B16">Godin et&#xa0;al., 2015</xref>).</p>
</sec>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>Spectral data processing</title>
<p>Various environmental factors and individual differences among the test samples can interfere with the spectral data during collection. These interferences may reduce the accuracy of the collected data and introduce irrelevant information or noise into the samples, such as external light, electrical interference, and artificial transmission noise. These factors can affect model development and reduce the final predictive accuracy; therefore, the spectral data must be preprocessed before constructing the model. Commonly used spectral preprocessing methods include SG smoothing (for random noise removal), SNV transformation (to correct scattering-induced spectral distortions), and MSC (which eliminates spectral differences due to scattering intensity variations and enhances spectrum&#x2013;reference data correlations) (<xref ref-type="bibr" rid="B23">Hu et&#xa0;al., 2025</xref>; <xref ref-type="bibr" rid="B19">Hao et&#xa0;al., 2025</xref>; <xref ref-type="bibr" rid="B44">Luo et&#xa0;al., 2026</xref>). Additionally, mean centering removes absolute absorption by subtracting the average spectrum, while orthogonal signal correction filters out spectral information orthogonal to the concentration matrix prior to multivariate calibration (<xref ref-type="bibr" rid="B60">Qu et&#xa0;al., 2005</xref>; <xref ref-type="bibr" rid="B90">Yan, 2025</xref>). This process simplifies the model and improves its predictive ability (<xref ref-type="bibr" rid="B61">Rinnan et&#xa0;al., 2009</xref>). In this study, the spectral data were preprocessed using three methods: MSC, SNV, and SG.</p>
</sec>
<sec id="s2_7">
<label>2.7</label>
<title>Feature selection algorithms</title>
<p>The large sample size and numerous spectral bands in hyperspectral data often lead to data redundancy and high dimensionality, which can result in a decline in both the accuracy and efficiency of spectral model classification. To address this issue, important feature wavelengths can be extracted using a feature band selection algorithm to reduce data dimensionality, improve model accuracy, and reduce the risk of overfitting (<xref ref-type="bibr" rid="B48">Medjahed and Ouali, 2018</xref>). The SPA and CARS algorithms were chosen as feature selection methods for the experiments. The SPA is a forward variable selection method used to extract characteristic wavelengths from spectral data while minimizing multicollinearity among variables (<xref ref-type="bibr" rid="B68">Soares et&#xa0;al., 2013</xref>). The algorithm iteratively selects new bands with the lowest linear correlation to the previously chosen band. In each iteration, the remaining bands are projected onto the orthogonal complementary space of the selected band. The band with the maximum projection vector length is selected, and the final set of characteristic wavelengths is determined according to the model performance. CARS is a feature variable selection algorithm inspired by Charles Darwin&#x2019;s &#x201c;survival of the fittest&#x201d; theory (<xref ref-type="bibr" rid="B82">Wang et&#xa0;al., 2018</xref>). The algorithm combines Monte Carlo sampling with PLS regression coefficients to identify feature bands that contribute significantly to the prediction model by simulating a process of competition, elimination, and iterative optimization (<xref ref-type="bibr" rid="B29">Li et&#xa0;al., 2009</xref>).</p>
</sec>
<sec id="s2_8">
<label>2.8</label>
<title>Classification model</title>
<p>Near-infrared hyperspectral data have high dimensionality and subtle differences in the spectral features among sample categories. Traditional statistical methods struggle to effectively differentiate overlapping spectral features, while classification models (e.g., CNNs, support vector machines, and orthogonal PLS-DA) can enhance classification performance by automatically extracting deeper features from high-dimensional data (<xref ref-type="bibr" rid="B77">Sun et&#xa0;al., 2021</xref>). In this study, four algorithms (namely, RF, BPNN, ELM, and PLS-DA) were used to build the classification model. RF is a meta-estimator based on ensemble learning, which implements classification decisions by aggregating the predictions of multiple classification and regression trees through a voting mechanism (<xref ref-type="bibr" rid="B5">Breiman, 2001</xref>). During the construction of decision trees, the observations (rows) and variables (columns) are chosen randomly, and the trees are grown without pruning. In categorization, each sample is passed through all decision trees, and the predicted category labels from each tree are counted. The category with the most votes is selected as the categorization result (<xref ref-type="bibr" rid="B11">Cutler et&#xa0;al., 2007</xref>). The BPNN model is a supervised learning model based on the error backpropagation algorithm, which can capture complex patterns in data through multilayer nonlinear transformations. The network typically consists of an input layer, a hidden layer, and an output layer. Learning is achieved by feeding the neural network output back to the hidden layers and adjusting the weights and thresholds to minimize the total error (<xref ref-type="bibr" rid="B15">Fu and Tian, 2020</xref>). The ELM model is an efficient single hidden layer feedforward neural network consisting of input, hidden, and output layers. 
The algorithm reduces training complexity by randomly initializing the hidden layer parameters and analytically solving for the output weights. Specifically, the optimal solution is obtained by randomly generating the input weights (w) and hidden layer biases (b), setting the activation function g(x) and the number of neurons in the hidden layer (L), and calculating the output weights (&#x3b2;) between the hidden and output layers (<xref ref-type="bibr" rid="B94">Yang et&#xa0;al., 2018</xref>). PLS-DA combines PLS and discriminant analysis, providing an effective approach for high-dimensional and multicollinear datasets. The core idea of PLS-DA is to maximize the covariance between independent variables and class labels by projecting the original variables onto a low-dimensional latent space, extracting the most discriminative features for classification. Compared with traditional classification models, such as logistic regression and support vector machines, PLS-DA effectively mitigates the &#x201c;curse of dimensionality&#x201d; through dimensionality reduction while retaining key discriminative information in the data. High-dimensional data are represented in a lower-dimensional space using a set of latent variables. The latent variables provide an optimal representation of the predictive data <inline-formula>
<mml:math display="inline" id="im5"><mml:mi>X</mml:mi></mml:math></inline-formula>, maximizing their predictive power for the response data <inline-formula>
<mml:math display="inline" id="im6"><mml:mi>Y</mml:mi></mml:math></inline-formula> (<xref ref-type="bibr" rid="B3">Barker and Rayens, 2003</xref>). PLS-DA can provide a model by identifying the correlation between the scores of <inline-formula>
<mml:math display="inline" id="im7"><mml:mi>X</mml:mi></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im8"><mml:mi>Y</mml:mi></mml:math></inline-formula> (<xref ref-type="bibr" rid="B8">Chandra and Kundu, 2024</xref>). The <inline-formula>
<mml:math display="inline" id="im9"><mml:mi>X</mml:mi></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im10"><mml:mi>Y</mml:mi></mml:math></inline-formula> data are processed separately in the PLS-DA external model, while internal relationships establish connections between the two datasets. The external relationships among the predictor variables, response variables, and their corresponding scores and loading matrices in the latent dimensions are formally defined in <xref ref-type="disp-formula" rid="eq2">Equations 2</xref>&#x2013;<xref ref-type="disp-formula" rid="eq5">5</xref> as&#xa0;follows:</p>
<disp-formula id="eq2"><label>(2)</label>
<mml:math display="block" id="M2"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:mi>X</mml:mi><mml:mo>=</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:msubsup><mml:mi>p</mml:mi><mml:mn>1</mml:mn><mml:mi>T</mml:mi></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:msubsup><mml:mi>p</mml:mi><mml:mn>2</mml:mn><mml:mi>T</mml:mi></mml:msubsup><mml:mo>+</mml:mo><mml:mo>&#x22ef;</mml:mo><mml:mo>+</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mi>n</mml:mi></mml:msub><mml:msubsup><mml:mi>p</mml:mi><mml:mi>n</mml:mi><mml:mi>T</mml:mi></mml:msubsup><mml:mo>+</mml:mo><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mi>T</mml:mi><mml:msup><mml:mi>P</mml:mi><mml:mi>T</mml:mi></mml:msup><mml:mo>+</mml:mo><mml:mi>E</mml:mi></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq3"><label>(3)</label>
<mml:math display="block" id="M3"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:mi>Y</mml:mi><mml:mo>=</mml:mo><mml:msub><mml:mi>u</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:msubsup><mml:mi>q</mml:mi><mml:mn>1</mml:mn><mml:mi>T</mml:mi></mml:msubsup><mml:mo>+</mml:mo><mml:msub><mml:mi>u</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:msubsup><mml:mi>q</mml:mi><mml:mn>2</mml:mn><mml:mi>T</mml:mi></mml:msubsup><mml:mo>+</mml:mo><mml:mo>&#x22ef;</mml:mo><mml:mo>+</mml:mo><mml:msub><mml:mi>u</mml:mi><mml:mi>n</mml:mi></mml:msub><mml:msubsup><mml:mi>q</mml:mi><mml:mi>n</mml:mi><mml:mi>T</mml:mi></mml:msubsup><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mo>=</mml:mo><mml:mi>U</mml:mi><mml:msup><mml:mi>Q</mml:mi><mml:mi>T</mml:mi></mml:msup><mml:mo>+</mml:mo><mml:mi>F</mml:mi></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im11"><mml:mi>t</mml:mi></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im12"><mml:mi>u</mml:mi></mml:math></inline-formula> represent the principal component (PC) score matrices of <inline-formula>
<mml:math display="inline" id="im13"><mml:mi>X</mml:mi></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im14"><mml:mi>Y</mml:mi></mml:math></inline-formula>, respectively; <inline-formula>
<mml:math display="inline" id="im15"><mml:mi>p</mml:mi></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im16"><mml:mi>q</mml:mi></mml:math></inline-formula> are loading vectors that indicate the dominant directions by maximizing the covariance within the <inline-formula>
<mml:math display="inline" id="im17"><mml:mi>X</mml:mi></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im18"><mml:mi>Y</mml:mi></mml:math></inline-formula> data. The errors <inline-formula>
<mml:math display="inline" id="im19"><mml:mi>E</mml:mi></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im20"><mml:mi>F</mml:mi></mml:math></inline-formula> cancel out to zero if each dimension of <inline-formula>
<mml:math display="inline" id="im21"><mml:mi>X</mml:mi></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im22"><mml:mi>Y</mml:mi></mml:math></inline-formula> is considered. The relationship between the PC scores <inline-formula>
<mml:math display="inline" id="im23"><mml:mi>T</mml:mi></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im24"><mml:mi>U</mml:mi></mml:math></inline-formula> represents the internal model linking <inline-formula>
<mml:math display="inline" id="im25"><mml:mi>X</mml:mi></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im26"><mml:mi>Y</mml:mi></mml:math></inline-formula>.</p>
<disp-formula id="eq4"><label>(4)</label>
<mml:math display="block" id="M4"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:mi>U</mml:mi><mml:mo>=</mml:mo><mml:mi>T</mml:mi><mml:mi>B</mml:mi></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im27"><mml:mi>B</mml:mi></mml:math></inline-formula> is the regression matrix. <inline-formula>
<mml:math display="inline" id="im28"><mml:mi>Y</mml:mi></mml:math></inline-formula> can be written as:</p>
<disp-formula id="eq5"><label>(5)</label>
<mml:math display="block" id="M5"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:mi>Y</mml:mi><mml:mo>=</mml:mo><mml:mi>T</mml:mi><mml:mi>B</mml:mi><mml:msup><mml:mi>Q</mml:mi><mml:mi>T</mml:mi></mml:msup><mml:mo>+</mml:mo><mml:mi>F</mml:mi></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>The relationship equations are derived iteratively, using newly calculated residuals to update the scores and loadings, until the residuals become negligible or the number of PLS-DA latent variables exceeds the number of <inline-formula>
<mml:math display="inline" id="im29"><mml:mi>X</mml:mi></mml:math></inline-formula> variables. The percentage of variance explained and cross-validation residuals are used to determine the number of PLS-DA latent variables.</p>
</sec>
<sec id="s2_9">
<label>2.9</label>
<title>Evaluation of model performance</title>
<sec id="s2_9_1">
<label>2.9.1</label>
<title>Accuracy</title>
<p>The test set accuracy reflects the ability of a model to generalize to unseen data and serves as a key performance indicator (<xref ref-type="bibr" rid="B35">Liu and He, 2025</xref>). The test set accuracy is the probability of correctly predicting an outcome across all test set samples, which is calculated as shown in <xref ref-type="disp-formula" rid="eq6">Equation 6</xref>:</p>
<disp-formula id="eq6"><label>(6)</label>
<mml:math display="block" id="M6"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:mi>A</mml:mi><mml:mi>c</mml:mi><mml:mi>c</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac><mml:mo>&#xd7;</mml:mo><mml:mn>100</mml:mn><mml:mo>%</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im30"><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:math></inline-formula> denotes the number of correctly categorized positive samples; <inline-formula>
<mml:math display="inline" id="im31"><mml:mrow><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:math></inline-formula> denotes the number of correctly categorized negative samples; <inline-formula>
<mml:math display="inline" id="im32"><mml:mrow><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:math></inline-formula> denotes the number of negative samples incorrectly categorized as positive samples; and <inline-formula>
<mml:math display="inline" id="im33"><mml:mrow><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:math></inline-formula> denotes the number of positive samples incorrectly categorized as negative samples.</p>
</sec>
<sec id="s2_9_2">
<label>2.9.2</label>
<title>Confusion matrix</title>
<p>A confusion matrix, also known as a likelihood or error matrix, is an important tool for comparing the classification results with actual values. It effectively represents the accuracy of the classification results and is widely used to evaluate classifier performance (<xref ref-type="bibr" rid="B80">Theissler et&#xa0;al., 2022</xref>).</p>
</sec>
</sec>
<sec id="s2_10">
<label>2.10</label>
<title>Software</title>
<p>The multivariate statistical analyses used in this study, including PCA and the Mantel test (Pearson), were performed using the Metware Cloud platform (<ext-link ext-link-type="uri" xlink:href="https://cloud.metware.cn/">https://cloud.metware.cn/</ext-link>). The MSC, SNV, and SG preprocessing methods; the RF, BPNN, ELM, and PLS-DA classification models; and the SPA and CARS feature band selection algorithms were implemented using MATLAB 2023b. All experiments were conducted on a computer running Windows 11, equipped with an AMD Ryzen 9 7845HX CPU, 16 GB DDR5 RAM (2&#xd7;8 GB), and an NVIDIA RTX 4070 Laptop GPU.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results and discussion</title>
<sec id="s3_1">
<label>3.1</label>
<title>Morphological characteristics of tobacco leaf samples</title>
<p>A total of nine commonly used tobacco leaf grades (B1F, B2F, B3F, C1F, C2F, C3F, C3L, C4F, and X2F) were collected, and their images were acquired using a near-infrared hyperspectral imaging system for subsequent analysis (<xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>). The upper leaves exhibit thicker and more prominent veins, a broader shape with a sharper tip, and medium to high thickness. The middle leaves have moderately developed veins, a slightly curved tip, a broader shape with a blunter tip, and medium to slightly thin thickness. The lower leaves display finer veins, a broader, more rounded shape, and slightly thin to thin thickness. Grade 1 tobacco is characterized by high maturity, a loose leaf structure, high oil content, and deep coloration. Grade 2 tobacco has good maturity, a firm leaf structure, moderate oil content, and strong coloration. Grade 3 tobacco exhibits average maturity, a slightly dense leaf structure, low oil content, and moderate coloration. 
The morphological characteristics of the tobacco grades can be described as follows: B1F: high maturity, firm leaf structure, slightly high thickness, high oil content, and deep coloration; B2F: high maturity, firm leaf structure, slightly high thickness, moderate oil content, and strong coloration; B3F: high maturity, slightly dense leaf structure, slightly high thickness, moderate oil content, and moderate coloration; C1F: high maturity, loose leaf structure, medium thickness, high oil content, and deep coloration; C2F: high maturity, loose leaf structure, medium thickness, moderate oil content, and strong coloration; C3F: high maturity, loose leaf structure, medium thickness, moderate oil content, and moderate coloration; C3L: high maturity, loose leaf structure, slightly thin thickness, moderate oil content, and moderate coloration; C4F: high maturity, loose leaf structure, slightly thin thickness, low oil content, and moderate coloration; and X2F: high maturity, loose leaf structure, slightly thin thickness, low oil content, and moderate coloration. There are significant differences in the vein structure, leaf shape, and thickness of different parts of the tobacco leaves, which can be used to differentiate the various leaf positions (<xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>). However, when comparing the same part of different grades of tobacco, such as B1F vs. B2F or C1F vs. C2F, the leaves exhibit similar maturity, structure, and thickness, only differing in oil content and coloration. Leaf structure and thickness differ only when there is a large difference in grades, such as B1F vs. B3F or C1F vs. C4F.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Photograph showing various tobacco samples.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1756218-g002.tif">
<alt-text content-type="machine-generated">Nine rows of dried leaves, each labeled at left with combinations such as B1F, B2F, through X2F, showing three similar yellow-brown leaves per row against a black background for comparison.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Tobacco chemical composition analysis</title>
<sec id="s3_2_1">
<label>3.2.1</label>
<title>Chemical composition of different grades of tobacco leaves</title>
<p>The contents of nicotine (Nic), Cl<sup>&#x2212;</sup>, K<sub>2</sub>O, reducing sugar (RS), and total sugar (TS) in different tobacco grades were determined, and the corresponding sugar-nicotine ratio (S/N) and potassium-chloride ratio (K/Cl) were calculated (<xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>). As the primary contributor to the strength and satisfaction of tobacco products, the Nic content directly determines the core quality attributes of the product. Consequently, variations in Nic content across leaf positions and grades are essential considerations in the formulation of tobacco products (<xref ref-type="bibr" rid="B13">Darkis and Hackne, 1952</xref>). The Nic content of the upper leaves was significantly higher than that of the middle and lower leaves; however, the differences in the Nic content of B1F, B2F, and B3F were minimal. These differences can be precisely used in tobacco product formulations to meet different quality requirements. For high-end flue-cured cigarettes with a rich and strong impact, the formula prioritizes the use of upper leaves, leveraging their high Nic content to enhance smoking satisfaction and form the core flavor profile. B3F leaves are specifically incorporated into products such as low-tar cigarettes, providing a lower tar content while maintaining smoking satisfaction through their high nicotine content (<xref ref-type="bibr" rid="B47">McKinney et&#xa0;al., 2014</xref>). RS and TS are positively correlated with the smoothness of smoke; in particular, fructose and glucose can balance irritation by regulating the production of acidic substances. This property makes the differences in sugar content among leaf components and grades the core means for regulating smoke palatability in tobacco product formulation (<xref ref-type="bibr" rid="B76">Stepanov et&#xa0;al., 2025</xref>). 
The RS and TS contents of the lower leaves were significantly higher than those of the middle and upper leaves, with C3L and X2F exhibiting significantly higher RS and TS levels than the other grades. These differences are precisely applied in tobacco product formulations to achieve specific smoke quality objectives. For mid-to-low-end flue-cured cigarettes characterized by mildness and low irritation, the formula incorporates a larger proportion of lower leaves with high sugar content. It leverages the high RS and TS contents of lower leaves to enhance smoke smoothness, reinforcing the effect of balancing acidic substances and reducing irritation. For mid-range products aiming for moderate sweetness and smoothness without blandness, middle leaves can serve as the main component, blended with an appropriate amount of X2F-grade tobacco leaves. This not only ensures the sweet flavor of the smoke but also balances the taste layers and reduces irritation (<xref ref-type="bibr" rid="B78">Talhout et&#xa0;al., 2006</xref>). The S/N ratio directly affects the sensory acceptability and smoking behavior of tobacco by modulating the synergistic effects of Nic stimulation and sugar sweetness. This makes the variations in S/N ratios across leaf positions and grades a major consideration in the design of tobacco products (<xref ref-type="bibr" rid="B63">Roemer et&#xa0;al., 2012</xref>). The lower leaves had significantly higher S/N ratios than the middle or upper leaves, with C3L and X2F exhibiting significantly higher S/N ratios than the other grades. These differences are systematically used in the formulation of tobacco products. For flue-cured cigarettes with a mild, sweet, and low-irritation taste, the formula prioritizes tobacco leaves with a high S/N ratio, leveraging their strong synergistic effect to neutralize the irritation of smoke and enhance sensory acceptability (<xref ref-type="bibr" rid="B54">Nikolova et&#xa0;al., 2021</xref>). 
K<sub>2</sub>O and Cl<sup>&#x2212;</sup> are key regulators of tobacco combustibility: K<sup>+</sup> enhances combustibility by lowering the ignition temperature and promoting cellulose decomposition, while Cl<sup>&#x2212;</sup> inhibits combustion speed. Thus, the K/Cl ratio serves as a critical determinant of tobacco combustibility, with ratios exceeding 5 enhancing combustion and values below 1 potentially inhibiting ignition. The variations in K<sub>2</sub>O, Cl<sup>&#x2212;</sup>, and K/Cl ratio among leaf components and grades serve as the core basis for the precise regulation of tobacco combustibility in product formulation (<xref ref-type="bibr" rid="B52">Myhre et&#xa0;al., 1956</xref>). X2F had the highest K<sub>2</sub>O content, while C1F had the highest Cl<sup>&#x2212;</sup> content. The K/Cl ratios of B1F, C2F, C3F, C3L, and C4F were comparable and significantly higher than those of the other grades. However, there was no significant difference in K/Cl ratios among the three leaf positions. These differences are strategically applied in tobacco product formulations to meet various combustion requirements. For high-end flue-cured cigarettes with easy ignition, even combustion, and white ash, the formula prioritizes the use of X2F, B1F, and C3L tobacco leaves while strictly controlling the inclusion of C1F-grade tobacco leaves. This ensures that the overall K/Cl ratio of the leaf blend remains above 5, thereby providing a smoking experience with smooth smoke release and minimal risk of extinguishment (<xref ref-type="bibr" rid="B100">Zhong, 2019</xref>). These differences in chemical composition not only distinguish the grades of first-roasted tobacco but also form the core basis for the design of tobacco product formulations in terms of product style, core quality, and consumer preferences.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Contents of nicotine (Nic), reducing sugar (RS), total sugar (TS), K<sub>2</sub>O, and Cl<sup>&#x2212;</sup>, as well as the sugar-nicotine ratio (S/N) and potassium-chloride ratio (K/Cl), for different tobacco leaf grades.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Grade</th>
<th valign="middle" align="center">Nic (%)</th>
<th valign="middle" align="center">RS (%)</th>
<th valign="middle" align="center">TS (%)</th>
<th valign="middle" align="center">K<sub>2</sub>O (%)</th>
<th valign="middle" align="center">Cl<sup>&#x2212;</sup> (%)</th>
<th valign="middle" align="center">S/N</th>
<th valign="middle" align="center">K/Cl</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">B1F</td>
<td valign="middle" align="center">2.66 &#xb1; 0.71b</td>
<td valign="middle" align="center">23.30 &#xb1; 4.65f</td>
<td valign="middle" align="center">25.50 &#xb1; 5.09d</td>
<td valign="middle" align="center">2.48 &#xb1; 0.58b</td>
<td valign="middle" align="center">0.14 &#xb1; 0.07e</td>
<td valign="middle" align="center">10.70 &#xb1; 4.64e</td>
<td valign="middle" align="center">21.10 &#xb1; 9.87a</td>
</tr>
<tr>
<td valign="middle" align="center">B2F</td>
<td valign="middle" align="center">2.77 &#xb1; 0.68a</td>
<td valign="middle" align="center">23.10 &#xb1; 4.66f</td>
<td valign="middle" align="center">24.70 &#xb1; 4.61d</td>
<td valign="middle" align="center">2.27 &#xb1; 0.47de</td>
<td valign="middle" align="center">0.17 &#xb1; 0.10de</td>
<td valign="middle" align="center">9.75 &#xb1; 4.00e</td>
<td valign="middle" align="center">18.10 &#xb1; 9.39b</td>
</tr>
<tr>
<td valign="middle" align="center">B3F</td>
<td valign="middle" align="center">2.67 &#xb1; 0.53ab</td>
<td valign="middle" align="center">22.90 &#xb1; 5.12f</td>
<td valign="middle" align="center">23.30 &#xb1; 4.98e</td>
<td valign="middle" align="center">2.13 &#xb1; 0.57f</td>
<td valign="middle" align="center">0.19 &#xb1; 0.10cd</td>
<td valign="middle" align="center">9.23 &#xb1; 3.21e</td>
<td valign="middle" align="center">14.80 &#xb1; 9.44c</td>
</tr>
<tr>
<td valign="middle" align="center">C1F</td>
<td valign="middle" align="center">2.07 &#xb1; 0.55c</td>
<td valign="middle" align="center">25.40 &#xb1; 2.84d</td>
<td valign="middle" align="center">27.50 &#xb1; 3.23bc</td>
<td valign="middle" align="center">2.23 &#xb1; 0.54ef</td>
<td valign="middle" align="center">0.30 &#xb1; 0.18a</td>
<td valign="middle" align="center">14.40 &#xb1; 4.95d</td>
<td valign="middle" align="center">11.90 &#xb1; 9.97d</td>
</tr>
<tr>
<td valign="middle" align="center">C2F</td>
<td valign="middle" align="center">1.91 &#xb1; 0.58d</td>
<td valign="middle" align="center">26.60 &#xb1; 4.60c</td>
<td valign="middle" align="center">28.50 &#xb1; 5.09b</td>
<td valign="middle" align="center">2.45 &#xb1; 0.65b</td>
<td valign="middle" align="center">0.23 &#xb1; 0.19b</td>
<td valign="middle" align="center">16.80 &#xb1; 7.45c</td>
<td valign="middle" align="center">19.80 &#xb1; 14.00ab</td>
</tr>
<tr>
<td valign="middle" align="center">C3F</td>
<td valign="middle" align="center">1.54 &#xb1; 0.43f</td>
<td valign="middle" align="center">25.00 &#xb1; 5.53e</td>
<td valign="middle" align="center">26.90 &#xb1; 6.56c</td>
<td valign="middle" align="center">2.29 &#xb1; 0.59cde</td>
<td valign="middle" align="center">0.19 &#xb1; 0.15cd</td>
<td valign="middle" align="center">19.10 &#xb1; 7.90b</td>
<td valign="middle" align="center">20.40 &#xb1; 16.70ab</td>
</tr>
<tr>
<td valign="middle" align="center">C3L</td>
<td valign="middle" align="center">1.36 &#xb1; 0.47g</td>
<td valign="middle" align="center">30.10 &#xb1; 7.23a</td>
<td valign="middle" align="center">33.00 &#xb1; 7.46a</td>
<td valign="middle" align="center">2.40 &#xb1; 0.65bc</td>
<td valign="middle" align="center">0.20 &#xb1; 0.16bc</td>
<td valign="middle" align="center">28.70 &#xb1; 17.81a</td>
<td valign="middle" align="center">21.50 &#xb1; 21.10a</td>
</tr>
<tr>
<td valign="middle" align="center">C4F</td>
<td valign="middle" align="center">1.67 &#xb1; 0.50e</td>
<td valign="middle" align="center">24.80 &#xb1; 5.91e</td>
<td valign="middle" align="center">28.00 &#xb1; 6.83bc</td>
<td valign="middle" align="center">2.36 &#xb1; 0.57bcd</td>
<td valign="middle" align="center">0.16 &#xb1; 0.10de</td>
<td valign="middle" align="center">19.10 &#xb1; 9.13b</td>
<td valign="middle" align="center">18.80 &#xb1; 9.83ab</td>
</tr>
<tr>
<td valign="middle" align="center">X2F</td>
<td valign="middle" align="center">1.25 &#xb1; 0.34h</td>
<td valign="middle" align="center">29.00 &#xb1; 4.62b</td>
<td valign="middle" align="center">32.60 &#xb1; 5.00a</td>
<td valign="middle" align="center">2.81 &#xb1; 0.62a</td>
<td valign="middle" align="center">0.21 &#xb1; 0.13bc</td>
<td valign="middle" align="center">28.70 &#xb1; 10.96a</td>
<td valign="middle" align="center">18.00 &#xb1; 9.39b</td>
</tr>
<tr>
<th valign="middle" align="center">Part</th>
<th valign="middle" colspan="7" align="center"/>
</tr>
<tr>
<td valign="middle" align="center">Upper leaves</td>
<td valign="middle" align="center">2.70 &#xb1; 0.65a</td>
<td valign="middle" align="center">23.10 &#xb1; 4.81c</td>
<td valign="middle" align="center">24.50 &#xb1; 4.98c</td>
<td valign="middle" align="center">2.29 &#xb1; 0.56bc</td>
<td valign="middle" align="center">0.17 &#xb1; 0.09b</td>
<td valign="middle" align="center">9.90 &#xb1; 4.04c</td>
<td valign="middle" align="center">18.00 &#xb1; 9.90a</td>
</tr>
<tr>
<td valign="middle" align="center">Middle leaves</td>
<td valign="middle" align="center">1.71 &#xb1; 0.57b</td>
<td valign="middle" align="center">28.00 &#xb1; 6.17b</td>
<td valign="middle" align="center">28.80 &#xb1; 6.40b</td>
<td valign="middle" align="center">2.35 &#xb1; 0.61bc</td>
<td valign="middle" align="center">0.22 &#xb1; 0.17a</td>
<td valign="middle" align="center">21.60 &#xb1; 11.50b</td>
<td valign="middle" align="center">18.50 &#xb1; 15.30a</td>
</tr>
<tr>
<td valign="middle" align="center">Lower leaves</td>
<td valign="middle" align="center">1.25 &#xb1; 0.34c</td>
<td valign="middle" align="center">29.00 &#xb1; 4.62a</td>
<td valign="middle" align="center">32.60 &#xb1; 5.00a</td>
<td valign="middle" align="center">2.81 &#xb1; 0.62a</td>
<td valign="middle" align="center">0.21 &#xb1; 0.13a</td>
<td valign="middle" align="center">26.70 &#xb1; 10.96a</td>
<td valign="middle" align="center">18.00 &#xb1; 9.39a</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Within each column, different lowercase letters indicate significant differences in the chemical composition of the raw tobacco leaves among grades or among leaf parts (P &lt; 0.05).</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_2_2">
<label>3.2.2</label>
<title>Principal component analysis of chemical composition of tobacco leaves</title>
<p>PCA was performed on the chemical components of the collected tobacco leaves (<xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>), revealing that the first two principal components cumulatively accounted for 63.8% of the total variance (PC1, 43.81%; PC2, 19.99%). Despite PC1 and PC2 explaining 63.8% of the total variance, leaves of different grades could not be clearly distinguished, likely due to the low dimensionality of the chemical composition data and the fact that tobacco grades are primarily differentiated based on physical characteristics such as maturity, leaf structure, thickness, and oil content. Although chemical composition underlies the physical appearance of tobacco and can influence it to some extent, the relationship between the two is complex. Therefore, chemical composition alone cannot be directly used to differentiate tobacco grades.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>PCA score plot of chemical composition across first-roasted tobacco leaf grades.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1756218-g003.tif">
<alt-text content-type="machine-generated">Principal component analysis scatter plot displaying samples by grade, with each grade represented by differently colored triangles shown in the legend. PC1 explains 43.81 percent of variance and PC2 explains 19.99 percent. Points overlap in the center, with color-coded ellipses highlighting group distributions.</alt-text>
</graphic></fig>
</sec>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Spectral analysis of tobacco leaves</title>
<sec id="s3_3_1">
<label>3.3.1</label>
<title>Tobacco leaf spectrum</title>
<p>The spectra of nine tobacco grades were scanned in the range of 950&#x2013;1650 nm, and their average values were calculated, with each spectrum consisting of 351 bands (<xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>). The reflectance spectra of the tobacco leaves are generated through the interaction between light and the leaf tissue. The spectra exhibit distinct peaks at approximately 1180 and 1470 nm. The second overtone or combination frequency vibrations of C&#x2013;H bonds in fructose, glucose, and portions of cellulose and pectin in flue-cured tobacco leaves give rise to a characteristic reflectance peak at 1180 nm (<xref ref-type="bibr" rid="B88">Wu et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B37">Liu et&#xa0;al., 2024</xref>). Additionally, surface shrinkage and texture homogenization in flue-cured tobacco leaves enhance the scattering effect at 1180 nm, further contributing to the characteristic reflectance peak. In addition, the low moisture content of flue-cured tobacco leaves reduces O&#x2013;H bond absorption in water, resulting in a relative increase in reflectance near 1470 nm (<xref ref-type="bibr" rid="B97">Zhang and He, 2011</xref>; <xref ref-type="bibr" rid="B36">Liu et&#xa0;al., 2023</xref>).</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Average reflectance spectra of different tobacco leaf grades.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1756218-g004.tif">
<alt-text content-type="machine-generated">Line graph showing reflectance versus wavelength in nanometers from 950 to 1650, with nine colored lines labeled B1F, B2F, B3F, C1F, C2F, C3F, C3L, C4F, and X2F. All lines exhibit a similar trend with a sharp increase in reflectance between 1300 and 1400 nanometers.</alt-text>
</graphic></fig>
<p>A comparison of the average reflectance spectra (<xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>) of nine grades of first-roasted tobacco revealed that the spectra of B1F and B2F were similar; those of B3F, C1F, and C2F were similar; and those of C3F, C3L, C4F, and X2F were similar. Aside from slight differences in color intensity and oil content, the appearance characteristics of B1F and B2F first-roasted tobacco leaves are largely consistent, resulting in minimal spectral variation and similar spectral profiles. The spectral curves of tobacco leaves of different grades show that the spectral reflectance of B1F, B2F, C1F, and C2F is lower than that of B3F, C3F, and C4F within the range of 1000&#x2013;1400 nm. This is because B1F, B2F, C1F, and C2F have higher maturity and a looser leaf structure. In addition, B3F contains lower levels of RS and TS than B1F and B2F, while C1F and C2F have significantly higher nicotine content than C3F and C4F. These differences result in variations in the spectral reflectance of hydrogen-containing functional groups such as C&#x2013;H and O&#x2013;H, ultimately causing differences in the reflectance spectra between B1F, B2F, C1F, C2F and B3F, C3F, C4F within the range of 1000&#x2013;1400 nm (<xref ref-type="bibr" rid="B41">Lu et&#xa0;al., 2023b</xref>).</p>
<p>From a molecular spectroscopy perspective, these observed spectral differences stem from the overtone and combination vibrations of hydrogen-containing functional groups, directly tied to leaf chemistry (<xref ref-type="bibr" rid="B95">Yang et&#xa0;al., 2015</xref>). Within this band, the spectral features of key chemical components in tobacco leaves stem from the vibrations of specific chemical bonds. For carbohydrates (e.g., RS and TS), characteristic reflectance originates from O&#x2013;H stretching-bending combination bands and C&#x2013;H vibrations. Higher sugar content generally results in weak reflectance at 1200&#x2013;1300 nm and 1400&#x2013;1500 nm, lowering overall spectral reflectance levels (<xref ref-type="bibr" rid="B69">Soares et&#xa0;al., 2019</xref>). For Nic, its molecular functional groups (pyridine ring, N&#x2013;CH<sub>3</sub>) exhibit significant absorption at 1100&#x2013;1200 nm and 1300&#x2013;1400 nm due to C&#x2013;H and N&#x2013;H vibrations (<xref ref-type="bibr" rid="B95">Yang et&#xa0;al., 2015</xref>). Consequently, the observed differences in average reflectance spectra result from the combined effects of the following factors: higher-grade leaves such as B1F and B2F generally contain higher sugar content and appropriate nicotine levels, leading to greater concentrations of O&#x2013;H and C&#x2013;H groups, stronger overall absorption in the 1000&#x2013;1400 nm region, and thus lower reflectance. In contrast, the relatively lower sugar content in B3F weakens its O&#x2013;H related absorption, while the higher nicotine in C1F and C2F enhances absorption from C&#x2013;H and N&#x2013;H groups. Variations in the concentrations of these specific groups directly determine differences in reflectance intensity at characteristic wavelengths. 
In summary, the patterns and variations in average reflectance spectra essentially reflect the combined influence of the concentrations of internal chemical groups (O&#x2013;H, C&#x2013;H, N&#x2013;H) and the physical structure of tobacco leaves across different grades.</p>
<p>The lower leaves of first-roasted tobacco had the highest reflectance, followed by the middle leaves, with the upper leaves showing the lowest reflectance. Variations in the structures of tobacco leaves lead to differences in optical behavior. The upper leaves are usually thicker, and the cells are more tightly arranged, which results in reduced light penetration, shorter scattering paths, and lower reflectance. The middle and lower leaves are thinner with a higher proportion of spongy tissues, resulting in increased light scattering and higher reflectance. Significant differences were observed in the chemical compositions of first-roasted tobacco leaves. The Nic content was highest in the upper leaves and lowest in the lower leaves, while the RS and TS contents were lowest in the upper leaves and highest in the middle and lower leaves. Due to the strong absorption characteristics of hydrogen-containing groups (e.g., C&#x2013;H, O&#x2013;H, and N&#x2013;H) in the 1200&#x2013;1400 nm range, the differences in chemical compositions among leaf positions further amplified variations in their spectral reflectance (<xref ref-type="bibr" rid="B69">Soares et&#xa0;al., 2019</xref>). Therefore, different parts and grades of tobacco&#xa0;exhibit distinct spectral responses and characteristic spectral features.</p>
</sec>
<sec id="s3_3_2">
<label>3.3.2</label>
<title>PCA of tobacco leaf sample spectra</title>
<p>PCA was performed on the collected hyperspectral data of tobacco leaves (<xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>), which showed that the first two principal components cumulatively accounted for 96.4% of the total variance (PC1, 86.3%; PC2, 10.1%). PC1 is the primary factor for distinguishing tobacco leaf grades, encompassing most of the variance and highlighting the pronounced near-infrared hyperspectral differences between leaves of different grades. Near-infrared hyperspectral data have high dimensionality and contain abundant physical and chemical information of flue-cured tobacco leaves, including macroscopic traits such as moisture content, cell structure, and thickness, as well as the chemical information inferred from these characteristics (<xref ref-type="bibr" rid="B45">Marcelo et&#xa0;al., 2019</xref>). Additionally, the grades of flue-cured tobacco leaves are classified based on their external physical characteristics, which explains the high contribution rates of the first two principal components. However, PCA could not effectively differentiate between tobacco grades, likely because fluctuations in the O&#x2013;H bond reflectance peak at 1470 nm, corresponding to water content, were much stronger than the grade-related chemical signals, such as the C&#x2013;H bond at 1180 nm (<xref ref-type="bibr" rid="B42">Lu et&#xa0;al., 2021b</xref>). Moisture content was consistent across all nine grades and did not differ significantly between grades. Therefore, moisture content is unlikely to be a major driver of variation in the PCA or a key factor distinguishing grades. This overshadowed the spectral differences between grades, including those arising from structural variations, preventing PCA from effectively distinguishing tobacco grades.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>PCA score plot of hyperspectral data for different grades of first-roasted tobacco leaves.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1756218-g005.tif">
<alt-text content-type="machine-generated">Scatter plot showing PCA results with scores on the x-axis labeled PC1 (eighty-six point three percent) and y-axis labeled PC2 (ten point one percent). Nine differently colored triangle symbols represent grades B1F, B2F, B3F, C1F, C2F, C3F, C3L, C4F, and X2F, all clustered near the center. A semi-transparent ellipse highlights the main data concentration.</alt-text>
</graphic></fig>
<p>Although first-roasted tobacco grades showed differences in spectral data and chemical composition, PCA tended to obscure the spectral differences among grades. The dimensionality of chemical composition data was relatively low, making PCA ineffective for differentiation.</p>
</sec>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Mantel test analysis of tobacco leaf chemical composition, spectra, and grades</title>
<p>The color of each square in the heat map indicates the sign (positive or negative) of the correlation coefficient between chemical components. Statistical significance is indicated by asterisks: *** P &#x2264; 0.001 (extremely significant); ** 0.001 &lt; P &#x2264; 0.01 (highly significant). The value in each square gives the magnitude of the correlation coefficient. The thickness of each line indicates the strength of the Mantel correlation, while the color of the line indicates its significance level (P-value).</p>
<p>Mantel test correlation analysis was performed to explore the relationships between near-infrared hyperspectral data, chemical compositions, and grades of first-roasted tobacco leaves, as well as inter-chemical correlations (<xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref>). The core results were as follows: (1) Nic exhibited a strong, highly significant negative correlation with RS, TS, and S/N ratio (P &#x2264; 0.001); (2) Cl<sup>&#x2212;</sup> was strongly and highly significantly negatively correlated with K/Cl ratio (P &#x2264; 0.001); (3) RS and TS showed strong, highly significant positive correlations with each other and with S/N ratio (P &#x2264; 0.001); (4) tobacco grade was highly significantly correlated with Nic, RS, TS, and S/N ratio (P &lt; 0.01) but not with K<sup>+</sup>, Cl<sup>&#x2212;</sup>, or K/Cl ratio (P &gt; 0.05); and (5) hyperspectral data were highly significantly correlated with Nic (P &lt; 0.01), significantly correlated with RS (0.01 &lt; P &lt; 0.05), but not significantly correlated with TS, K<sup>+</sup>, Cl<sup>&#x2212;</sup>, S/N ratio, or K/Cl ratio (P &gt; 0.05).</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Mantel test analysis of correlations among chemical composition (Nic, RS, TS, K, Cl, S/N, and K/Cl), spectra, and grades of first-roasted tobacco leaves.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1756218-g006.tif">
<alt-text content-type="machine-generated">Correlation matrix and network diagram showing Pearson correlations among seven variables (Nic, RS, TS, K, Cl, S/N, K/Cl), represented by color gradients and values in a square grid, with negative correlations in blue and positive in red. Network below connects Spectrum and Grade to variables with link thickness for Mantel correlation and color for Mantel P-value, as explained in the legend at right.</alt-text>
</graphic></fig>
<p>The antagonistic relationship between Nic and sugars (RS/TS) is consistent with previous findings that enhanced Nic synthesis consumes sugars or inhibits sugar accumulation in tobacco, leading to a negative correlation between these components (<xref ref-type="bibr" rid="B50">Mo et&#xa0;al., 2022</xref>). The strong positive correlation between RS and TS is attributed to RS accounting for 60&#x2013;80% of TS, a common compositional feature of tobacco carbohydrates (<xref ref-type="bibr" rid="B2">Bano&#x17e;i&#x107; et&#xa0;al., 2020</xref>). As a composite index reflecting carbon-nitrogen metabolic balance, the S/N ratio is jointly regulated by sugars and Nic; its positive correlation with RS/TS and negative correlation with Nic aligns with the well-documented antagonism between carbon and nitrogen metabolism in tobacco (<xref ref-type="bibr" rid="B7">Bush and Tso, 2015</xref>).</p>
<p>The non-significant correlation between tobacco grade and K<sup>+</sup>, Cl<sup>&#x2212;</sup> (or K/Cl ratio) is explained by the fact that tobacco grading primarily relies on appearance traits (maturity, color, thickness, oil content) rather than mineral content (<xref ref-type="bibr" rid="B30">Li et&#xa0;al., 2024a</xref>). K<sup>+</sup> and Cl<sup>&#x2212;</sup> are mainly affected by soil fertility and fertilizer management, with no direct link to appearance traits. In contrast, Nic, RS, and TS are closely associated with appearance quality: sufficient nitrogen supply promotes Nic synthesis, enhancing leaf tissue compactness and color development (<xref ref-type="bibr" rid="B24">Huan et&#xa0;al., 2024</xref>); sugars contribute to oiliness and luster via hygroscopic properties and Maillard reactions (generating pigments and aroma compounds during curing) (<xref ref-type="bibr" rid="B86">Wu et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B93">Yang et&#xa0;al., 2023</xref>); and the S/N ratio correlates strongly with leaf maturity, a key grading criterion (<xref ref-type="bibr" rid="B18">Grsic and &#x10c;avlek, 2019</xref>).</p>
<p>The spectral correlation patterns are determined by molecular vibrational characteristics. Hyperspectral signals in the near-infrared region (780&#x2013;1650 nm) originate from C&#x2013;H, O&#x2013;H, and N&#x2013;H functional group vibrations. Nic and RS exhibit detectable characteristic absorption: Nic&#x2019;s N&#x2013;H and C&#x2013;H groups absorb at 1228&#x2013;1370 nm, while RS&#x2019;s O&#x2013;H and C&#x2013;H groups show distinct near-infrared absorption peaks (<xref ref-type="bibr" rid="B17">Golic et&#xa0;al., 2003</xref>; <xref ref-type="bibr" rid="B62">Robertson et&#xa0;al., 2019</xref>). In contrast, K<sup>+</sup> and Cl<sup>&#x2212;</sup> are spectrally inert due to the absence of hydrogen-containing functional groups; TS signals are masked by spectral overlap with other hydroxyl-containing compounds (e.g., RS, cellulose, starch) (<xref ref-type="bibr" rid="B59">Qin et&#xa0;al., 2021</xref>); and the S/N ratio, as a derived index rather than a chemical entity, cannot be directly detected due to overlapping signals from sugars and Nic.</p>
<p>The Mantel test correlation analysis reveals the relationships among chemical components, tobacco grades, and spectral data through the following logical chain. The chemical components of tobacco leaves serve as their intrinsic basis, directly determining their physical properties and external appearance. Furthermore, grade serves as an external manifestation of the chemical composition, allowing the intrinsic quality to be visualized through empirical grading standards. Meanwhile, hyperspectral imaging quantifies these grade-related variations in physical traits and chemical composition into hyperspectral data, with the resulting spectral profiles providing a comprehensive reflection of tobacco leaf quality. Using chemical components as the foundational basis, the quantified spectral data and the appearance traits that define grade are integrated, forming an interconnected system that links the spectrum, grade, and chemical components. This provides a foundation for predicting the grades of first-roasted tobacco leaves using hyperspectral technology.</p>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Comparison of spectral preprocessing and model selection</title>
<p>Three preprocessing methods (namely, MSC, SG, and SNV) were applied to the hyperspectral data of different grades of first-roasted tobacco leaves, and the spectral curves of all samples after preprocessing were plotted (<xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7</bold></xref>). After data preprocessing, the spectral curves of first-roasted tobacco leaves were more concentrated than the original spectra. Spectral data processed by both SG and SNV exhibited increased dispersion at the boundary wavelengths, attributed to inherent algorithmic limitations&#x2014;namely, boundary extrapolation uncertainty for SG and noise amplification in low signal-to-noise regions for SNV (<xref ref-type="bibr" rid="B6">Buddenbaum and Steffens, 2012</xref>; <xref ref-type="bibr" rid="B65">Schmid et&#xa0;al., 2022</xref>). In contrast, MSC-corrected spectra remained highly consistent with the original spectral profiles.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Comparison of raw and preprocessed spectra. <bold>(A)</bold> Original spectra. <bold>(B)</bold> Spectra after MSC preprocessing. <bold>(C)</bold> Spectra after SG preprocessing. <bold>(D)</bold> Spectra after SNV preprocessing.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1756218-g007.tif">
<alt-text content-type="machine-generated">Four grouped line graphs labeled A, B, C, and D display reflectance versus wavelength from approximately 950 to 1650 nanometers using various colored lines, showing spectral data patterns for different conditions or processing methods.</alt-text>
</graphic></fig>
<p>To establish four classification models (RF, BPNN, ELM, and PLS-DA), the three preprocessed spectral datasets were used as input variables, and the tobacco grades were used as output variables. The correct classification rate of the test set was used as the evaluation index for these models (<xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>). SNV-RF had the lowest classification accuracy of only 34.0%, while MSC-PLS-DA had the highest classification accuracy of 98.5%. The lowest average classification accuracy among the three preprocessing methods was 74.6% for SNV, and the highest average classification accuracy was 90.1% for MSC. When first-roasted tobacco grades were categorized using near-infrared hyperspectroscopy, the differences between grades were mainly reflected in the contents of key chemical constituents such as nicotine, total sugars, and reducing sugars (<xref ref-type="bibr" rid="B103">Zhu et&#xa0;al., 2022</xref>). MSC enhances chemical compositional differences by removing spectral baseline shifts caused by surface scattering or uneven particle distribution (<xref ref-type="bibr" rid="B31">Li et&#xa0;al., 2024b</xref>). SG removes high-frequency noise but has limited capability to correct for systematic noise and does not fully preserve chemical information (<xref ref-type="bibr" rid="B65">Schmid et&#xa0;al., 2022</xref>). SNV processes each spectrum individually, which may disrupt the continuity of the chemical gradient between samples (<xref ref-type="bibr" rid="B49">Mishra et&#xa0;al., 2021</xref>). As a result, MSC achieves the highest average classification accuracy (<xref ref-type="bibr" rid="B61">Rinnan et&#xa0;al., 2009</xref>).</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Accuracy of different preprocessing methods and classification models for tobacco leaf grade identification using full-band spectra.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Model Preprocessing</th>
<th valign="middle" colspan="4" align="center">Grade</th>
<th valign="middle" rowspan="2" align="center">Average classification accuracy</th>
</tr>
<tr>
<th valign="middle" align="center">RF</th>
<th valign="middle" align="center">BPNN</th>
<th valign="middle" align="center">ELM</th>
<th valign="middle" align="center">PLS-DA</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">MSC</td>
<td valign="middle" align="center">79.8%</td>
<td valign="middle" align="center">93.6%</td>
<td valign="middle" align="center">88.4%</td>
<td valign="middle" align="center">98.5%</td>
<td valign="middle" align="center">90.1%</td>
</tr>
<tr>
<td valign="middle" align="center">SG</td>
<td valign="middle" align="center">60.2%</td>
<td valign="middle" align="center">93.5%</td>
<td valign="middle" align="center">95.1%</td>
<td valign="middle" align="center">98.1%</td>
<td valign="middle" align="center">86.7%</td>
</tr>
<tr>
<td valign="middle" align="center">SNV</td>
<td valign="middle" align="center">34.0%</td>
<td valign="middle" align="center">89.7%</td>
<td valign="middle" align="center">77.2%</td>
<td valign="middle" align="center">97.4%</td>
<td valign="middle" align="center">74.6%</td>
</tr>
<tr>
<td valign="middle" align="center">Average classification<break/>accuracy</td>
<td valign="middle" align="center">58.0%</td>
<td valign="middle" align="center">92.3%</td>
<td valign="middle" align="center">86.9%</td>
<td valign="middle" align="center">98.0%</td>
<td valign="middle" align="center"/>
</tr>
</tbody>
</table>
</table-wrap>
<p>Comparing the four classification models, the PLS-DA model achieved the best performance with an average classification accuracy of 98.0%, while RF had the lowest average classification accuracy of 58.0%. Hyperspectral data are characterized by high dimensionality and strong collinearity. PLS-DA, which combines PCA and partial least squares regression, can effectively extract features relevant to the classification target while reducing data dimensionality, thereby improving classification accuracy. During hyperspectral scanning, instrumental noise, baseline drift, and other issues can occur. RF is sensitive to noise, while BPNN and ELM tend to amplify noise in the low signal-to-noise ratio regions, resulting in decreased classification stability for all three models (<xref ref-type="bibr" rid="B1">Agjee et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B26">Huang et&#xa0;al., 2019</xref>). Considering model accuracy and differences in classification performance, MSC was chosen as the preprocessing method, and PLS-DA was selected as the classification model.</p>
</sec>
<sec id="s3_6">
<label>3.6</label>
<title>Comparison of characteristic band algorithms</title>
<p>When an excessive number of bands is selected (e.g., more than 50 bands, corresponding to &gt;15% of the original set), the retained redundant information tends to obscure the inherent differences in the feature selection preferences of the two algorithms. Conversely, an insufficient number of bands (e.g., fewer than 20 bands, or &lt;6% of the original set) leads to the loss of critical spectral information, resulting in a &#x201c;floor effect&#x201d; that would diminish their informativeness (<xref ref-type="bibr" rid="B79">Tang et&#xa0;al., 2014</xref>; <xref ref-type="bibr" rid="B28">Li et&#xa0;al., 2024c</xref>). Therefore, 35 feature wavelengths, accounting for 10% of the original spectral data, were selected to compare the performance of the SPA and CARS algorithms. The CARS algorithm selected characteristic wavelengths primarily concentrated around 1130 nm, 1410 nm, 1440 nm, 1600 nm, and 1630&#x2013;1650 nm, which are distributed across the entire near-infrared spectral range (<xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8</bold></xref>). These bands correspond to multiple chemical-bond vibration signals associated with key tobacco components: the band near 1130 nm (attributed to overtones and combination tones of C&#x2013;H bonds) primarily reflects carbohydrate content (<xref ref-type="bibr" rid="B101">Zhou et&#xa0;al., 2025</xref>); the bands at 1410 nm and 1440 nm (resulting from combination vibrations of O&#x2013;H and C&#x2013;H bonds) specifically indicate the content and structural features of soluble sugars in tobacco (<xref ref-type="bibr" rid="B30">Li et&#xa0;al., 2024a</xref>); the bands around 1600 nm and 1630&#x2013;1650 nm (associated with C&#x2013;H and N&#x2013;H bond vibrations) reflect variations in Nic content (<xref ref-type="bibr" rid="B34">Liang et&#xa0;al., 2022</xref>). Thus, CARS effectively integrates spectral information related to sugars and Nic.</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>CARS characteristic band distribution.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1756218-g008.tif">
<alt-text content-type="machine-generated">Line Graph: Distribution of Selected Bands Using the CARS Feature Band Selection Algorithm.</alt-text>
</graphic></fig>
<p>In contrast, SPA predominantly selected wavelengths within the 1580&#x2013;1650 nm range (<xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9</bold></xref>), which corresponds to combination vibrations of C&#x2013;H bonds. This spectral region is less affected by light scattering caused by cellular structures, thereby minimizing physical interference and enhancing the absorption signals of C&#x2013;H-containing constituents such as sugars and Nic (<xref ref-type="bibr" rid="B103">Zhu et&#xa0;al., 2022</xref>). Consequently, these wavelengths serve as highly informative characteristic bands for characterizing the chemical profile of tobacco.</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>SPA characteristic band distribution.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1756218-g009.tif">
<alt-text content-type="machine-generated">Line Graph: Distribution of Selected Bands Using the SPA Feature Band Selection Algorithm.</alt-text>
</graphic></fig>
<p>In summary, both algorithms successfully reduced the dimensionality of the hyperspectral data while retaining key spectral information linked to the core chemical constituents of tobacco, thereby achieving a balance between model fitting performance and generalization capability.</p>
</sec>
<sec id="s3_7">
<label>3.7</label>
<title>Model evaluation and analysis</title>
<sec id="s3_7_1">
<label>3.7.1</label>
<title>PLS-DA classification model</title>
<p>The PLS-DA model demonstrates consistently higher grade classification accuracy than other classification models across all three preprocessing methods, achieving an accuracy of 98.5% under MSC preprocessing. This is because there is strong collinearity among the bands in the near-infrared spectra. PLS-DA, however, extracts latent variables by maximizing covariance, automatically filtering out noise and emphasizing discriminative bands. Thus, the classification accuracy of PLS-DA is significantly higher than that of the other three models.</p>
<p>The experiment employed the confusion matrix as the performance evaluation index for the model (<xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10</bold></xref>). Values along the diagonal of the confusion matrix represent the proportion of correctly categorized samples, while values on the off-diagonal denote the proportion of incorrectly categorized samples. The classification accuracy for C1F, C4F, and X2F reached 100%, while that for B1F, C2F, and C3F averaged 98.0%, with only a very small portion of samples misclassified. B2F is similar to B1F in both appearance and intrinsic chemical composition, and the near-infrared spectra primarily reflect the overall chemical information of tobacco leaves. If different grades of tobacco differ only slightly in key components, their spectral features may overlap in the principal component space, making it difficult for the model to distinguish them. As a result, 2.1% of B2F samples were miscategorized as B1F. The slight differences in the physical structures of C3L and C3F in the middle leaves were not sufficiently reflected in the near-infrared spectra, leading to 2.1% of C3L samples being classified as C3F. Although a small number of misclassifications occurred, the confusion matrix demonstrates that the MSC-PLS-DA model has strong classification performance, accurately distinguishing between tobacco grades.</p>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>Confusion matrix for the grade classification of the PLS-DA model.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1756218-g010.tif">
<alt-text content-type="machine-generated">Confusion matrix chart displaying predicted versus true classification labels with strong diagonal dominance. All classes exhibit high accuracy, most above ninety-eight percent, as shown by dark blue squares. Percentage scale bar is on the right.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_7_2">
<label>3.7.2</label>
<title>Comparison of different numbers of characteristic bands</title>
<p>The MSC-preprocessed data were input into the SPA and CARS characteristic band selection algorithms, which selected varying numbers of characteristic bands. These numbers were then placed in ascending order according to the ratio of characteristic bands to the full wavelength range and fed into the grade classification model to evaluate the classification accuracy for different band counts. In the SPA, the classification accuracy of the RF model does not increase significantly as the number of feature bands increases (<xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>). The remaining three models have unsatisfactory performance when the number of feature bands is small. However, the classification accuracy of all three models improves as the number of feature bands increases, increasing by 20% for ELM and by 40% for BPNN and PLS-DA. In the CARS algorithm, the classification performance of all four models is unsatisfactory when the number of feature bands is small (<xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>). Similar to the SPA, the classification accuracy of all four models improves as the number of feature bands in the CARS model increases, increasing by 40% for RF, by 30% for BPNN and PLS-DA, and by 10% for ELM. When the number of selected feature bands is limited, the wavelengths chosen by SPA are predominantly concentrated in the spectral end regions. RF, owing to its ensemble learning mechanism, exhibits strong robustness to locally concentrated high-information features, enabling effective extraction of discriminative information from such intervals (<xref ref-type="bibr" rid="B4">Bin et&#xa0;al., 2016</xref>). 
In contrast, both BPNN and ELM rely on nonlinear correlations among multiple constituent features, while PLS-DA depends on linear relationships across multiple bands to construct meaningful latent variables (<xref ref-type="bibr" rid="B27">Jiang et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B67">Shao et&#xa0;al., 2022</xref>). A single spectral interval is insufficient to meet the input requirements of these models, leading to relatively lower classification accuracy. Although the wavelengths selected by CARS cover key spectral regions associated with sugars and nicotine, its selection bias&#x2014;rooted in the regression coefficients of PLS and compounded by the randomness introduced through Monte Carlo sampling&#x2014;tends to discard signal bands that are critical for classification (<xref ref-type="bibr" rid="B70">Song et&#xa0;al., 2020</xref>). Consequently, the resulting feature subset may lack representativeness, hindering the ability of all classification models to extract effective discriminatory information. Therefore, when the number of feature bands is limited, all classification models based on bands selected by SPA (with the exception of the RF model) or CARS fail to achieve satisfactory classification accuracy.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Number of SPA-selected feature bands and the corresponding grade classification accuracy across different models.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Number of characteristic bands (pieces)</th>
<th valign="middle" colspan="4" align="center">Accuracy (%)</th>
</tr>
<tr>
<th valign="middle" align="center">RF</th>
<th valign="middle" align="center">BPNN</th>
<th valign="middle" align="center">ELM</th>
<th valign="middle" align="center">PLS-DA</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">35 (10%)</td>
<td valign="middle" align="center">76.1</td>
<td valign="middle" align="center">58.9</td>
<td valign="middle" align="center">69.2</td>
<td valign="middle" align="center">46.0</td>
</tr>
<tr>
<td valign="middle" align="center">70 (20%)</td>
<td valign="middle" align="center">77.0</td>
<td valign="middle" align="center">69.0</td>
<td valign="middle" align="center">73.3</td>
<td valign="middle" align="center">73.8</td>
</tr>
<tr>
<td valign="middle" align="center">105 (30%)</td>
<td valign="middle" align="center">76.3</td>
<td valign="middle" align="center">71.0</td>
<td valign="middle" align="center">78.7</td>
<td valign="middle" align="center">81.9</td>
</tr>
<tr>
<td valign="middle" align="center">140 (40%)</td>
<td valign="middle" align="center">75.5</td>
<td valign="middle" align="center">76.8</td>
<td valign="middle" align="center">81.1</td>
<td valign="middle" align="center">88.8</td>
</tr>
<tr>
<td valign="middle" align="center">175 (50%)</td>
<td valign="middle" align="center">76.8</td>
<td valign="middle" align="center">79.4</td>
<td valign="middle" align="center">83.0</td>
<td valign="middle" align="center">91.6</td>
</tr>
<tr>
<td valign="middle" align="center">210 (60%)</td>
<td valign="middle" align="center">77.8</td>
<td valign="middle" align="center">81.7</td>
<td valign="middle" align="center">82.1</td>
<td valign="middle" align="center">93.6</td>
</tr>
<tr>
<td valign="middle" align="center">245 (70%)</td>
<td valign="middle" align="center">77.9</td>
<td valign="middle" align="center">84.5</td>
<td valign="middle" align="center">86.7</td>
<td valign="middle" align="center">94.0</td>
</tr>
<tr>
<td valign="middle" align="center">280 (80%)</td>
<td valign="middle" align="center">77.6</td>
<td valign="middle" align="center">86.7</td>
<td valign="middle" align="center">85.1</td>
<td valign="middle" align="center">95.5</td>
</tr>
<tr>
<td valign="middle" align="center">315 (90%)</td>
<td valign="middle" align="center">79.6</td>
<td valign="middle" align="center">92.5</td>
<td valign="middle" align="center">87.1</td>
<td valign="middle" align="center">96.3</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Number of CARS-selected feature bands and the corresponding grade classification accuracy across different models.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Number of characteristic bands (pieces)</th>
<th valign="middle" colspan="4" align="center">Accuracy (%)</th>
</tr>
<tr>
<th valign="middle" align="center">RF</th>
<th valign="middle" align="center">BPNN</th>
<th valign="middle" align="center">ELM</th>
<th valign="middle" align="center">PLS-DA</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">35 (10%)</td>
<td valign="middle" align="center">35.1</td>
<td valign="middle" align="center">50.3</td>
<td valign="middle" align="center">74.6</td>
<td valign="middle" align="center">61.5</td>
</tr>
<tr>
<td valign="middle" align="center">70 (20%)</td>
<td valign="middle" align="center">59.3</td>
<td valign="middle" align="center">64.1</td>
<td valign="middle" align="center">75.3</td>
<td valign="middle" align="center">77.0</td>
</tr>
<tr>
<td valign="middle" align="center">105 (30%)</td>
<td valign="middle" align="center">62.1</td>
<td valign="middle" align="center">72.7</td>
<td valign="middle" align="center">78.9</td>
<td valign="middle" align="center">83.7</td>
</tr>
<tr>
<td valign="middle" align="center">140 (40%)</td>
<td valign="middle" align="center">63.6</td>
<td valign="middle" align="center">78.5</td>
<td valign="middle" align="center">84.9</td>
<td valign="middle" align="center">92.0</td>
</tr>
<tr>
<td valign="middle" align="center">175 (50%)</td>
<td valign="middle" align="center">67.7</td>
<td valign="middle" align="center">75.7</td>
<td valign="middle" align="center">86.4</td>
<td valign="middle" align="center">91.6</td>
</tr>
<tr>
<td valign="middle" align="center">210 (60%)</td>
<td valign="middle" align="center">70.7</td>
<td valign="middle" align="center">82.8</td>
<td valign="middle" align="center">87.5</td>
<td valign="middle" align="center">91.8</td>
</tr>
<tr>
<td valign="middle" align="center">245 (70%)</td>
<td valign="middle" align="center">72.5</td>
<td valign="middle" align="center">83.6</td>
<td valign="middle" align="center">86.5</td>
<td valign="middle" align="center">94.4</td>
</tr>
<tr>
<td valign="middle" align="center">280 (80%)</td>
<td valign="middle" align="center">77.4</td>
<td valign="middle" align="center">82.1</td>
<td valign="middle" align="center">85.4</td>
<td valign="middle" align="center">93.1</td>
</tr>
<tr>
<td valign="middle" align="center">315 (90%)</td>
<td valign="middle" align="center">79.6</td>
<td valign="middle" align="center">87.1</td>
<td valign="middle" align="center">88.0</td>
<td valign="middle" align="center">96.8</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In both BPNN and PLS-DA models, the use of characteristic band algorithms results in a significant improvement in classification accuracy as the number of characteristic bands increases. The performance of the BPNN and PLS-DA models is influenced by the information density and signal-to-noise ratio of the input features, while the SPA and CARS algorithms optimize feature subsets, significantly improving data quality and thereby enhancing model performance. Specifically, the SPA, with its low collinearity and small-scale feature subsets, aligns with the sensitivity of BPNN and the projection requirements of PLS-DA, thus significantly improving classification accuracy. In contrast, the CARS algorithm retains a broad range of wavelengths, making it more suitable for tasks requiring comprehensive spectral information. After applying the SPA characteristic wavelength selection method, the accuracy of the SPA-RF model remains essentially unchanged. When the number of selected feature bands reached 70% of the total spectral bands (i.e., 245 bands), the classification accuracy of the SPA-ELM and SPA-PLS-DA models decreased by only 1.7% and 4.5%, respectively, compared to the full-band models. These results demonstrate that a 30% reduction in data dimensionality (i.e., 105 fewer bands)&#x2014;achieved at the cost of a marginal drop in accuracy&#x2014;significantly lowers the computational load and storage requirements of the models. This feature selection strategy effectively avoids the redundancy associated with full-band modeling while addressing the performance limitations of low-dimensional feature sets. It thus offers a practical and feasible solution for the lightweight deployment and engineering application of hyperspectral-based tobacco grading technology.</p>
<p>The PLS-DA model achieves a significantly higher classification accuracy than the other three models when classifying grades based on both full bands and selected feature bands. When the number of feature bands reaches 70% of the total, the classification accuracy of the PLS-DA model is comparable to that achieved using the full set of bands, indicating that PLS-DA is well suited for tobacco grade classification. In this study, a model developed by integrating hyperspectral technology with machine learning has classified nine different grades of first-roasted tobacco with an accuracy of 98.5%, achieving accurate discrimination across different parts and grades.</p>
<p>Compared with previous studies, this research mainly focuses on analyzing the near-infrared hyperspectral data of nine common grades of flue-cured tobacco leaves. <xref ref-type="bibr" rid="B85">Wei et&#xa0;al., 2024</xref> used the 1D-CNN model combined with the LAR characteristic band algorithm to classify 10 grades of flue-cured tobacco leaves, achieving an accuracy of 96.3%. Similarly, <xref ref-type="bibr" rid="B39">Lu et&#xa0;al., 2021a</xref> used CNN to classify tobacco leaf grades based on global images and local patches, with a classification accuracy of 91.3%. In contrast, the classification accuracy of this study reached 98.5%, representing improvements of 2.2% and 7.2% over the aforementioned studies of <xref ref-type="bibr" rid="B85">Wei et&#xa0;al., 2024</xref> and <xref ref-type="bibr" rid="B39">Lu et&#xa0;al., 2021a</xref>, respectively. Furthermore, <xref ref-type="bibr" rid="B85">Wei et&#xa0;al., 2024</xref> used the visible&#x2013;near-infrared (401&#x2013;1046 nm) spectral range, while <xref ref-type="bibr" rid="B39">Lu et&#xa0;al., 2021a</xref> performed a classification based on tobacco leaf images. This study has employed near-infrared hyperspectroscopy (950&#x2013;1650 nm), which provides greater stability and richer spectral information, making it more suitable for classifying tobacco leaf grades.</p>
<p>In data-driven models for agricultural product quality classification, variations in ecological conditions, cultivation practices, and inter-annual environmental fluctuations across different regions can lead to shifts in spectral features. The core challenge arising from this phenomenon is that when a model is directly applied to crops from other regions or subsequent years, it may encounter novel feature patterns not sufficiently represented in the training data, consequently leading to a degradation in classification performance (<xref ref-type="bibr" rid="B84">Wang et&#xa0;al., 2025</xref>). Although the classification model developed in this study demonstrates excellent performance on the experimental samples, it is crucial to acknowledge this inherent limitation. The generality and robustness of the model are not absolute; its long-term and broad applicability relies on a sustained update mechanism. This entails continuously incorporating new samples from diverse regions and harvest years to recalibrate and retrain the model, enabling it to dynamically adapt to the spatiotemporal variability in tobacco leaf spectral characteristics. This limitation is not unique to our model but represents a common challenge that all data-driven agricultural product quality classification models must address to transition successfully into practical application (<xref ref-type="bibr" rid="B84">Wang et&#xa0;al., 2025</xref>).</p>
</sec>
</sec>
</sec>
<sec id="s4" sec-type="conclusions">
<label>4</label>
<title>Conclusion</title>
<p>In this study, hyperspectral imaging technology was combined with machine learning methods to classify the grades of first-roasted tobacco, and the intrinsic relationships among grade, spectrum, and chemical composition were explored through multivariate statistical analysis. Mantel test correlation analysis revealed significant correlations among spectral data, tobacco grades, and chemical components. Chemical components constitute the intrinsic basis that determines the external characteristics and grades of tobacco leaves. By capturing the vibrational information of hydrogen-containing groups, spectra quantify the physical and chemical characteristics associated with tobacco grades, serving as a bridge between grades and chemical components and providing theoretical support for hyperspectral-based grading. The MSC, SG, and SNV preprocessing methods were combined with the RF, BPNN, ELM, and PLS-DA classification models to classify first-roasted tobacco leaves by grade. Additionally, the SPA and CARS feature band algorithms were used to extract feature bands from the spectral data. The classification accuracy of MSC-PLS-DA reached 98.5%. Employing only 70% of the feature bands (245 bands), the SPA-ELM and SPA-PLS-DA models maintained classification accuracies of 86.7% and 94.0%, on par with the full-band models (88.4% and 98.5%, respectively), while markedly reducing computational overhead and storage needs. Notably, the selected bands concentrated on the key spectral intervals corresponding to tobacco chemical components, effectively eliminating redundant information and achieving a sound balance between classification accuracy and model efficiency. This study has not only elucidated the multivariate relationships among spectra, chemical components, and grades but also provided an efficient method for the grading of flue-cured tobacco leaves. 
Additionally, it has provided a solid theoretical basis and mechanistic explanation for the application of hyperspectral technology in practical industrial settings and established a foundation for efficient tobacco leaf grading across the industry, facilitating the realization of automated grading and enhancing the economic benefits of the tobacco sector.</p>
</sec>
</body>
<back>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p></sec>
<sec id="s6" sec-type="author-contributions">
<title>Author contributions</title>
<p>JAZ: Data curation, Software, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. HG: Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. DW: Conceptualization, Investigation, Writing &#x2013; review &amp; editing. YC: Data curation, Methodology, Writing &#x2013; review &amp; editing. SD: Methodology, Supervision, Writing &#x2013; review &amp; editing. NS: Methodology, Software, Writing &#x2013; review &amp; editing. SY: Supervision, Validation, Writing &#x2013; review &amp; editing. CH: Supervision, Validation, Writing &#x2013; review &amp; editing. DZ: Validation, Visualization, Writing &#x2013; review &amp; editing. YD: Methodology, Project administration, Writing &#x2013; review &amp; editing. YB: Formal analysis, Visualization, Writing &#x2013; review &amp; editing. NW: Funding acquisition, Resources, Writing &#x2013; review &amp; editing. GW: Funding acquisition, Project administration, Resources, Writing &#x2013; review &amp; editing. ZL: Funding acquisition, Resources, Writing &#x2013; review &amp; editing. JHZ: Supervision, Validation, Visualization, Writing &#x2013; review &amp; editing. PZ: Validation, Visualization, Writing &#x2013; review &amp; editing.</p></sec>
<ack>
<title>Acknowledgments</title>
<p>The authors sincerely acknowledge the valuable support and contributions to this study from Kunming University of Science and Technology, Yunnan Agricultural University, and Kunming Branch of Yunnan Tobacco Company.</p>
</ack>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>Authors DW, SD, and ZL were employed by Yunnan Provincial Tobacco Company.</p>
<p>The remaining author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The author(s) declared that this work received funding from Yunnan Provincial Tobacco Company, and the funder participated in the study design.</p></sec>
<sec id="s9" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<sec id="s11" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpls.2026.1756218/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpls.2026.1756218/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Agjee</surname> <given-names>N. H.</given-names></name>
<name><surname>Mutanga</surname> <given-names>O.</given-names></name>
<name><surname>Peerbhay</surname> <given-names>K.</given-names></name>
<name><surname>Ismail</surname> <given-names>R.</given-names></name>
</person-group> (<year>2018</year>). 
<article-title>The impact of simulated spectral noise on random forest and oblique random forest classification performance</article-title>. <source>J. Spectrosc.</source> <volume>2018</volume>, <elocation-id>8316918</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2018/8316918</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bano&#x17e;i&#x107;</surname> <given-names>M.</given-names></name>
<name><surname>Joki&#x107;</surname> <given-names>S.</given-names></name>
<name><surname>A&#x10d;kar</surname> <given-names>&#x110;.</given-names></name>
<name><surname>Bla&#x17e;i&#x107;</surname> <given-names>M.</given-names></name>
<name><surname>&#x160;ubari&#x107;</surname> <given-names>D.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Carbohydrates&#x2014;key players in tobacco aroma formation and quality determination</article-title>. <source>Molecules</source> <volume>25</volume>, <elocation-id>1734</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/molecules25071734</pub-id>, PMID: <pub-id pub-id-type="pmid">32283792</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Barker</surname> <given-names>M.</given-names></name>
<name><surname>Rayens</surname> <given-names>W.</given-names></name>
</person-group> (<year>2003</year>). 
<article-title>Partial least squares for discrimination</article-title>. <source>J. Chemometrics</source> <volume>17</volume>, <fpage>166</fpage>&#x2013;<lpage>173</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/cem.785</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bin</surname> <given-names>J.</given-names></name>
<name><surname>Ai</surname> <given-names>F.-F.</given-names></name>
<name><surname>Fan</surname> <given-names>W.</given-names></name>
<name><surname>Zhou</surname> <given-names>J.-H.</given-names></name>
<name><surname>Yun</surname> <given-names>Y.-H.</given-names></name>
<name><surname>Liang</surname> <given-names>Y.-Z.</given-names></name>
</person-group> (<year>2016</year>). 
<article-title>A modified random forest approach to improve multi-class classification performance of tobacco leaf grades coupled with NIR spectroscopy</article-title>. <source>RSC Adv.</source> <volume>6</volume>, <fpage>30353</fpage>&#x2013;<lpage>30361</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1039/C5RA25052H</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Breiman</surname> <given-names>L.</given-names></name>
</person-group> (<year>2001</year>). 
<article-title>Random forests</article-title>. <source>Mach. Learn.</source> <volume>45</volume>, <fpage>5</fpage>&#x2013;<lpage>32</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Buddenbaum</surname> <given-names>H.</given-names></name>
<name><surname>Steffens</surname> <given-names>M.</given-names></name>
</person-group> (<year>2012</year>). 
<article-title>The effects of spectral pretreatments on chemometric analyses of soil profiles using laboratory imaging spectroscopy</article-title>. <source>J.&#xa0;Spectrosc.</source> <volume>2012</volume>, <elocation-id>274903</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2012/274903</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bush</surname> <given-names>L. P.</given-names></name>
<name><surname>Tso</surname> <given-names>T. C.</given-names></name>
</person-group> (<year>2015</year>). 
<article-title>Physiology and biochemistry of the tobacco plant. 1. Growth and development - Physiologie und Biochemie der Tabakpflanze: 1. Wachstum und Entwicklung</article-title>. <source>Beitr&#xe4;ge zur Tabakforschung/Contributions to Tobacco Res.</source> <volume>14</volume> (<issue>4</issue>), <page-range>197&#x2013;209</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.2478/cttr-2013-0600</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chandra</surname> <given-names>R.</given-names></name>
<name><surname>Kundu</surname> <given-names>M.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Classification and authentication of operating conditions in different processes using partial least squares</article-title>. <source>Chem. Product Process Modeling</source> <volume>19</volume>, <fpage>135</fpage>&#x2013;<lpage>145</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1515/cppm-2023-0074</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chen</surname> <given-names>Y.</given-names></name>
<name><surname>Bin</surname> <given-names>J.</given-names></name>
<name><surname>Zou</surname> <given-names>C.</given-names></name>
<name><surname>Ding</surname> <given-names>M.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Discrimination of fresh tobacco leaves with different maturity levels by near-infrared (NIR) spectroscopy and deep learning</article-title>. <source>J. Spectrosc.</source> <volume>2021</volume>, <elocation-id>9912589</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2021/9912589</pub-id>, PMID: <pub-id pub-id-type="pmid">34211798</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Crabot</surname> <given-names>J.</given-names></name>
<name><surname>Clappe</surname> <given-names>S.</given-names></name>
<name><surname>Dray</surname> <given-names>S.</given-names></name>
<name><surname>Datry</surname> <given-names>T.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Testing the Mantel statistic with a spatially-constrained permutation procedure</article-title>. <source>Methods Ecol. Evol.</source> <volume>10</volume>, <fpage>532</fpage>&#x2013;<lpage>540</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/2041-210X.13141</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Cutler</surname> <given-names>D. R.</given-names></name>
<name><surname>Edwards</surname> <given-names>T. C.</given-names> <suffix>Jr.</suffix></name>
<name><surname>Beard</surname> <given-names>K. H.</given-names></name>
<name><surname>Cutler</surname> <given-names>A.</given-names></name>
<name><surname>Hess</surname> <given-names>K. T.</given-names></name>
<name><surname>Gibson</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2007</year>). 
<article-title>Random forests for classification in ecology</article-title>. <source>Ecology</source> <volume>88</volume>, <fpage>2783</fpage>&#x2013;<lpage>2792</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1890/07-0539.1</pub-id>, PMID: <pub-id pub-id-type="pmid">18051647</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Dante</surname> <given-names>H.</given-names></name>
<name><surname>Sahu</surname> <given-names>A.</given-names></name>
</person-group> (<year>2018</year>). 
<article-title>Non-destructive rapid quality control method for tobacco grading using VNIR hyperspectral imaging</article-title>. <source>Image Sens. Technologies: Materials Devices Systems Appl. V.</source> <volume>10656</volume>, <fpage>1065603</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1117/12.2305091</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Darkis</surname> <given-names>F. R.</given-names></name>
<name><surname>Hackney</surname> <given-names>E. J.</given-names></name>
</person-group> (<year>1952</year>). 
<article-title>Cigarette tobaccos. Chemical changes that occur during processing</article-title>. <source>Ind. Eng. Chem.</source> <volume>44</volume>, <fpage>284</fpage>&#x2013;<lpage>291</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/ie50506a020</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="web">
<person-group person-group-type="author"><collab>Food and Agriculture Organization of the United Nations</collab>
</person-group> (<year>2025</year>). <source>Tobacco production</source>. Available online at: <uri xlink:href="https://archive.ourworldindata.org/20250909-093708/grapher/tobacco-production.html">https://archive.ourworldindata.org/20250909-093708/grapher/tobacco-production.html</uri> (<date-in-citation content-type="access-date">Accessed January 17, 2026</date-in-citation>).
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Fu</surname> <given-names>C.-B.</given-names></name>
<name><surname>Tian</surname> <given-names>A.-H.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Classification of hyperspectral images of small samples based on support vector machine and back propagation neural network</article-title>. <source>Sensors Materials</source> <volume>32</volume>, <fpage>447</fpage>&#x2013;<lpage>454</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18494/SAM.2020.2669</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Godin</surname> <given-names>B.</given-names></name>
<name><surname>Agneessens</surname> <given-names>R.</given-names></name>
<name><surname>Delcarte</surname> <given-names>J.</given-names></name>
<name><surname>Dardenne</surname> <given-names>P.</given-names></name>
</person-group> (<year>2015</year>). 
<article-title>Prediction of chemical characteristics of fibrous plant biomasses from their near infrared spectrum: comparing local versus partial least square models and cross-validation versus independent validations</article-title>. <source>J. Near Infrared Spectrosc.</source> <volume>23</volume>, <elocation-id>1138</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1255/jnirs.1138</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Golic</surname> <given-names>M.</given-names></name>
<name><surname>Walsh</surname> <given-names>K.</given-names></name>
<name><surname>Lawson</surname> <given-names>P.</given-names></name>
</person-group> (<year>2003</year>). 
<article-title>Short-wavelength near-infrared spectra of sucrose, glucose, and fructose with respect to sugar concentration and temperature</article-title>. <source>Appl. Spectrosc.</source> <volume>57</volume>, <fpage>139</fpage>&#x2013;<lpage>145</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1366/000370203321535033</pub-id>, PMID: <pub-id pub-id-type="pmid">14610949</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Grsic</surname> <given-names>K.</given-names></name>
<name><surname>&#x10c;avlek</surname> <given-names>M.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Effect of topping height and maturity on the quality of flue-cured tobacco cultivars</article-title>. <source>J. Cent. Eur. Agric.</source> <volume>20</volume>, <fpage>841</fpage>&#x2013;<lpage>851</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5513/JCEA01/20.3.2097</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hao</surname> <given-names>M.</given-names></name>
<name><surname>Cao</surname> <given-names>X.</given-names></name>
<name><surname>Sun</surname> <given-names>J.</given-names></name>
<name><surname>Sun</surname> <given-names>Y.</given-names></name>
<name><surname>Wang</surname> <given-names>J.</given-names></name>
<name><surname>Zhang</surname> <given-names>H.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Detection of external defects in seed potatoes using spectral&#x2013;spatial fusion of hyperspectral images and deep learning</article-title>. <source>Agriculture</source> <volume>16</volume>, <elocation-id>77</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture16010077</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Harjoko</surname> <given-names>A.</given-names></name>
<name><surname>Prahara</surname> <given-names>A.</given-names></name>
<name><surname>Supardi</surname> <given-names>T. W.</given-names></name>
<name><surname>Candradewi</surname> <given-names>I.</given-names></name>
<name><surname>Pulungan</surname> <given-names>R.</given-names></name>
<name><surname>Hartati</surname> <given-names>S.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Image processing approach for grading tobacco leaf based on color and quality</article-title>. <source>Int. J. Smart Sens. Intelligent Syst.</source> <volume>12</volume>, <fpage>1</fpage>&#x2013;<lpage>10</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.21307/ijssis-2019-010</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hu</surname> <given-names>Y.</given-names></name>
<name><surname>Chen</surname> <given-names>W.</given-names></name>
<name><surname>Gouda</surname> <given-names>M.</given-names></name>
<name><surname>Yao</surname> <given-names>H.</given-names></name>
<name><surname>Zuo</surname> <given-names>X.</given-names></name>
<name><surname>Yu</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Fungal fermentation of Fuzhuan brick tea: A comprehensive evaluation of sensory properties using chemometrics, visible near-infrared spectroscopy, and electronic nose</article-title>. <source>Food Res. Int.</source> <volume>186</volume>, <elocation-id>114401</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.foodres.2024.114401</pub-id>, PMID: <pub-id pub-id-type="pmid">38729704</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hu</surname> <given-names>Y.</given-names></name>
<name><surname>Xu</surname> <given-names>L.</given-names></name>
<name><surname>Huang</surname> <given-names>P.</given-names></name>
<name><surname>Luo</surname> <given-names>X.</given-names></name>
<name><surname>Wang</surname> <given-names>P.</given-names></name>
<name><surname>Kang</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Reliable identification of oolong tea species: Nondestructive testing classification based on fluorescence hyperspectral technology and machine learning</article-title>. <source>Agriculture</source> <volume>11</volume>, <elocation-id>1106</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture11111106</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hu</surname> <given-names>C.</given-names></name>
<name><surname>Zhao</surname> <given-names>T.</given-names></name>
<name><surname>Duan</surname> <given-names>Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Wang</surname> <given-names>X.</given-names></name>
<name><surname>Li</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Visible-near infrared hyperspectral imaging for non-destructive estimation of leaf nitrogen content under water-saving irrigation in protected tomato cultivation</article-title>. <source>Front. Plant Sci.</source> <volume>16</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2025.1676457</pub-id>, PMID: <pub-id pub-id-type="pmid">41104426</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Huan</surname> <given-names>W.</given-names></name>
<name><surname>Irfan</surname> <given-names>M.</given-names></name>
<name><surname>Yokamo</surname> <given-names>S.</given-names></name>
<name><surname>Lu</surname> <given-names>H.</given-names></name>
<name><surname>Lu</surname> <given-names>D.</given-names></name>
<name><surname>Chen</surname> <given-names>X.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Exploring the effects of nitrogen fertilization management and soil factors on the yield and quality of flue-cured tobacco in China&#x2014;A quantitative research</article-title>. <source>Agronomy</source> <volume>14</volume>, <elocation-id>1365</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy14071365</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Huang</surname> <given-names>Y.</given-names></name>
<name><surname>Du</surname> <given-names>G.</given-names></name>
<name><surname>Ma</surname> <given-names>Y.</given-names></name>
<name><surname>Zhou</surname> <given-names>J.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Predicting heavy metals in dark sun-cured tobacco by near-infrared spectroscopy modeling based on the optimized variable selections</article-title>. <source>Ind. Crops Products</source> <volume>172</volume>, <elocation-id>114003</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.indcrop.2021.114003</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Huang</surname> <given-names>F.</given-names></name>
<name><surname>Lu</surname> <given-names>J.</given-names></name>
<name><surname>Tao</surname> <given-names>J.</given-names></name>
<name><surname>Li</surname> <given-names>L.</given-names></name>
<name><surname>Tan</surname> <given-names>X.</given-names></name>
<name><surname>Liu</surname> <given-names>P.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Research on optimization methods of ELM classification algorithm for hyperspectral remote sensing images</article-title>. <source>IEEE Access</source> <volume>7</volume>, <fpage>108070</fpage>&#x2013;<lpage>108089</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2019.2932909</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jiang</surname> <given-names>H.</given-names></name>
<name><surname>Mei</surname> <given-names>C.</given-names></name>
<name><surname>Chen</surname> <given-names>Q.</given-names></name>
</person-group> (<year>2017</year>). 
<article-title>Rapid identification of fermentation stages of bioethanol solid-state fermentation (SSF) using FT-NIR spectroscopy: Comparisons of linear and non-linear algorithms for multiple classification issues</article-title>. <source>Analytical Methods</source> <volume>9</volume>, <fpage>5769</fpage>&#x2013;<lpage>5776</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1039/C7AY01861D</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>X.</given-names></name>
<name><surname>Fu</surname> <given-names>X.</given-names></name>
<name><surname>Li</surname> <given-names>H.</given-names></name>
</person-group> (<year>2024</year>c). 
<article-title>A CARS-SPA-GA feature wavelength selection method based on hyperspectral imaging with potato leaf disease classification</article-title>. <source>Sensors</source> <volume>24</volume>, <elocation-id>6566</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s24206566</pub-id>, PMID: <pub-id pub-id-type="pmid">39460047</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>H.</given-names></name>
<name><surname>Liang</surname> <given-names>Y.</given-names></name>
<name><surname>Xu</surname> <given-names>Q.</given-names></name>
<name><surname>Cao</surname> <given-names>D.</given-names></name>
</person-group> (<year>2009</year>). 
<article-title>Key wavelengths screening using competitive adaptive reweighted sampling method for multivariate calibration</article-title>. <source>Analytica Chimica Acta</source> <volume>648</volume>, <fpage>77</fpage>&#x2013;<lpage>84</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.aca.2009.06.046</pub-id>, PMID: <pub-id pub-id-type="pmid">19616692</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>J.</given-names></name>
<name><surname>Ma</surname> <given-names>Z.</given-names></name>
<name><surname>Dai</surname> <given-names>H.</given-names></name>
<name><surname>Li</surname> <given-names>H.</given-names></name>
<name><surname>Qiu</surname> <given-names>J.</given-names></name>
<name><surname>Pang</surname> <given-names>X.</given-names></name>
</person-group> (<year>2024</year>a). 
<article-title>Application of PLSR in correlating sensory and chemical properties of middle flue-cured tobacco leaves with honey-sweet and burnt flavour</article-title>. <source>Heliyon</source> <volume>10</volume>, <elocation-id>e29547</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.heliyon.2024.e29547</pub-id>, PMID: <pub-id pub-id-type="pmid">38655300</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>J.</given-names></name>
<name><surname>Wu</surname> <given-names>G.</given-names></name>
<name><surname>Guo</surname> <given-names>F.</given-names></name>
<name><surname>Han</surname> <given-names>L.</given-names></name>
<name><surname>Xiao</surname> <given-names>H.</given-names></name>
<name><surname>Cao</surname> <given-names>Y.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>b). 
<article-title>Detection of protein content in alfalfa using visible/near-infrared spectroscopy technology</article-title>. <source>BioResources</source> <volume>19</volume>, <fpage>3808</fpage>&#x2013;<lpage>3825</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.15376/biores.19.2.3808-3825</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>R.</given-names></name>
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
<name><surname>Li</surname> <given-names>K.</given-names></name>
<name><surname>Qiao</surname> <given-names>J.</given-names></name>
<name><surname>Wang</surname> <given-names>Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2020</year>). 
<article-title>Nondestructive and rapid grading of tobacco leaves by use of a hand-held near-infrared spectrometer, based on a particle swarm optimization-extreme learning machine algorithm</article-title>. <source>Spectrosc. Lett.</source> <volume>53</volume>, <fpage>685</fpage>&#x2013;<lpage>691</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/00387010.2020.1824193</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>G.</given-names></name>
<name><surname>Zhen</surname> <given-names>H.</given-names></name>
<name><surname>Jiao</surname> <given-names>F.</given-names></name>
<name><surname>Hao</surname> <given-names>T.</given-names></name>
<name><surname>Wang</surname> <given-names>D.</given-names></name>
<name><surname>Ni</surname> <given-names>K.</given-names></name>
</person-group> (<year>2021</year>). &#x201c;
<article-title>Research on tobacco leaf grading algorithm based on transfer learning</article-title>,&#x201d; in <conf-name>2021 IEEE International Conference on Artificial Intelligence and Computer Applications (ICAICA)</conf-name> (<publisher-loc>Dalian, China; Piscataway (NJ)</publisher-loc>: 
<publisher-name>Institute of Electrical and Electronics Engineers (IEEE)</publisher-name>), <fpage>32</fpage>&#x2013;<lpage>35</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ICAICA52286.2021.9497953</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liang</surname> <given-names>Y.</given-names></name>
<name><surname>Zhao</surname> <given-names>L.</given-names></name>
<name><surname>Guo</surname> <given-names>J.</given-names></name>
<name><surname>Wang</surname> <given-names>H.</given-names></name>
<name><surname>Liu</surname> <given-names>S.</given-names></name>
<name><surname>Wang</surname> <given-names>L.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>Just-in-time learning-integrated partial least-squares strategy for accurately predicting 71 chemical constituents in Chinese tobacco by near-infrared spectroscopy</article-title>. <source>ACS Omega</source> <volume>7</volume>, <fpage>38650</fpage>&#x2013;<lpage>38659</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/acsomega.2c04139</pub-id>, PMID: <pub-id pub-id-type="pmid">36340111</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>Z.</given-names></name>
<name><surname>He</surname> <given-names>K.</given-names></name>
</person-group> (<year>2025</year>). &#x201c;
<article-title>A decade&#x2019;s battle on dataset bias: Are we there yet?</article-title>,&#x201d; in <conf-name>International Conference on Learning Representations (ICLR)</conf-name> (<publisher-loc>Singapore</publisher-loc>: 
<publisher-name>International Conference on Learning Representations (ICLR)</publisher-name>). doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2403.08632</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>H.</given-names></name>
<name><surname>Meng</surname> <given-names>L.</given-names></name>
<name><surname>Wang</surname> <given-names>S.</given-names></name>
<name><surname>Wang</surname> <given-names>A.</given-names></name>
<name><surname>Du</surname> <given-names>H.</given-names></name>
<name><surname>Zhao</surname> <given-names>P.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). &#x201c;
<article-title>Study on moisture content prediction of tobacco leaf based on near infrared spectroscopy</article-title>,&#x201d; in <conf-name>2023 7th Asian Conference on Artificial Intelligence Technology (ACAIT)</conf-name> (<publisher-loc>Jiaxing, China; Piscataway, NJ</publisher-loc>: 
<publisher-name>Institute of Electrical and Electronics Engineers (IEEE)</publisher-name>), <page-range>1413&#x2013;1418</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACAIT60137.2023.10528519</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>H.</given-names></name>
<name><surname>Tian</surname> <given-names>L.</given-names></name>
<name><surname>Wang</surname> <given-names>L.</given-names></name>
<name><surname>Zhang</surname> <given-names>Z.</given-names></name>
<name><surname>Li</surname> <given-names>J.</given-names></name>
<name><surname>Liu</surname> <given-names>X.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Real-time grading of roasted tobacco using near infrared spectroscopy technology</article-title>. <source>Microchemical J.</source> <volume>204</volume>, <elocation-id>110963</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.microc.2024.110963</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Lou</surname> <given-names>H.</given-names></name>
<name><surname>Zhang</surname> <given-names>C.</given-names></name>
</person-group> (<year>2018</year>). &#x201c;
<article-title>Features representation for flue-cured tobacco grading based on transfer learning to hard sample</article-title>,&#x201d; in <conf-name>2018 14th IEEE International Conference on Signal Processing (ICSP)</conf-name> (<publisher-loc>Piscataway, NJ</publisher-loc>: 
<publisher-name>IEEE</publisher-name>), <page-range>591&#x2013;595</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ICSP.2018.8652385</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lu</surname> <given-names>M.</given-names></name>
<name><surname>Jiang</surname> <given-names>S.</given-names></name>
<name><surname>Wang</surname> <given-names>C.</given-names></name>
<name><surname>Chen</surname> <given-names>D.</given-names></name>
<name><surname>Chen</surname> <given-names>T. E.</given-names></name>
</person-group> (<year>2021</year>a). 
<article-title>Tobacco leaf grading based on deep convolutional neural networks and machine vision</article-title>. <source>J. Am. Soc. Agric. Biol. Engineers (ASABE)</source> <volume>65</volume>, <fpage>11</fpage>&#x2013;<lpage>22</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.13031/ja.14537</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lu</surname> <given-names>M. Y.</given-names></name>
<name><surname>Wang</surname> <given-names>C.</given-names></name>
<name><surname>Wu</surname> <given-names>W. B.</given-names></name>
<name><surname>Zhu</surname> <given-names>D. L.</given-names></name>
<name><surname>Zhou</surname> <given-names>Q.</given-names></name>
<name><surname>Wang</surname> <given-names>Z. Y.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>a). 
<article-title>Intelligent grading of tobacco leaves using an improved bilinear convolutional neural network</article-title>. <source>IEEE Access</source> <volume>11</volume>, <fpage>68153</fpage>&#x2013;<lpage>68170</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2023.3292340</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lu</surname> <given-names>X.</given-names></name>
<name><surname>Zhao</surname> <given-names>C.</given-names></name>
<name><surname>Qin</surname> <given-names>Y.</given-names></name>
<name><surname>Xie</surname> <given-names>L.</given-names></name>
<name><surname>Wang</surname> <given-names>T.</given-names></name>
<name><surname>Wu</surname> <given-names>Z.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>b). 
<article-title>The application of hyperspectral images in the classification of fresh leaves&#x2019; maturity for flue-curing tobacco</article-title>. <source>Processes</source> <volume>11</volume>, <elocation-id>1249</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/pr11041249</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lu</surname> <given-names>M. Y.</given-names></name>
<name><surname>Zhou</surname> <given-names>Q.</given-names></name>
<name><surname>Chen</surname> <given-names>T. E.</given-names></name>
<name><surname>Li</surname> <given-names>J. H.</given-names></name>
<name><surname>Jiang</surname> <given-names>S. W.</given-names></name>
<name><surname>Gao</surname> <given-names>Q.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>b). 
<article-title>Qualitative discrimination of intact tobacco leaves based on near-infrared technology</article-title>. <source>J. Spectrosc.</source> <volume>2021</volume>, <elocation-id>8807199</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2021/8807199</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Luo</surname> <given-names>B.</given-names></name>
<name><surname>Sun</surname> <given-names>H.</given-names></name>
<name><surname>Zhang</surname> <given-names>L.</given-names></name>
<name><surname>Chen</surname> <given-names>F.</given-names></name>
<name><surname>Wu</surname> <given-names>K.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Advances in the tea plants phenotyping using hyperspectral imaging technology</article-title>. <source>Front. Plant Sci.</source> <volume>15</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2024.1442225</pub-id>, PMID: <pub-id pub-id-type="pmid">39148615</pub-id>
</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Luo</surname> <given-names>P.</given-names></name>
<name><surname>Yang</surname> <given-names>Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>H.</given-names></name>
<name><surname>Yi</surname> <given-names>M.</given-names></name>
<name><surname>Zhou</surname> <given-names>X.</given-names></name>
<name><surname>Yang</surname> <given-names>Y.</given-names></name>
<etal/>
</person-group>. (<year>2026</year>). 
<article-title>Identification of tobacco leaf diseases using hyperspectral imaging and machine learning with SHAP interpretability analysis</article-title>. <source>Front. Plant Sci.</source> <volume>16</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2025.1711972</pub-id>, PMID: <pub-id pub-id-type="pmid">41567402</pub-id>
</mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Marcelo</surname> <given-names>M. C. A.</given-names></name>
<name><surname>Soares</surname> <given-names>F. L. F.</given-names></name>
<name><surname>Ardila</surname> <given-names>J. A.</given-names></name>
<name><surname>Dias</surname> <given-names>J. C.</given-names></name>
<name><surname>Ped&#xf3;</surname> <given-names>R.</given-names></name>
<name><surname>Kaiser</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2019</year>). 
<article-title>Fast inline tobacco classification by near-infrared hyperspectral imaging and support vector machine-discriminant analysis</article-title>. <source>Analytical Methods</source> <volume>11</volume>, <fpage>1966</fpage>&#x2013;<lpage>1975</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1039/C9AY00413K</pub-id>
</mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Marzan</surname> <given-names>C. S.</given-names></name>
<name><surname>Ruiz</surname> <given-names>C. R.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Automated tobacco grading using image processing techniques and a convolutional neural network</article-title>. <source>Int. J. Mach. Learn. Computing</source> <volume>9</volume>, <fpage>807</fpage>&#x2013;<lpage>813</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18178/ijmlc.2019.9.6.877</pub-id>
</mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>McKinney</surname> <given-names>D. L.</given-names></name>
<name><surname>Frost-Pineda</surname> <given-names>K.</given-names></name>
<name><surname>Oldham</surname> <given-names>M. J.</given-names></name>
<name><surname>Fisher</surname> <given-names>M. T.</given-names></name>
<name><surname>Wang</surname> <given-names>J.</given-names></name>
<name><surname>Gogova</surname> <given-names>M.</given-names></name>
<etal/>
</person-group>. (<year>2014</year>). 
<article-title>Cigarettes with different nicotine levels affect sensory perception and levels of biomarkers of exposure in adult smokers</article-title>. <source>Nicotine Tobacco Res.</source> <volume>16</volume>, <fpage>948</fpage>&#x2013;<lpage>960</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/ntr/ntu009</pub-id>, PMID: <pub-id pub-id-type="pmid">24638852</pub-id>
</mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Medjahed</surname> <given-names>S. A.</given-names></name>
<name><surname>Ouali</surname> <given-names>M.</given-names></name>
</person-group> (<year>2018</year>). 
<article-title>Band selection based on optimization approach for hyperspectral image classification</article-title>. <source>Egyptian J. Remote Sens. Space Sci.</source> <volume>21</volume>, <fpage>413</fpage>&#x2013;<lpage>418</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ejrs.2018.01.003</pub-id>
</mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mishra</surname> <given-names>P.</given-names></name>
<name><surname>Rutledge</surname> <given-names>D. N.</given-names></name>
<name><surname>Roger</surname> <given-names>J.-M.</given-names></name>
<name><surname>Wali</surname> <given-names>K.</given-names></name>
<name><surname>Khan</surname> <given-names>H. A.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Chemometric pre-processing can negatively affect the performance of near-infrared spectroscopy models for fruit quality prediction</article-title>. <source>Talanta</source> <volume>229</volume>, <elocation-id>122303</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.talanta.2021.122303</pub-id>, PMID: <pub-id pub-id-type="pmid">33838766</pub-id>
</mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mo</surname> <given-names>Z. J.</given-names></name>
<name><surname>Duan</surname> <given-names>L. L.</given-names></name>
<name><surname>Pu</surname> <given-names>Y. Y.</given-names></name>
<name><surname>Tian</surname> <given-names>Z. L.</given-names></name>
<name><surname>Ke</surname> <given-names>Y. Z.</given-names></name>
<name><surname>Luo</surname> <given-names>W.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>Proteomics and co-expression network analysis reveal the importance of hub proteins and metabolic pathways in nicotine synthesis and accumulation in tobacco (<italic>Nicotiana tabacum</italic> L.)</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.860455</pub-id>, PMID: <pub-id pub-id-type="pmid">35574122</pub-id>
</mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Moreira</surname> <given-names>E. D. T.</given-names></name>
<name><surname>Pontes</surname> <given-names>M. J. C.</given-names></name>
<name><surname>Galv&#xe3;o</surname> <given-names>R. K. H.</given-names></name>
<name><surname>Ara&#xfa;jo</surname> <given-names>M. C. U.</given-names></name>
</person-group> (<year>2009</year>). 
<article-title>Near infrared reflectance spectrometry classification of cigarettes using the successive projections algorithm for variable selection</article-title>. <source>Talanta</source> <volume>79</volume>, <fpage>1260</fpage>&#x2013;<lpage>1264</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.talanta.2009.05.031</pub-id>, PMID: <pub-id pub-id-type="pmid">19635356</pub-id>
</mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Myhre</surname> <given-names>D. L.</given-names></name>
<name><surname>Attoe</surname> <given-names>O. J.</given-names></name>
<name><surname>Ogden</surname> <given-names>W. B.</given-names></name>
</person-group> (<year>1956</year>). 
<article-title>Chlorine and other constituents in relation to tobacco leaf-burn</article-title>. <source>Soil Sci. Soc. America J.</source> <volume>20</volume>, <fpage>547</fpage>&#x2013;<lpage>551</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2136/sssaj1956.03615995002000040024x</pub-id>
</mixed-citation>
</ref>
<ref id="B53">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Nekola</surname> <given-names>J. C.</given-names></name>
<name><surname>White</surname> <given-names>P. S.</given-names></name>
</person-group> (<year>1999</year>). 
<article-title>The distance decay of similarity in biogeography and ecology</article-title>. <source>J. Biogeography</source> <volume>26</volume>, <fpage>867</fpage>&#x2013;<lpage>878</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1046/j.1365-2699.1999.00305.x</pub-id>
</mixed-citation>
</ref>
<ref id="B54">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Nikolova</surname> <given-names>V. V.</given-names></name>
<name><surname>Nikolov</surname> <given-names>N. P.</given-names></name>
<name><surname>Popova</surname> <given-names>V. T.</given-names></name>
<name><surname>Peeva</surname> <given-names>S. T.</given-names></name>
<name><surname>Drachev</surname> <given-names>D. T.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Complex technological profiling of market dominating Oriental tobaccos from Krumovgrad region (Bulgaria)</article-title>. <source>IOP Conf. Series: Materials Sci. Eng.</source> <volume>1031</volume>, <elocation-id>012095</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1088/1757-899X/1031/1/012095</pub-id>
</mixed-citation>
</ref>
<ref id="B55">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Niu</surname> <given-names>Q.</given-names></name>
<name><surname>Liu</surname> <given-names>J.</given-names></name>
<name><surname>Jin</surname> <given-names>Y.</given-names></name>
<name><surname>Chen</surname> <given-names>X.</given-names></name>
<name><surname>Zhu</surname> <given-names>W.</given-names></name>
<name><surname>Yuan</surname> <given-names>Q.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Tobacco shred varieties classification using Multi-Scale-X-ResNet network and machine vision</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.962664</pub-id>, PMID: <pub-id pub-id-type="pmid">36061766</pub-id>
</mixed-citation>
</ref>
<ref id="B56">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Panda</surname> <given-names>D.</given-names></name>
<name><surname>Mohanty</surname> <given-names>S.</given-names></name>
<name><surname>Das</surname> <given-names>S.</given-names></name>
<name><surname>Senapaty</surname> <given-names>J.</given-names></name>
<name><surname>Sahoo</surname> <given-names>D. B.</given-names></name>
<name><surname>Mishra</surname> <given-names>B.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>From spectrum to yield: advances in crop photosynthesis with hyperspectral imaging</article-title>. <source>Photosynthetica</source> <volume>63</volume>, <fpage>196</fpage>&#x2013;<lpage>233</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.32615/ps.2025.012</pub-id>, PMID: <pub-id pub-id-type="pmid">40766744</pub-id>
</mixed-citation>
</ref>
<ref id="B57">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Patra</surname> <given-names>T.</given-names></name>
<name><surname>Olsen</surname> <given-names>K.</given-names></name>
<name><surname>Rinnan</surname> <given-names>&#xc5;.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>A multivariate perspective on the stability of oat-based drinks assessed by spectroscopy</article-title>. <source>Food Hydrocolloids</source> <volume>131</volume>, <elocation-id>107831</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.foodhyd.2022.107831</pub-id>
</mixed-citation>
</ref>
<ref id="B58">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Qin</surname> <given-names>Y. H.</given-names></name>
<name><surname>Liu</surname> <given-names>X. P.</given-names></name>
<name><surname>Zhang</surname> <given-names>F. M.</given-names></name>
<name><surname>Shan</surname> <given-names>Q. F.</given-names></name>
<name><surname>Zhang</surname> <given-names>M.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Improved deep residual shrinkage network on near infrared spectroscopy for tobacco qualitative analysis</article-title>. <source>Infrared Phys. Technol.</source> <volume>129</volume>, <elocation-id>104575</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.infrared.2023.104575</pub-id>
</mixed-citation>
</ref>
<ref id="B59">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Qin</surname> <given-names>Y.</given-names></name>
<name><surname>Xu</surname> <given-names>Y.</given-names></name>
<name><surname>Wang</surname> <given-names>L.</given-names></name>
<name><surname>An</surname> <given-names>Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>N.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Dynamic local multi-model consensus modeling for near infrared calibration of inorganic substances</article-title>. <source>Spectrosc. Lett.</source> <volume>54</volume>, <fpage>723</fpage>&#x2013;<lpage>731</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/00387010.2021.1995438</pub-id>
</mixed-citation>
</ref>
<ref id="B60">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Qu</surname> <given-names>H. B.</given-names></name>
<name><surname>Ou</surname> <given-names>D. L.</given-names></name>
<name><surname>Cheng</surname> <given-names>Y. Y.</given-names></name>
</person-group> (<year>2005</year>). 
<article-title>Background correction in near-infrared spectra of plant extracts by orthogonal signal correction</article-title>. <source>J. Zhejiang Univ. Sci. B</source> <volume>6</volume>, <fpage>838</fpage>&#x2013;<lpage>843</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1631/jzus.2005.B0838</pub-id>, PMID: <pub-id pub-id-type="pmid">16052720</pub-id>
</mixed-citation>
</ref>
<ref id="B61">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rinnan</surname> <given-names>&#xc5;.</given-names></name>
<name><surname>van den Berg</surname> <given-names>F.</given-names></name>
<name><surname>Engelsen</surname> <given-names>S. B.</given-names></name>
</person-group> (<year>2009</year>). 
<article-title>Review of the most common pre-processing techniques for near-infrared spectra</article-title>. <source>TrAC Trends Analytical Chem.</source> <volume>28</volume>, <fpage>1201</fpage>&#x2013;<lpage>1222</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.trac.2009.07.007</pub-id>
</mixed-citation>
</ref>
<ref id="B62">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Robertson</surname> <given-names>P. A.</given-names></name>
<name><surname>Villani</surname> <given-names>L.</given-names></name>
<name><surname>Robertson</surname> <given-names>E. G.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Conformer specific ultraviolet and infrared detection of nicotine in the vapor phase</article-title>. <source>J. Phys. Chem. A</source> <volume>123</volume>, <fpage>10152</fpage>&#x2013;<lpage>10157</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/acs.jpca.9b09113</pub-id>, PMID: <pub-id pub-id-type="pmid">31644291</pub-id>
</mixed-citation>
</ref>
<ref id="B63">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Roemer</surname> <given-names>E.</given-names></name>
<name><surname>Schorp</surname> <given-names>M. K.</given-names></name>
<name><surname>Piad&#xe9;</surname> <given-names>J. J.</given-names></name>
<name><surname>Seeman</surname> <given-names>J. I.</given-names></name>
<name><surname>Leyden</surname> <given-names>D. E.</given-names></name>
<name><surname>Haussmann</surname> <given-names>H. J.</given-names></name>
</person-group> (<year>2012</year>). 
<article-title>Scientific assessment of the use of sugars as cigarette tobacco ingredients: A review of published and other publicly available studies</article-title>. <source>Crit. Rev. Toxicol.</source> <volume>42</volume>, <fpage>244</fpage>&#x2013;<lpage>278</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3109/10408444.2011.650789</pub-id>, PMID: <pub-id pub-id-type="pmid">22263649</pub-id>
</mixed-citation>
</ref>
<ref id="B64">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Roger</surname> <given-names>J.-M.</given-names></name>
<name><surname>Mallet</surname> <given-names>A.</given-names></name>
<name><surname>Marini</surname> <given-names>F.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Preprocessing NIR spectra for aquaphotomics</article-title>. <source>Molecules</source> <volume>27</volume>, <elocation-id>6795</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/molecules27206795</pub-id>, PMID: <pub-id pub-id-type="pmid">36296387</pub-id>
</mixed-citation>
</ref>
<ref id="B65">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Schmid</surname> <given-names>M.</given-names></name>
<name><surname>Rath</surname> <given-names>D.</given-names></name>
<name><surname>Diebold</surname> <given-names>U.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Why and how Savitzky-Golay filters should be replaced</article-title>. <source>ACS Measurement Sci. Au</source> <volume>2</volume>, <fpage>185</fpage>&#x2013;<lpage>196</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/acsmeasuresciau.1c00054</pub-id>, PMID: <pub-id pub-id-type="pmid">35479103</pub-id>
</mixed-citation>
</ref>
<ref id="B66">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Setiawan</surname> <given-names>W.</given-names></name>
<name><surname>Purnama</surname> <given-names>A.</given-names></name>
</person-group> (<year>2020</year>). &#x201c;
<article-title>Tobacco leaf images clustering using DarkNet19 and K-Means</article-title>,&#x201d; in <source>2020 6th Information Technology International Seminar (ITIS)</source> (<publisher-loc>Surabaya, Indonesia; Piscataway, NJ</publisher-loc>: 
<publisher-name>IEEE</publisher-name>), <fpage>269</fpage>&#x2013;<lpage>273</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ITIS50118.2020.9321035</pub-id>
</mixed-citation>
</ref>
<ref id="B67">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Shao</surname> <given-names>Y.</given-names></name>
<name><surname>Liu</surname> <given-names>Y.</given-names></name>
<name><surname>Xuan</surname> <given-names>G.</given-names></name>
<name><surname>Shi</surname> <given-names>Y.</given-names></name>
<name><surname>Li</surname> <given-names>Q.</given-names></name>
<name><surname>Hu</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Detection and analysis of sweet potato defects based on hyperspectral imaging technology</article-title>. <source>Infrared Phys. Technol.</source> <volume>127</volume>, <elocation-id>104403</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.infrared.2022.104403</pub-id>
</mixed-citation>
</ref>
<ref id="B68">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Soares</surname> <given-names>S. F. C.</given-names></name>
<name><surname>Gomes</surname> <given-names>A. A.</given-names></name>
<name><surname>Araujo</surname> <given-names>M. C. U.</given-names></name>
<name><surname>Filho</surname> <given-names>A. R. G.</given-names></name>
<name><surname>Galv&#xe3;o</surname> <given-names>R. K. H.</given-names></name>
</person-group> (<year>2013</year>). 
<article-title>The successive projections algorithm</article-title>. <source>TrAC Trends Analytical Chem.</source> <volume>42</volume>, <fpage>84</fpage>&#x2013;<lpage>98</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.trac.2012.09.006</pub-id>
</mixed-citation>
</ref>
<ref id="B69">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Soares</surname> <given-names>F. L. F.</given-names></name>
<name><surname>Marcelo</surname> <given-names>M. C. A.</given-names></name>
<name><surname>Porte</surname> <given-names>L. M. F.</given-names></name>
<name><surname>Pontes</surname> <given-names>O. F. S.</given-names></name>
<name><surname>Kaiser</surname> <given-names>S.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Inline simultaneous quantitation of tobacco chemical composition by infrared hyperspectral image associated with chemometrics</article-title>. <source>Microchemical J.</source> <volume>151</volume>, <elocation-id>104225</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.microc.2019.104225</pub-id>
</mixed-citation>
</ref>
<ref id="B70">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Song</surname> <given-names>X.</given-names></name>
<name><surname>Du</surname> <given-names>G.</given-names></name>
<name><surname>Li</surname> <given-names>Q.</given-names></name>
<name><surname>Tang</surname> <given-names>G.</given-names></name>
<name><surname>Huang</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Rapid spectral analysis of agro-products using an optimal strategy: dynamic backward interval PLS&#x2013;competitive adaptive reweighted sampling</article-title>. <source>Analytical Bioanalytical Chem.</source> <volume>412</volume>, <fpage>2795</fpage>&#x2013;<lpage>2804</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00216-020-02506-x</pub-id>, PMID: <pub-id pub-id-type="pmid">32090279</pub-id>
</mixed-citation>
</ref>
<ref id="B71">
<mixed-citation publication-type="book">
<person-group person-group-type="author"><collab>Standardization Administration of China</collab>
</person-group> (<year>1992</year>). <source><italic>Flue-cured tobacco</italic> (GB 2635-1992)</source> (<publisher-loc>Beijing, China</publisher-loc>: 
<publisher-name>Standardization Administration of China</publisher-name>).
</mixed-citation>
</ref>
<ref id="B72">
<mixed-citation publication-type="book">
<person-group person-group-type="author"><collab>State Tobacco Monopoly Administration</collab>
</person-group> (<year>2003</year>). <source><italic>Tobacco and tobacco products&#x2014;Sensory evaluation methods</italic> (YC/T 173-2003)</source> (<publisher-loc>Beijing, China</publisher-loc>: 
<publisher-name>China Standard Press</publisher-name>).
</mixed-citation>
</ref>
<ref id="B73">
<mixed-citation publication-type="book">
<person-group person-group-type="author"><collab>State Tobacco Monopoly Administration</collab>
</person-group> (<year>2011</year>). <source><italic>Tobacco and tobacco products&#x2014;Determination of polyphenolic compounds&#x2014;High performance liquid chromatography method</italic> (YC/T 162-2011)</source> (<publisher-loc>Beijing, China</publisher-loc>: 
<publisher-name>China Standard Press</publisher-name>).
</mixed-citation>
</ref>
<ref id="B74">
<mixed-citation publication-type="book">
<person-group person-group-type="author"><collab>State Tobacco Monopoly Administration</collab>
</person-group> (<year>2019</year>). <source><italic>Tobacco and tobacco products&#x2014;Determination of water-soluble sugars&#x2014;Continuous flow method</italic> (YC/T 159-2019)</source> (<publisher-loc>Beijing, China</publisher-loc>: 
<publisher-name>China Standard Press</publisher-name>).
</mixed-citation>
</ref>
<ref id="B75">
<mixed-citation publication-type="book">
<person-group person-group-type="author"><collab>State Tobacco Monopoly Administration</collab>
</person-group> (<year>2021</year>). <source><italic>Tobacco and tobacco products&#x2014;Determination of alkaloids&#x2014;Gas chromatography-mass spectrometry method</italic> (YC/T 468-2021)</source> (<publisher-loc>Beijing, China</publisher-loc>: 
<publisher-name>China Standard Press</publisher-name>).
</mixed-citation>
</ref>
<ref id="B76">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Stepanov</surname> <given-names>I.</given-names></name>
<name><surname>Berman</surname> <given-names>M.</given-names></name>
<name><surname>Brinkman</surname> <given-names>M. C.</given-names></name>
<name><surname>Carll</surname> <given-names>A.</given-names></name>
<name><surname>Exil</surname> <given-names>V.</given-names></name>
<name><surname>Hansen</surname> <given-names>E. G.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Sugars in tobacco products: Toxicity research and implications for tobacco product regulation</article-title>. <source>Chem. Res. Toxicol.</source> <volume>38</volume>, <fpage>747</fpage>&#x2013;<lpage>758</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1021/acs.chemrestox.4c00550</pub-id>, PMID: <pub-id pub-id-type="pmid">40233929</pub-id>
</mixed-citation>
</ref>
<ref id="B77">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sun</surname> <given-names>Q.</given-names></name>
<name><surname>Liu</surname> <given-names>X.</given-names></name>
<name><surname>Bourennane</surname> <given-names>S.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Unsupervised multi-level feature extraction for improvement of hyperspectral classification</article-title>. <source>Remote Sens.</source> <volume>13</volume>, <elocation-id>1602</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs13081602</pub-id>
</mixed-citation>
</ref>
<ref id="B78">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Talhout</surname> <given-names>R.</given-names></name>
<name><surname>Opperhuizen</surname> <given-names>A.</given-names></name>
<name><surname>van Amsterdam</surname> <given-names>J. G. C.</given-names></name>
</person-group> (<year>2006</year>). 
<article-title>Sugars as tobacco ingredient: Effects on mainstream smoke composition</article-title>. <source>Food Chem. Toxicol.</source> <volume>44</volume>, <fpage>1789</fpage>&#x2013;<lpage>1798</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.fct.2006.06.016</pub-id>, PMID: <pub-id pub-id-type="pmid">16904804</pub-id>
</mixed-citation>
</ref>
<ref id="B79">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Tang</surname> <given-names>G.</given-names></name>
<name><surname>Huang</surname> <given-names>Y.</given-names></name>
<name><surname>Tian</surname> <given-names>K.</given-names></name>
<name><surname>Song</surname> <given-names>X.</given-names></name>
<name><surname>Yan</surname> <given-names>H.</given-names></name>
<name><surname>Hu</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2014</year>). 
<article-title>A new spectral variable selection pattern using competitive adaptive reweighted sampling combined with successive projections algorithm</article-title>. <source>Analyst</source> <volume>139</volume>, <fpage>4894</fpage>&#x2013;<lpage>4902</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1039/C4AN00837E</pub-id>, PMID: <pub-id pub-id-type="pmid">25078711</pub-id>
</mixed-citation>
</ref>
<ref id="B80">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Theissler</surname> <given-names>A.</given-names></name>
<name><surname>Thomas</surname> <given-names>M.</given-names></name>
<name><surname>Burch</surname> <given-names>M.</given-names></name>
<name><surname>Gerschner</surname> <given-names>F.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>ConfusionVis: Comparative evaluation and selection of multi-class classifiers based on confusion matrices</article-title>. <source>Knowledge-Based Syst.</source> <volume>247</volume>, <elocation-id>108651</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.knosys.2022.108651</pub-id>
</mixed-citation>
</ref>
<ref id="B81">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Toleva</surname> <given-names>B.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>The proportion for splitting data into training and test set for the bootstrap in classification problems</article-title>. <source>Business Syst. Res. J.</source> <volume>12</volume>, <fpage>228</fpage>&#x2013;<lpage>242</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2478/bsrj-2021-0015</pub-id>
</mixed-citation>
</ref>
<ref id="B82">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>Y.</given-names></name>
<name><surname>Jiang</surname> <given-names>F.</given-names></name>
<name><surname>Gupta</surname> <given-names>B. B.</given-names></name>
<name><surname>Rho</surname> <given-names>S.</given-names></name>
<name><surname>Liu</surname> <given-names>Q.</given-names></name>
<name><surname>Hou</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2018</year>). 
<article-title>Variable selection and optimization in rapid detection of soybean straw biomass based on CARS</article-title>. <source>IEEE Access</source> <volume>6</volume>, <fpage>5290</fpage>&#x2013;<lpage>5299</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2017.2763596</pub-id>
</mixed-citation>
</ref>
<ref id="B83">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>Y.</given-names></name>
<name><surname>Ou</surname> <given-names>X.</given-names></name>
<name><surname>He</surname> <given-names>H.-J.</given-names></name>
<name><surname>Kamruzzaman</surname> <given-names>M.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Advancements, limitations and challenges in hyperspectral imaging for comprehensive assessment of wheat quality: An up-to-date review</article-title>. <source>Food Chemistry: X</source> <volume>21</volume>, <elocation-id>101235</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.fochx.2024.101235</pub-id>, PMID: <pub-id pub-id-type="pmid">38420503</pub-id>
</mixed-citation>
</ref>
<ref id="B84">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>H.</given-names></name>
<name><surname>Yao</surname> <given-names>Y.</given-names></name>
<name><surname>Junyi</surname> <given-names>L.</given-names></name>
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
<name><surname>Zhao</surname> <given-names>Y.</given-names></name>
<name><surname>Li</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Unsupervised cross-regional and cross-year adaptation by climate indicator discrepancy for crop classification</article-title>. <source>J. Remote Sens.</source> <volume>5</volume>, <elocation-id>439</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.34133/remotesensing.0439</pub-id>
</mixed-citation>
</ref>
<ref id="B85">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wei</surname> <given-names>X.</given-names></name>
<name><surname>Deng</surname> <given-names>C.</given-names></name>
<name><surname>Fang</surname> <given-names>W.</given-names></name>
<name><surname>Xie</surname> <given-names>C.</given-names></name>
<name><surname>Liu</surname> <given-names>S.</given-names></name>
<name><surname>Lu</surname> <given-names>M.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Classification method for folded flue-cured tobacco based on hyperspectral imaging and conventional neural networks</article-title>. <source>Ind. Crops Products</source> <volume>212</volume>, <elocation-id>118279</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.indcrop.2024.118279</pub-id>
</mixed-citation>
</ref>
<ref id="B86">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wu</surname> <given-names>S.</given-names></name>
<name><surname>Guo</surname> <given-names>Y.</given-names></name>
<name><surname>Adil</surname> <given-names>M. F.</given-names></name>
<name><surname>Sehar</surname> <given-names>S.</given-names></name>
<name><surname>Cai</surname> <given-names>B.</given-names></name>
<name><surname>Xiang</surname> <given-names>Z.</given-names></name>
<etal/>
</person-group>. (<year>2020</year>). 
<article-title>Comparative proteomic analysis by iTRAQ reveals that plastid pigment metabolism contributes to leaf color changes in tobacco (Nicotiana tabacum) during curing</article-title>. <source>Int. J. Mol. Sci.</source> <volume>21</volume>, <elocation-id>2394</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/ijms21072394</pub-id>, PMID: <pub-id pub-id-type="pmid">32244294</pub-id>
</mixed-citation>
</ref>
<ref id="B87">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wu</surname> <given-names>L.</given-names></name>
<name><surname>Xu</surname> <given-names>Q.</given-names></name>
<name><surname>Su</surname> <given-names>C.</given-names></name>
<name><surname>Yin</surname> <given-names>X.</given-names></name>
<name><surname>Huo</surname> <given-names>X.</given-names></name>
<name><surname>Zhao</surname> <given-names>X.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Classification of quality grading of Anji white tea using hyperspectral imaging and data fusion techniques</article-title>. <source>J. Food Composition Anal.</source> <volume>142</volume>, <elocation-id>107563</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jfca.2025.107563</pub-id>
</mixed-citation>
</ref>
<ref id="B88">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wu</surname> <given-names>S.</given-names></name>
<name><surname>Zeng</surname> <given-names>Y.</given-names></name>
<name><surname>Hao</surname> <given-names>D.</given-names></name>
<name><surname>Liu</surname> <given-names>Q.</given-names></name>
<name><surname>Li</surname> <given-names>J.</given-names></name>
<name><surname>Chen</surname> <given-names>X.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>Quantifying leaf optical properties with spectral invariants theory</article-title>. <source>Remote Sens. Environ.</source> <volume>253</volume>, <elocation-id>112131</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.rse.2020.112131</pub-id>
</mixed-citation>
</ref>
<ref id="B89">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Xin</surname> <given-names>X. W.</given-names></name>
<name><surname>Gong</surname> <given-names>H. L.</given-names></name>
<name><surname>Hu</surname> <given-names>R. T.</given-names></name>
<name><surname>Ding</surname> <given-names>X. Q.</given-names></name>
<name><surname>Pang</surname> <given-names>S. P.</given-names></name>
<name><surname>Che</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Intelligent large-scale flue-cured tobacco grading based on deep densely convolutional network</article-title>. <source>Sci. Rep.</source> <volume>13</volume>, <elocation-id>38334</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-023-38334-z</pub-id>, PMID: <pub-id pub-id-type="pmid">37429961</pub-id>
</mixed-citation>
</ref>
<ref id="B90">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yan</surname> <given-names>C.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>A review on spectral data preprocessing techniques for machine&#xa0;learning and quantitative analysis</article-title>. <source>iScience</source> <volume>28</volume>, <elocation-id>112759</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.isci.2025.112759</pub-id>, PMID: <pub-id pub-id-type="pmid">40606754</pub-id>
</mixed-citation>
</ref>
<ref id="B91">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yang</surname> <given-names>Z. P.</given-names></name>
<name><surname>Jin</surname> <given-names>W. Y.</given-names></name>
<name><surname>Du</surname> <given-names>J. S.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Multi-basis continuous wavelet transform feature wavelengths selection and machine learning with hyperspectral imaging for non-destructive prediction of 1,2-propylene glycol content in cased tobacco leaves</article-title>. <source>Appl. Optics</source> <volume>64</volume>, <fpage>5087</fpage>&#x2013;<lpage>5098</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1364/ao.565221</pub-id>, PMID: <pub-id pub-id-type="pmid">40792803</pub-id>
</mixed-citation>
</ref>
<ref id="B92">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yang</surname> <given-names>Q.</given-names></name>
<name><surname>Kun</surname> <given-names>M.</given-names></name>
<name><surname>Ying</surname> <given-names>L.</given-names></name>
<name><surname>Jiaquan</surname> <given-names>W.</given-names></name>
<name><surname>Xinyu</surname> <given-names>Z.</given-names></name>
<name><surname>Yang</surname> <given-names>S.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Prediction&#xa0;model of nicotine and glycerol in reconstituted tobacco leaves based on support vector machine algorithm</article-title>. <source>J. Braz. Chem. Soc.</source> <volume>35</volume> (<issue>5</issue>), <page-range>1&#x2013;8</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.21577/0103-5053.20230174</pub-id>
</mixed-citation>
</ref>
<ref id="B93">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yang</surname> <given-names>H. Y.</given-names></name>
<name><surname>Sun</surname> <given-names>G. W.</given-names></name>
<name><surname>Yin</surname> <given-names>G. T.</given-names></name>
<name><surname>Sun</surname> <given-names>H. Y.</given-names></name>
<name><surname>Wang</surname> <given-names>T.</given-names></name>
<name><surname>Bai</surname> <given-names>T.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Browning mechanism of tobacco leaves during flue-curing process: Proteomics and metabolomics analysis reveals the changes in materials</article-title>. <source>Materials Express</source> <volume>13</volume>, <fpage>1068</fpage>&#x2013;<lpage>1080</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1166/mex.2023.2443</pub-id>
</mixed-citation>
</ref>
<ref id="B94">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yang</surname> <given-names>R.</given-names></name>
<name><surname>Tian</surname> <given-names>H.</given-names></name>
<name><surname>Kan</surname> <given-names>J.</given-names></name>
</person-group> (<year>2018</year>). 
<article-title>Classification of sugar beets based on hyperspectral and extreme learning machine methods</article-title>. <source>Appl. Eng. Agric.</source> <volume>34</volume>, <fpage>891</fpage>&#x2013;<lpage>897</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.13031/aea.12903</pub-id>
</mixed-citation>
</ref>
<ref id="B95">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yang</surname> <given-names>L.</given-names></name>
<name><surname>Yang</surname> <given-names>Q.-X.</given-names></name>
<name><surname>Yang</surname> <given-names>S.-H.</given-names></name>
<name><surname>Wang</surname> <given-names>J.-J.</given-names></name>
<name><surname>Hou</surname> <given-names>Y.</given-names></name>
<name><surname>Wang</surname> <given-names>B.-X.</given-names></name>
<etal/>
</person-group>. (<year>2015</year>). 
<article-title>Application of near infrared spectroscopy to detect mould contamination in tobacco</article-title>. <source>J. Near Infrared Spectrosc.</source> <volume>23</volume>, <fpage>391</fpage>&#x2013;<lpage>400</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1255/jnirs.1004</pub-id>
</mixed-citation>
</ref>
<ref id="B96">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>M.</given-names></name>
<name><surname>Chen</surname> <given-names>T.</given-names></name>
<name><surname>Gu</surname> <given-names>X.</given-names></name>
<name><surname>Chen</surname> <given-names>D.</given-names></name>
<name><surname>Wang</surname> <given-names>C.</given-names></name>
<name><surname>Wu</surname> <given-names>W.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Hyperspectral remote sensing for tobacco quality estimation, yield prediction, and stress detection: A review of applications and methods</article-title>. <source>Front. Plant Sci.</source> <volume>14</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2023.1073346</pub-id>, PMID: <pub-id pub-id-type="pmid">36968402</pub-id>
</mixed-citation>
</ref>
<ref id="B97">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>He</surname> <given-names>L.</given-names></name>
</person-group> (<year>2011</year>). 
<article-title>A preliminary study on the near infrared spectral characteristics of purchased flue-cured tobacco</article-title>. <source>Advanced Materials Res.</source> <volume>396&#x2013;398</volume>, <fpage>2027</fpage>&#x2013;<lpage>2032</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.4028/www.scientific.net/AMR.396-398.2027</pub-id>
</mixed-citation>
</ref>
<ref id="B98">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>W.</given-names></name>
<name><surname>Kasun</surname> <given-names>L. C.</given-names></name>
<name><surname>Wang</surname> <given-names>Q. J.</given-names></name>
<name><surname>Zheng</surname> <given-names>Y.</given-names></name>
<name><surname>Lin</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>A review of machine&#xa0;learning for near-infrared spectroscopy</article-title>. <source>J. Spectrosc.</source> <volume>2022</volume>, <elocation-id>9764</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2022/9764</pub-id>
</mixed-citation>
</ref>
<ref id="B99">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>F.</given-names></name>
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
</person-group> (<year>2011</year>). 
<article-title>Classification and quality evaluation of tobacco leaves based on image processing and fuzzy comprehensive evaluation</article-title>. <source>Sensors</source> <volume>11</volume>, <fpage>2369</fpage>&#x2013;<lpage>2384</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s110302369</pub-id>, PMID: <pub-id pub-id-type="pmid">22163744</pub-id>
</mixed-citation>
</ref>
<ref id="B100">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhong</surname> <given-names>J.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Study of K<sup>+</sup> uptake kinetics of flue-cured tobacco in K<sup>+</sup>-enriched and conventional tobacco genotypes</article-title>. <source>J. Plant Nutr.</source> <volume>42</volume>, <fpage>805</fpage>&#x2013;<lpage>811</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/01904167.2018.1450418</pub-id>
</mixed-citation>
</ref>
<ref id="B101">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhou</surname> <given-names>J.</given-names></name>
<name><surname>Li</surname> <given-names>Y.</given-names></name>
<name><surname>Bean</surname> <given-names>S. R.</given-names></name>
<name><surname>Armstrong</surname> <given-names>P. R.</given-names></name>
<name><surname>Wu</surname> <given-names>X.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Rapid and nondestructive prediction of total starch and amylose contents in single sorghum kernel (SSK) based on near infrared (NIR) spectroscopy</article-title>. <source>Carbohydr. Polymers</source> <volume>368</volume>, <elocation-id>124257</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.carbpol.2025.124257</pub-id>, PMID: <pub-id pub-id-type="pmid">40947234</pub-id>
</mixed-citation>
</ref>
<ref id="B102">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhu</surname> <given-names>H.</given-names></name>
<name><surname>Chu</surname> <given-names>B.</given-names></name>
<name><surname>Zhang</surname> <given-names>C.</given-names></name>
<name><surname>Liu</surname> <given-names>F.</given-names></name>
<name><surname>Jiang</surname> <given-names>L.</given-names></name>
<name><surname>He</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2017</year>). 
<article-title>Hyperspectral imaging&#xa0;for presymptomatic detection of tobacco disease with successive projections algorithm and machine-learning classifiers</article-title>. <source>Sci. Rep.</source> <volume>7</volume>, <elocation-id>4125</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-017-04501-2</pub-id>, PMID: <pub-id pub-id-type="pmid">28646177</pub-id>
</mixed-citation>
</ref>
<ref id="B103">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhu</surname> <given-names>Z.</given-names></name>
<name><surname>Qi</surname> <given-names>G.</given-names></name>
<name><surname>Lei</surname> <given-names>Y.</given-names></name>
<name><surname>Jiang</surname> <given-names>D.</given-names></name>
<name><surname>Mazur</surname> <given-names>N.</given-names></name>
<name><surname>Liu</surname> <given-names>Y.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>A long short-term memory neural network based simultaneous quantitative analysis of multiple tobacco chemical components by near-infrared hyperspectroscopy images</article-title>. <source>Chemosensors</source> <volume>10</volume>, <elocation-id>164</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/chemosensors10050164</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3152000">Sathishkumar Samiappan</ext-link>, The University of Tennessee, United States</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1273775">Yue Zhang</ext-link>, Nanjing University of Chinese Medicine, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3331051">Sneha Sharma</ext-link>, Department of Primary Industries and Regional Development, Australia</p></fn>
</fn-group>
</back>
</article>