<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2026.1745720</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Classification of <italic>Lupinus</italic> seeds into sweet and bitter categories using VIS&#x02013;NIR spectroscopy and machine learning</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>D&#x000ED;az-&#x000C1;lvarez</surname> <given-names>Josefa</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/1309654"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Galea-Gragera</surname> <given-names>Francisco A.</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/3397668"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Ch&#x000E1;vez de la O</surname> <given-names>Francisco</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/3398610"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Salguero-L&#x000F3;pez</surname> <given-names>Pedro A.</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/3398062"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Llera Cid</surname> <given-names>Fernando</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/3397701"/>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Departamento de Tecnolog&#x000ED;a de los Computadores y Comunicaciones, Centro Universitario de M&#x000E9;rida, Universidad de Extremadura</institution>, <city>M&#x000E9;rida</city>, <country country="es">Spain</country></aff>
<aff id="aff2"><label>2</label><institution>Pasture and Forage Crops Area, &#x0201C;Finca La Orden-Valdesequera&#x0201D; Agricultural Research Institute, Extremadura Scientific and Technological Research Centre (CICYTEX)</institution>, <city>Badajoz</city>, <country country="es">Spain</country></aff>
<aff id="aff3"><label>3</label><institution>Departamento de Ingenier&#x000ED;a de Sistemas Inform&#x000E1;ticos y Telem&#x000E1;ticos, Centro Universitario de M&#x000E9;rida, Universidad de Extremadura</institution>, <city>M&#x000E9;rida</city>, <country country="es">Spain</country></aff>
<author-notes>
<corresp id="c001"><label>&#x0002A;</label>Correspondence: Josefa D&#x000ED;az-&#x000C1;lvarez, <email xlink:href="mailto:mjdiaz@unex.es">mjdiaz@unex.es</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-27">
<day>27</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>9</volume>
<elocation-id>1745720</elocation-id>
<history>
<date date-type="received">
<day>13</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>05</day>
<month>02</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2026 D&#x000ED;az-&#x000C1;lvarez, Galea-Gragera, Ch&#x000E1;vez de la O, Salguero-L&#x000F3;pez and Llera Cid.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>D&#x000ED;az-&#x000C1;lvarez, Galea-Gragera, Ch&#x000E1;vez de la O, Salguero-L&#x000F3;pez and Llera Cid</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-27">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Purpose</title>
<p>The <italic>Lupinus</italic> germplasm includes sweet and bitter materials distinguished by compounds responsible for bitterness. Conventional identification is often destructive. This study assesses a non-destructive approach based on visible&#x02013;near infrared (VIS-NIR) spectroscopy and machine learning to classify whole seeds from seven <italic>Lupinus</italic> species into sweet or bitter classes.</p></sec>
<sec>
<title>Methods</title>
<p>Five machine-learning algorithms were evaluated on two datasets (reflectance and absorbance) acquired with VIS-NIR spectroscopy. Analyses were conducted on raw spectra and on spectra transformed using four spectral-transformation techniques. Because classes were imbalanced, five resampling methods were compared to improve classification performance.</p></sec>
<sec>
<title>Results</title>
<p>Performance was assessed using <italic>F1-score</italic> and <italic>ROC-AUC</italic>. On reflectance, LGR and SVC reached 92.5 and 92.0%; on absorbance, SVC and RF achieved 93.2 and 92.5%. Hybrid transformations consistently improved discrimination, and resampling reduced overfitting associated with class imbalance.</p></sec>
<sec>
<title>Conclusion</title>
<p>The results indicate that combining VIS&#x02013;NIR spectroscopy with machine learning provides a suitable non-destructive alternative to discriminate sweet and bitter <italic>Lupinus</italic> materials/ecotypes.</p></sec></abstract>
<kwd-group>
<kwd>absorbance spectra</kwd>
<kwd>artificial intelligence</kwd>
<kwd>food sustainability</kwd>
<kwd>resampling methods</kwd>
<kwd>seed phenotyping</kwd>
<kwd>spectral reflectance</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This research is supported by the Spanish Ministry of Science and Innovation under project PID2023-147409NB-C22 funded by MCIN/AEI/10.13039/501100011033, Junta de Extremadura, project GR24142 and by &#x0201C;ERDF A way of making Europe.&#x0201D;</funding-statement>
</funding-group>
<counts>
<fig-count count="11"/>
<table-count count="5"/>
<equation-count count="1"/>
<ref-count count="61"/>
<page-count count="18"/>
<word-count count="10792"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>AI in Food, Agriculture and Water</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="introduction" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Lupin is a legume of the genus <italic>Lupinus</italic> (Fabaceae), comprising more than 200 species. Its cultivation has been documented since antiquity, and species are native to the Mediterranean basin, North Africa, and the Americas (North and South). From these centers of origin, distribution later expanded to Asia, Australia, and Europe (<xref ref-type="fig" rid="F1">Figure 1</xref>).</p>
<fig position="float" id="F1">
<label>Figure 1</label>
<caption><p><italic>Lupinus</italic> species. Image credit: Francisco A. Galea-Gragera and Fernando Llera Cid.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1745720-g0001.tif">
<alt-text content-type="machine-generated">Collage showing different lupin plant varieties on the left with various flower colors and shapes arranged in a grid, and on the right, a corresponding grid of close-up images displaying a range of lupin seeds with distinctive colors and markings.</alt-text>
</graphic>
</fig>
<p>The process of domesticating lupins first took place in ancient civilisations. The factors that primarily drove it were its utility as a nutritional source, its capacity to enhance poor soils by fixing nitrogen, and its significant role in rotational growing cycles, which are crucial for a sustainable agricultural system. Since the second half of the 20th century, there has been an increasing interest in cultivating lupins, particularly for their nutritional potential, lack of genetic modification, sustainable intake, health benefits, and lower cost (<xref ref-type="bibr" rid="B5">Boukid and Pasqualone, 2022</xref>; <xref ref-type="bibr" rid="B24">Johnson et al., 2017</xref>; <xref ref-type="bibr" rid="B57">Valente et al., 2024</xref>).</p>
<p><italic>Lupinus</italic> seeds have a high protein content (36%&#x02013;42%), comparable to or exceeding soybean (36%&#x02013;40%). They are used in bakery products, pasta, snack formulations, and cosmetics (<xref ref-type="bibr" rid="B16">Food Innovation, 2020</xref>). The combination of plant proteins, high fiber content, and low saturated fat has been associated with improved glycaemic control and reduced cardiovascular risk (<xref ref-type="bibr" rid="B5">Boukid and Pasqualone, 2022</xref>). Seeds also contain bioactive compounds&#x02014;polyphenols, carotenoids, phytosterols, tocopherols, alkaloids, and peptides&#x02014;with antioxidant, anti-inflammatory, antimicrobial, and anticancer activities, of potential public-health relevance (<xref ref-type="bibr" rid="B36">Mazumder et al., 2024</xref>). This portfolio of properties has likewise attracted interest from the pharmaceutical sector (<xref ref-type="bibr" rid="B46">Romeo et al., 2018</xref>).</p>
<p>Lupin alkaloids, particularly quinolizidine alkaloids, represent a natural defense mechanism; nevertheless, they are toxic to humans and their ingestion poses a significant health risk. Lupin alkaloid poisoning in humans can affect the nervous, circulatory, and digestive systems (<xref ref-type="bibr" rid="B50">Salsano et al., 2025</xref>).</p>
<p>Alkaloid content underpins the operational classification of material as bitter (&#x02248;1%&#x02013;2%) or sweet (&#x0003C;0.05%), which in turn guides feed vs. food uses. &#x0201C;Sweet&#x0201D; material is not naturally occurring but results from selection and breeding to reduce alkaloid concentration. Cultivated species with sweet cultivars include <italic>Lupinus albus</italic> (white lupin), <italic>L. angustifolius</italic> (blue or narrow-leaf), <italic>L. luteus</italic> (yellow), and <italic>L. mutabilis</italic> (tarwi). Even within these groups, alkaloid levels vary among varieties, and debittering&#x02014;brine washing, cooking, fermentation, or ultrasound&#x02014;is often required (<xref ref-type="bibr" rid="B9">EFSA Panel on Contaminants in the Food Chain (CONTAM) et al., 2019</xref>; <xref ref-type="bibr" rid="B13">Estivi et al., 2023</xref>).</p>
<p>Traditionally, sweet/bitter determination has relied on the quantification of alkaloids using destructive reference methods, including LC&#x02013;MS/MS (<xref ref-type="bibr" rid="B27">Khedr et al., 2023</xref>), GC&#x02013;MS (<xref ref-type="bibr" rid="B46">Romeo et al., 2018</xref>), Soxhlet/Randall extraction (<xref ref-type="bibr" rid="B28">Kniepkamp, 2024</xref>), and HPLC (<xref ref-type="bibr" rid="B14">Eugelio et al., 2023</xref>). In parallel, non-destructive techniques such as near-infrared spectroscopy (NIRS), FTIR spectroscopy, and hyperspectral imaging (HSI) have gained prominence. NIRS is a rapid and effective tool for agri-food quality control, including the indirect assessment of bitterness-related traits in <italic>Lupinus</italic> (<xref ref-type="bibr" rid="B52">Schwertfirm et al., 2024</xref>). FTIR provides high spectral resolution for functional-group identification (<xref ref-type="bibr" rid="B38">Mirza et al., 2023</xref>). Finally, HSI integrates spatial and spectral information for quality evaluation (<xref ref-type="bibr" rid="B10">Elmasry et al., 2012</xref>; <xref ref-type="bibr" rid="B55">Siche et al., 2016</xref>).</p>
<p>This research aims to classify <italic>Lupinus</italic> material as sweet or bitter using seed-level VIS&#x02013;NIR spectra (reflectance and absorbance) and a range of machine-learning algorithms. The combination of VIS&#x02013;NIR spectroscopy and machine learning provides a rapid, non-destructive approach to discriminating between sweet and bitter classes. We adopt a data-driven machine-learning/AI strategy with a proven track record across agri-food applications.</p>
<p>The use of artificial intelligence to address real-world problems has proven effective across multiple domains. Within AI, machine learning (ML) is particularly prominent due to its broad applicability and methodological reach. Precision agriculture still presents substantial methodological and operational challenges. In this setting, ML techniques provide analytical power and practical solutions to well-characterized problems.</p>
<p>Research on <italic>Lupinus</italic> follows this global trend, as reflected by an increasing number of publications. A prominent line of work addresses the detection of invasive lupins (<xref ref-type="bibr" rid="B47">Sabat-Tomala et al., 2021</xref>, <xref ref-type="bibr" rid="B48">2024</xref>; <xref ref-type="bibr" rid="B51">Schulze-Br&#x000FC;ninghoff et al., 2021</xref>). For instance, <xref ref-type="bibr" rid="B8">Danilevicz et al. (2023)</xref> developed a deep-learning model operating on segmented images to identify sandplain lupins, particularly narrow-leaf lupins, achieving 80.3% performance. In <xref ref-type="bibr" rid="B60">Wijesingha et al. (2020)</xref>, a Random Forest classifier discriminated lupin vs. non-lupin images with 89% mean accuracy. A comprehensive systematic review is provided by <xref ref-type="bibr" rid="B56">Singh et al. (2024)</xref>. Furthermore, <xref ref-type="bibr" rid="B43">Petropoulos et al. (2025)</xref> evaluated six ML models for crop-yield prediction using Sentinel-2 data, with XGBoost performing best (<italic>R</italic><sup>2</sup> &#x0003D; 87.56%).</p>
<p>Evidence on lupin classification remains limited. In <xref ref-type="bibr" rid="B6">Co&#x000EF;sson et al. (2011)</xref>, <italic>L. albus</italic> and <italic>L. angustifolius</italic> were discriminated by applying principal component analysis (PCA) and self-organizing maps (SOM) to chemical parameters; the first three components explained 76.3% of the variance. In <xref ref-type="bibr" rid="B17">Freire Diaz et al. (2021)</xref>, an artificial-vision system was proposed to classify &#x0201C;sweet&#x0201D; <italic>L. mutabilis</italic> grains based on color and shape features.</p>
<p>Sweet&#x02013;bitter discrimination has predominantly relied on alkaloid quantification using destructive analytical methods. Most studies employ GC&#x02013;MS, LC&#x02013;MS/MS, or related techniques. For example, <xref ref-type="bibr" rid="B30">Kroc et al. (2017)</xref> assessed quantitative and qualitative alkaloid composition in 367 <italic>L. albus</italic> ecotypes from the Polish Gene Bank, revealing marked differentiation associated with domestication. In <xref ref-type="bibr" rid="B31">Lee et al. (2020)</xref>, quinolizidine alkaloids were quantified in three lupin products (beans, cookies, and a beverage) to evaluate consumer safety. Furthermore, <xref ref-type="bibr" rid="B27">Khedr et al. (2023)</xref> applied LC&#x02013;MS/MS to five <italic>Lupinus</italic> species from a single farm targeting six alkaloids, and <xref ref-type="bibr" rid="B26">Khedr et al. (2024)</xref> compared the alkaloid content of commercial narrow-leafed lupin with wild ecotypes.</p>
<p><xref ref-type="bibr" rid="B34">Madelou et al. (2024)</xref> developed a methodology for simultaneously quantifying eight major lupin alkaloids using quantitative NMR spectroscopy (qNMR) as an efficient and rapid analytical tool to detect variations in alkaloid content between species and subspecies. Recently, <xref ref-type="bibr" rid="B54">Sharma et al. (2024)</xref> presented a complete literature review of the latest advancements and methodologies for the detection and separation of alkaloids. <xref ref-type="bibr" rid="B49">Saini et al. (2024)</xref> also published the latest update of analytical methods for bioactive alkaloids.</p>
<p>Recently, <xref ref-type="bibr" rid="B15">Eugelio et al. (2024)</xref> proposed an approach based on UHPLC-QqQ-LIT-MS/MS data to detect the alkaloid profile. They presented an unsupervised learning proposal using Hierarchical Cluster Analysis (HCA) and a supervised approach with partial least squares discriminant analysis (PLS-DA) to classify samples according to their geographical origin. In a similar way, <xref ref-type="bibr" rid="B40">Namdar et al. (2024)</xref> used data provided by LC&#x02013;MS/MS to identify the alkaloid profile using an unsupervised approach with HCA to relate lupin species and their QA. After a comprehensive literature review in Scopus and Google Scholar, it is worth mentioning that these two studies are among the few research works found that include a supervised and/or unsupervised learning perspective to address some kind of classification of <italic>Lupinus</italic>. This highlights the originality of the research work, which aims to discriminate between bitter or sweet <italic>Lupinus</italic> based on spectral data collected from seven different <italic>Lupinus</italic> species.</p></sec>
<sec sec-type="materials|methods" id="s2">
<label>2</label>
<title>Materials and methods</title>
<p>This section describes the methodology and data used to achieve the research objectives. The methodological workflow is shown in <xref ref-type="fig" rid="F2">Figure 2</xref>. The subsections are organized as follows: (i) plant material and data acquisition; (ii) preprocessing, including a brief summary of spectral transformations and resampling methods used to address class imbalance; and (iii) machine-learning techniques evaluated in this study.</p>
<fig position="float" id="F2">
<label>Figure 2</label>
<caption><p>General scheme depicting the methodological phases to reach our research objectives.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1745720-g0002.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a machine learning workflow for raw data processing: raw data undergoes spectral transformation and preprocessing, then hyperparameter search, resampling methods, and machine learning algorithms. Model performance is evaluated and compared based on metrics in a results table.</alt-text>
</graphic>
</fig>
<sec>
<label>2.1</label>
<title>Materials</title>
<p>This research work assessed seven annual species of lupin, including the three most widely cultivated species&#x02014;<italic>Lupinus albus</italic> L. (white lupin), <italic>Lupinus angustifolius</italic> L. (blue lupin), <italic>Lupinus luteus</italic> L. (yellow lupin)&#x02014;and four wild species: <italic>Lupinus hispanicus</italic> Bois and Reut, <italic>Lupinus gredensis</italic> Gand, <italic>Lupinus micranthus</italic> Geus, and <italic>Lupinus cosentinii</italic> Guss. The seeds were selected from the official active collection of the genus <italic>Lupinus</italic> at the CICYTEX Germplasm Bank, located at the &#x0201C;Finca La Orden-Valdesequera&#x0201D; Agricultural Research Institute (&#x0002B;38&#x000B0;51&#x02032;2.5<sup>&#x02032;&#x02032;</sup>, &#x02212;6&#x000B0;40&#x02032;14.7<sup>&#x02032;&#x02032;</sup>; <xref ref-type="fig" rid="F1">Figure 1</xref>). Conservation of ecotypes is conducted in a temperature-controlled environment, where the temperature is carefully regulated between 0 and 2&#x000B0;C and relative humidity is kept at 30% to ensure optimal conditions (<xref ref-type="bibr" rid="B19">Galea-Gragera and Llera Cid, 2025</xref>). Before analysis, seeds were acclimatized to room temperature to homogenize their moisture content and ensure representativeness in the spectral measurements.</p>
<sec>
<label>2.1.1</label>
<title>Assignment of sweet and bitter classes</title>
<p>The classification of accessions as sweet (low alkaloid content) or bitter (high alkaloid content) was obtained from the characterization and evaluation dataset associated with the Official Active Collection of grain <italic>Lupinus</italic> maintained at the CICYTEX Germplasm Bank. This information is managed independently from passport data (MCPD) and is assigned at the accession level (accename), following standard procedures commonly applied in germplasm banks and plant breeding programmes. In this context, material classified as sweet generally refers to seeds with a total quinolizidine alkaloid (QA) content below commonly accepted safety thresholds (approximately 0.02%&#x02013;0.05% on a dry weight basis). These thresholds were established in previous studies using quantitative analytical techniques such as gas chromatography&#x02013;mass spectrometry (GC&#x02013;MS) and liquid chromatography&#x02013;mass spectrometry (LC&#x02013;MS) (<xref ref-type="bibr" rid="B1">Barzaghi et al., 2025</xref>; <xref ref-type="bibr" rid="B12">Engel et al., 2022</xref>; <xref ref-type="bibr" rid="B40">Namdar et al., 2024</xref>).</p>
<p>The sweet/bitter distinction constitutes a classical functional descriptor in the genus <italic>Lupinus</italic>, as quinolizidine alkaloids are the main secondary metabolites responsible for the characteristic bitterness and potential toxicity of the seeds. This criterion has been historically used in germplasm conservation, characterization, and breeding activities (<xref ref-type="bibr" rid="B18">Frick et al., 2017</xref>). In the specific case of the CICYTEX collection, this classification has been established through rapid qualitative alkaloid screening based on the Dragendorff test. This test is a classical colorimetric method widely used for the preliminary discrimination between low- and high-alkaloid <italic>Lupinus</italic> materials in germplasm bank and selection contexts (<xref ref-type="bibr" rid="B1">Barzaghi et al., 2025</xref>; <xref ref-type="bibr" rid="B18">Frick et al., 2017</xref>).</p>
<p>In the present study, the sweet/bitter labels were used as reference classes derived from official characterization records, without performing additional destructive chemical analyses. This decision was made because the primary objective of the work was to evaluate the potential of VIS&#x02013;NIR spectroscopy combined with machine learning techniques as a rapid and non-destructive classification approach for screening and discriminating sweet and bitter material, rather than to replace the quantitative chemical determination of quinolizidine alkaloids by reference analytical techniques such as GC&#x02013;MS or LC&#x02013;MS (<xref ref-type="bibr" rid="B1">Barzaghi et al., 2025</xref>; <xref ref-type="bibr" rid="B40">Namdar et al., 2024</xref>).</p></sec></sec>
<sec>
<label>2.2</label>
<title>Methods</title>
<sec>
<label>2.2.1</label>
<title>Spectral data acquisition</title>
<p>Non-destructive analysis of seeds is performed primarily using near-infrared spectroscopy (NIRS); visible&#x02013;near infrared (VIS&#x02013;NIR) spectroscopy is also used and, when spatial as well as spectral information is required, hyperspectral imaging (HSI). In this study, a FieldSpec 3 spectroradiometer (ASD Inc.) was used to acquire VIS&#x02013;NIR spectra in interactance&#x02013;reflectance mode, covering 350&#x02013;2,500 nm and yielding 2,151 spectral variables. These spectra capture information related to color as well as physiological and biochemical traits. The spectral resolution (full width at half maximum) was 3 nm at 700 nm and 10 nm at 1,400 and 2,100 nm.</p>
<p>Each measurement series was preceded by the recording of the reference spectrum or white reference using a ceramic plate, a procedure repeated for every five samples. The recording of the reference spectra and <italic>Lupinus</italic> seeds was carried out using RS3 Spectral Acquisition Software (ASD Inc.) and the ASD Turntable<xref ref-type="fn" rid="fn0003"><sup>1</sup></xref> (150 mm, 22 rpm) equipped with a halogen light source.</p>
<p>Spectral measurements were acquired under controlled temperature, relative humidity, and lighting conditions. To prevent ambient-light interference, acquisitions were performed in darkness. The spectroscopy room of the Radiometry and Remote Sensing Laboratory (Forage Crops and Pastures Area) at the &#x0201C;Finca La Orden&#x02013;Valdesequera&#x0201D; institute met all requirements. <xref ref-type="fig" rid="F3">Figure 3</xref> shows the FieldSpec 3 and ASD Turntable used.</p>
<fig position="float" id="F3">
<label>Figure 3</label>
<caption><p>Equipment used for the capture and recording of spectral data: FieldSpec 3 spectroradiometer and ASD Turntable probe. Images on the right represent seed images of the seven <italic>Lupinus</italic> species included. Image credit: Francisco A. Galea-Gragera and Fernando Llera Cid.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1745720-g0003.tif">
<alt-text content-type="machine-generated">Laboratory imaging equipment with a camera and light source is positioned beside a control unit on the left. On the right, a collage shows seeds of various colors and patterns arranged in a grid.</alt-text>
</graphic>
</fig>
<p>It should be noted that the controlled temperature and relative humidity conditions refer to seed conservation prior to analysis in the germplasm bank facilities. Spectral acquisition was performed in a dedicated spectroscopy room under controlled lighting conditions (complete darkness) and stable laboratory temperature, following a standardized protocol developed at the Radiometry and Remote Sensing Laboratory of CICYTEX.</p>
<p>Each sample was analyzed in quadruplicate, and the spectrum was computed as the average of twenty-five consecutive scans. This procedure minimizes particle-size effects, improves the representativeness of the measurements, and increases data reproducibility and reliability (<xref ref-type="bibr" rid="B20">Gragera, 2015</xref>; <xref ref-type="bibr" rid="B42">Nicola&#x000EF; et al., 2007</xref>).</p>
<p>Reflectance and absorbance define how materials interact with incident electromagnetic radiation. In this research work, both measurements were collected. The reflectance data (<italic>R</italic>) were directly collected from the FieldSpec 3 instrument. Absorbance spectra were obtained by applying the standard formula <italic>A</italic> &#x0003D; &#x02212;log<sub>10</sub>(<italic>R</italic>) for all spectral measurements. As a result, the reflectance and absorbance spectra are organized into two independent datasets.</p></sec>
<sec>
<label>2.2.2</label>
<title>Preprocessing stage: data transformations</title>
<p>Raw spectra are affected by instrumental noise, environmental conditions, and sample complexity (light scattering, particle size), which degrade data quality and may mask relevant patterns. Consequently, spectral preprocessing is critical to ensure reliable analysis and improve the robustness of ML models (<xref ref-type="bibr" rid="B29">Kotsiantis et al., 2006</xref>).</p>
<p>Preprocessing operated on the averaged spectra obtained during acquisition (Section 2.2.1), which reduces instrumental and sample-related noise and supports robust downstream analysis (<xref ref-type="bibr" rid="B42">Nicola&#x000EF; et al., 2007</xref>). As a first step, a quality-control check for missing values was performed, where none were found. The final dataset comprised 871 spectra, acquired in accordance with the procedure described in Section 2.2.1. Next, transformations were applied to mitigate the impact of signal-to-noise ratio issues, distortions, scattering, and baseline/background effects in the spectra (<xref ref-type="bibr" rid="B37">Miller, 2010</xref>).</p>
<p>The selection of techniques and their combinations was guided by the literature and by empirical tuning on the data (<xref ref-type="bibr" rid="B21">Grisanti et al., 2018</xref>), as these pretreatments have been shown to improve accuracy, reveal spectral patterns, and facilitate interpretation (<xref ref-type="bibr" rid="B39">Mishra et al., 2020</xref>).</p>
<p>The transformation techniques considered were: standard normal variate (SNV, per-spectrum centering and scaling to correct scattering and slope), Savitzky&#x02013;Golay first derivative (1D, baseline reduction and detail enhancement), and baseline-shift correction (BSS) with polynomial fitting (removal of systematic background trends). The first-derivative technique requires careful parameter tuning to avoid noise amplification, particularly when combined with SNV.</p>
<p>The combinations of transformation techniques selected in this research work, together with a brief description and justification (<xref ref-type="bibr" rid="B4">Bian, 2022</xref>; <xref ref-type="bibr" rid="B44">Rinnan et al., 2009</xref>), are listed below:</p>
<list list-type="bullet">
<list-item><p>SNV: it corrects for scattering effects. It is a simple and straightforward processing flow that is suitable for in-line applications or embedded devices where computational resources are limited.</p></list-item>
<list-item><p>SNV&#x0002B;1D (2-3-3): SNV followed by first derivative (window 2-3-3). It is strongly recommended for spectra with narrow peaks, so the shortest window preserves spectral details without compromising the global metric.</p></list-item>
<list-item><p>SNV&#x0002B;1D (2-7-7): SNV followed by first derivative (window 2-7-7). It represents a highly accurate alternative with one less stage, making it ideal for reducing processing time without compromising on robustness.</p></list-item>
<list-item><p>BSS&#x0002B;SNV&#x0002B;1D (2-7-7): sequential correction: BSS, SNV, and first derivative (window 2-7-7). It is perfect for studies where correcting for drift is important and computer resources are not an issue due to its accuracy and consistency.</p></list-item>
</list>
<p>Pretreatments were applied to both reflectance and absorbance, producing for each transformation technique a specific dataset; together with the raw (unpretreated) dataset, these constitute a series of independent datasets used in subsequent analyses. This structure facilitated systematic comparisons and assessment of each pretreatment&#x00027;s impact on the performance of ML algorithms for classifying <italic>Lupinus</italic> seeds as sweet or bitter.</p>
<p>Stratified <italic>K</italic>-fold cross-validation (<italic>K</italic> = 5) was applied to evaluate the classification performance on the reflectance and absorbance datasets. This partitioning reduces the risk of overfitting, particularly under class imbalance (<xref ref-type="bibr" rid="B32">L&#x000F3;pez et al., 2022</xref>).</p></sec>
<sec>
<label>2.2.3</label>
<title>Preprocessing: class balancing</title>
<p>Class imbalance is a salient challenge in ML, particularly in real-world settings where classifiers tend to bias toward the majority class, increasing errors on the minority class and the overall misclassification cost (<xref ref-type="bibr" rid="B25">Kaur et al., 2019</xref>). Here, we address sweet vs. bitter discrimination using spectral information from ecotypes drawn from seven <italic>Lupinus</italic> species. The analysis revealed a non-uniform class distribution (<xref ref-type="fig" rid="F4">Figure 4a</xref>), with under-representation of sweet material in both the reflectance and absorbance datasets.</p>
<fig position="float" id="F4">
<label>Figure 4</label>
<caption><p>Frequency distribution of bitter/sweet <italic>Lupinus</italic> across both datasets (reflectance and absorbance). <bold>(a)</bold> Original data, <italic>IR</italic> &#x0003D; 10.77; after resampling: <bold>(b)</bold> SMOTE-ENN, <italic>IR</italic> &#x0003D; 1.02; <bold>(c)</bold> ADASYN, <italic>IR</italic> &#x0003D; 1.01. Distributions for the other methods are similar.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1745720-g0004.tif">
<alt-text content-type="machine-generated">Pie chart illustration comparing bitter and sweet preferences. Chart a shows 91.5 percent bitter and 8.5 percent sweet. Chart b shows 50.3 percent bitter and 49.7 percent sweet. Chart c shows 50.1 percent bitter and 49.9 percent sweet. Each chart is labeled a, b, or c below.</alt-text>
</graphic>
</fig>
<p>The imbalance ratio (IR) is a simple and popular class imbalance measure, computed as the ratio of the number of samples in the majority class to that in the minority class (<italic>IR</italic> &#x0003D; <italic>N</italic>_<italic>Instances</italic><sub><italic>Majority</italic></sub>/<italic>N</italic>_<italic>Instances</italic><sub><italic>Minority</italic></sub>). The larger the IR, the more severe the imbalance issue (<xref ref-type="bibr" rid="B33">L&#x000F3;pez et al., 2013</xref>). This measure was computed for the two datasets employed in this study, yielding a value of <italic>IR</italic> &#x0003D; 10.77. The accuracy of ML classification is highly affected by imbalanced datasets. To rebalance the class distributions, reduce the imbalance ratio, and improve the robustness of the models, various resampling methods were evaluated. Resampling methods are statistical techniques that create new samples from the training set and gather more information about a sample, thereby improving the model accuracy (<xref ref-type="bibr" rid="B11">Elreedy et al., 2024</xref>; <xref ref-type="bibr" rid="B58">Wang et al., 2021</xref>). <xref ref-type="fig" rid="F4">Figure 4</xref> also shows the frequency distribution after applying two of the resampling methods as illustrative examples: <xref ref-type="fig" rid="F4">Figure 4b</xref> for SMOTE-ENN and <xref ref-type="fig" rid="F4">Figure 4c</xref> for ADASYN. The application of the remaining resampling methods resulted in completely balanced datasets.</p>
<p><xref ref-type="table" rid="T1">Table 1</xref> summarizes the resampling methods applied and the IR resulting after their application. SMOTE-based methods and advanced techniques like ADASYN, which focuses on hard-to-learn instances, were evaluated. For example, BorderLine SMOTE, SMOTE-Tomek, and SVM-SMOTE equalized the number of samples per class (IR = 1.00), while other methods, such as SMOTE-ENN and ADASYN, yielded values close to 1 (SMOTE-ENN, <italic>IR</italic> &#x0003D; 1.02; ADASYN, <italic>IR</italic> &#x0003D; 1.01).</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Description of the resampling methods applied in this research study and imbalance ratio after resampling.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Resampling tech</bold>.</th>
<th valign="top" align="left"><bold>Description</bold></th>
<th valign="top" align="left"><bold>IR</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">SMOTE-ENN</td>
<td valign="top" align="left">SMOTE Edited Nearest Neighbors. The minority class is oversampled using SMOTE, and then ENN is used to remove noisy or misclassified samples. This results in a dataset that is both more balanced and less noisy (<xref ref-type="bibr" rid="B2">Batista et al., 2004</xref>).</td>
<td valign="top" align="left">1.02</td>
</tr>
<tr>
<td valign="top" align="left">SMOTE-Tomek</td>
<td valign="top" align="left">SMOTE Tomek Links. Oversampling the minority class using SMOTE and undersampling with Tomek Links. TL identify pairs of samples from opposing classes that are each other&#x00027;s nearest neighbors and lie near the decision boundary. Eliminating the majority class instance from each Tomek Link enhances class separability and mitigates noise in imbalanced datasets (<xref ref-type="bibr" rid="B2">Batista et al., 2004</xref>).</td>
<td valign="top" align="left">1.00</td>
</tr>
<tr>
<td valign="top" align="left">SVM-SMOTE</td>
<td valign="top" align="left">An SVM classifier is first trained to approximate the borderline region through its support vectors, and SMOTE then oversamples the minority samples along that borderline (<xref ref-type="bibr" rid="B41">Nguyen et al., 2011</xref>).</td>
<td valign="top" align="left">1.00</td>
</tr>
<tr>
<td valign="top" align="left">BL-SMOTE</td>
<td valign="top" align="left">BorderLine SMOTE. BL-SMOTE oversamples minority classes on borderline samples, instances that are often misclassified by their nearest neighbors (<xref ref-type="bibr" rid="B22">Han et al., 2005</xref>).</td>
<td valign="top" align="left">1.00</td>
</tr>
<tr>
<td valign="top" align="left">ADASYN</td>
<td valign="top" align="left">Oversampling focuses on instances that are harder to learn by adaptively assigning weights based on local data distribution (<xref ref-type="bibr" rid="B23">He et al., 2008</xref>).</td>
<td valign="top" align="left">1.01</td>
</tr></tbody>
</table>
</table-wrap>
</sec>
<sec>
<label>2.2.4</label>
<title>Machine learning techniques</title>
<p>Machine learning methods are widely used for their ability to identify complex patterns in data and produce accurate predictions. In this study, we evaluated five supervised algorithms to classify <italic>Lupinus</italic> materials/ecotypes as sweet or bitter from VIS&#x02013;NIR spectral data. The classifiers compared were logistic regression (LGR), multilayer perceptron (MLP), support vector machine (SVC), random forest (RF), and decision tree (DTR). A brief description of each classifier is provided in <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<p>Hyperparameters govern the learning process and strongly influence classifier performance. Selecting an appropriate combination is therefore critical to maximize accuracy. In this study, hyperparameter tuning was performed using GridSearchCV (grid search with cross-validation) from scikit-learn in Python. The large number of possible combinations results in considerable variability in outcomes, depending on the algorithm, transformation technique, and resampling method. GridSearchCV trains and evaluates the model via <italic>K</italic>-fold cross-validation, averaging performance across folds. This ensures robust, generalizable hyperparameter selection and allows control over model complexity. The set of hyperparameters and ranges evaluated for each algorithm and each preprocessing combination is provided in <xref ref-type="table" rid="T3">Table 3</xref>. The optimal values selected for each ML algorithm, data-transformation technique, and resampling method were fixed for the subsequent experiments. To reduce the risk of overfitting under a high-dimensional setting (2,151 wavelengths) relative to sample size, the hyperparameter search space was explicitly defined to control model complexity. For Logistic Regression, the inverse regularization strength (C) and the regularization scheme (e.g., L2 penalty) directly constrain coefficient magnitude, limiting overly flexible decision boundaries. For Random Forests and Decision Trees, complexity was controlled through parameters such as maximum tree depth (max_depth), minimum samples per split (min_samples_split), and minimum samples per leaf (min_samples_leaf), which restrict tree growth and reduce variance. For SVC with RBF kernels, C and gamma jointly regulate the margin and the smoothness of the decision function. For MLP, architectural choices (hidden layer sizes) and regularization (alpha) constrain capacity.
The ranges explored for these hyperparameters are reported in <xref ref-type="table" rid="T3">Table 3</xref>, and the selected values reflect the best cross-validated performance within the bounded search space.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Description of the ML techniques evaluated to classify <italic>Lupinus</italic> seeds as sweet/bitter.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>ML tech</bold>.</th>
<th valign="top" align="left"><bold>Description</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">LGR</td>
<td valign="top" align="left">It estimates the probability that a sample belongs to a specific class by applying a logistic regression function. It is suitable for binary and multi-class classification.</td>
</tr>
<tr>
<td valign="top" align="left">MLP</td>
<td valign="top" align="left">It is a particular type of artificial neural network designed for classification tasks. It consists of one input layer, one or more hidden layers and an output layer. Non-linear activation functions such as sigmoid or ReLU are used by the hidden layers to model complex, non-linear relationships and to classify data.</td>
</tr>
<tr>
<td valign="top" align="left">RF</td>
<td valign="top" align="left">It is a widely used ML algorithm for classification and regression. It combines multiple decision trees based on the idea of ensemble learning, where each tree defines a model that is trained using samples that are randomly chosen and then predicts. The final prediction is decided by majority voting.</td>
</tr>
<tr>
<td valign="top" align="left">SVC</td>
<td valign="top" align="left">It is a variant of the support vector machine (SVM) that has been specially designed for classification tasks. It is based on support vectors, which are used to find the hyperplane that most effectively separates the classes in the data.</td>
</tr>
<tr>
<td valign="top" align="left">DTR</td>
<td valign="top" align="left">It is a hierarchical tree structure made up of a root node, branches, internal decision and leaf nodes. DTR recursively splits the data based on the feature values using a defined criterion (Gini impurity or information gain) until the stopping condition is met. Internal nodes apply split rules based on feature values, which in turn generates new branches. The leaf node identifies the final decision.</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>LGR, logistic regression; MLP, multilayer perceptron; RF, random forest; SVC, support vector classification; DTR, decision tree.</p>
</table-wrap-foot>
</table-wrap>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Hyperparameters and their value ranges explored in the hyperparameter tuning process for each algorithm.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>ML tech</bold></th>
<th valign="top" align="left"><bold>Hyperparameter values explored</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">LGR</td>
<td valign="top" align="left">C: 0.001, 0.01, 0.1, 1, 10, 100, 1000; penalty: l1, l2; solver: newton-cg, lbfgs, liblinear, sag, saga</td>
</tr>
<tr>
<td valign="top" align="left">MLP</td>
<td valign="top" align="left">hidden_layer_sizes: 10, 20, (10, 10); activation: logistic, relu; solver: sgd, adam; alpha: 0.0001, 0.05; learning_rate: adaptive; max_iter: 200</td>
</tr>
<tr>
<td valign="top" align="left">RF</td>
<td valign="top" align="left">n_estimators: 50, 100, 200, 500; max_depth: none, 10, 20, 30; min_samples_split: 2, 5, 10; min_samples_leaf: 1, 2, 4; max_features: sqrt, log2; bootstrap: true, false</td>
</tr>
<tr>
<td valign="top" align="left">SVC</td>
<td valign="top" align="left">Kernel: rbf, poly; C: 0.01, 0.1, 1, 10, 100, 1000; gamma: auto, scale</td>
</tr>
<tr>
<td valign="top" align="left">DTR</td>
<td valign="top" align="left">Criterion: gini, entropy, log_loss; max_depth: none, 10, 20, 30, 50; min_samples_split: 2, 5, 10, 20; min_samples_leaf: 1, 2, 4, 10; max_features: sqrt, log2, none; splitter: best, random</td>
</tr></tbody>
</table>
</table-wrap>
</sec></sec></sec>
<sec id="s3">
<label>3</label>
<title>Experimental results</title>
<p>This section presents the experimental framework and classification results for machine-learning models developed to discriminate sweet and bitter <italic>Lupinus</italic> materials/ecotypes using VIS&#x02013;NIR reflectance and absorbance spectra from whole seeds. The performance of five ML algorithms was evaluated using spectral data as input.</p>
<p>Spectral data were analyzed either in raw form or after applying one of four spectral transformations; subsequently, five resampling methods were applied to handle class imbalance.</p>
<p>This experimental framework involves the comparison and identification of spectral transformations and resampling methods aimed at optimizing the analysis for each algorithm and the study objective. All feasible combinations were evaluated using F1-score and AUC&#x02013;ROC, metrics appropriate for imbalanced datasets. F1-score harmonizes precision and recall (<xref ref-type="disp-formula" rid="EQ1">Equation 1</xref>); AUC&#x02013;ROC is standard for binary classification and quantifies the model&#x00027;s discriminative ability.</p>
<disp-formula id="EQ1"><mml:math id="M1"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>F</mml:mi><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:mi>S</mml:mi><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mo>&#x000D7;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>p</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>/</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>p</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(1)</label></disp-formula>
<p>The results obtained with the stratified <italic>K</italic>-fold cross-validation (<italic>K</italic> = 5) strategy proposed for reflectance and absorbance data are outlined in the next subsection. All reported values refer to the test results.</p>
<p>Before presenting the classification performance metrics, a dedicated spectral analysis was conducted to characterize the intrinsic differences between bitter and sweet <italic>Lupinus</italic> seeds at the signal level. This analysis is independent of any machine learning model and aims to (i) identify systematic spectral contrasts inherent to the seed material; and (ii) provide a physically grounded basis for the interpretation of subsequent classification results and model-derived spectral importance.</p>
<sec>
<label>3.1</label>
<title>Spectral differentiation and selectivity analysis</title>
<sec>
<label>3.1.1</label>
<title>Exploratory spectral comparison between bitter and sweet seeds</title>
<p>Before evaluating supervised classification performance, this section explores which spectral regions contribute to the natural variability between seeds labeled as <italic>bitter</italic> and <italic>sweet</italic>. In this light, a spectral difference analysis was computed as <inline-formula><mml:math id="M2"><mml:mi>&#x00394;</mml:mi><mml:mi>R</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mover accent="false" class="mml-overline"><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mo accent="true">&#x000AF;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textit" mathvariant="italic">bitter</mml:mtext></mml:mstyle></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mover accent="false" class="mml-overline"><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mo accent="true">&#x000AF;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mstyle class="text"><mml:mtext class="textit" mathvariant="italic">sweet</mml:mtext></mml:mstyle></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> using raw reflectance spectra in the 350&#x02013;2,500 nm range. This approach highlights systematic contrasts in the mean spectral signature between classes, which may be partially masked in individual spectra due to the strong spectral collinearity that characterizes VIS&#x02013;NIR data (<xref ref-type="bibr" rid="B45">Roggo et al., 2007</xref>; <xref ref-type="bibr" rid="B59">Weyer and Lo, 2006</xref>).</p>
<p><xref ref-type="fig" rid="F5">Figure 5</xref> shows the spectral difference profile together with the &#x000B1; standard error of the mean (SEM) band at each wavelength. This representation enables the magnitude of the class contrast and its relative stability against within-class variability to be assessed simultaneously (<xref ref-type="bibr" rid="B53">Sem, 2021</xref>). Consequently, it facilitates the identification of wavelength regions where the separation between <italic>bitter</italic> and <italic>sweet</italic> exceeds internal dispersion, suggesting structured spectral differences rather than instrumental noise.</p>
<fig position="float" id="F5">
<label>Figure 5</label>
<caption><p>Spectral difference between <italic>bitter</italic> and <italic>sweet Lupinus</italic> seeds based on raw VIS&#x02013;NIR reflectance data (350&#x02013;2,500 nm). The solid line represents &#x00394;<italic>R</italic>(&#x003BB;) (Bitter&#x02212;Sweet), and the shaded band indicates &#x000B1; SEM at each wavelength. Negative values indicate higher reflectance in <italic>sweet</italic> seeds, whereas positive values correspond to higher reflectance in <italic>bitter</italic> seeds.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1745720-g0005.tif">
<alt-text content-type="machine-generated">Line graph comparing spectral reflectance differences between bitter and sweet Lupinus seeds across wavelengths from 400 to 2500 nanometers, showing mean values with shaded standard error margins.</alt-text>
</graphic>
</fig>
<p>Overall, the results reveal consistent spectral differences across the VIS&#x02013;NIR domain. In the visible region (approximately 400&#x02013;700 nm), <italic>sweet</italic> seeds tend to exhibit higher reflectance than <italic>bitter</italic> seeds, which is compatible with variation in seed coat color and the contribution of pigment- and phenolic-associated compounds (<xref ref-type="bibr" rid="B59">Weyer and Lo, 2006</xref>). In the near- and short-wave infrared range (700&#x02013;2,500 nm), subtler but persistent differences are observed, linked to overtone and combination bands of molecular vibrations. In particular, contrasts in regions commonly associated with C&#x02013;H, N&#x02013;H, and O&#x02013;H absorptions are consistent with differences in the overall chemical composition of the seed matrix (organic and nitrogen-containing constituents), thereby reflecting an <italic>intrinsic physicochemical signature</italic> of the material rather than effects driven by data processing or algorithmic decisions (<xref ref-type="bibr" rid="B3">Be&#x00107; et al., 2025</xref>; <xref ref-type="bibr" rid="B59">Weyer and Lo, 2006</xref>).</p>
<p>Taken together, the spectral difference analysis confirms the presence of systematic and structured contrasts between <italic>bitter</italic> and <italic>sweet</italic> seeds at the VIS&#x02013;NIR signal level, providing a physical basis for the application of supervised classification methods. However, this descriptive analysis does not directly indicate which spectral regions are effectively exploited by machine learning models during the discrimination process. To enhance interpretability of the observed performance and to link spectral information with classifier behavior, a complementary spectral selectivity analysis based on an interpretable linear model was therefore conducted.</p></sec>
<sec>
<label>3.1.2</label>
<title>Spectral selectivity based on logistic regression coefficients</title>
<p>To identify the spectral regions underlying bitter/sweet discrimination, a spectral selectivity analysis was conducted using the coefficients of a Logistic Regression (LGR) model trained on reflectance spectra preprocessed with Standard Normal Variate (SNV) and first-derivative Savitzky&#x02013;Golay filtering (2-3-3) (<xref ref-type="fig" rid="F6">Figure 6</xref>). This configuration was selected because it provided the highest classification performance for LGR under reflectance data. Spectral variables were previously standardized to ensure comparability across wavelengths, and selectivity was represented as the absolute value of the coefficient associated with each wavelength, assuming that in linear models each coefficient reflects the direct contribution of its corresponding feature to the prediction (<xref ref-type="bibr" rid="B7">Contreras and Bocklitz, 2025</xref>).</p>
<fig position="float" id="F6">
<label>Figure 6</label>
<caption><p>Spectral selectivity profile derived from a Logistic Regression (LGR) model trained on reflectance spectra preprocessed with SNV and first-derivative Savitzky&#x02013;Golay filtering (2-3-3). The curve represents the absolute value of the model coefficients for each wavelength, indicating the relative contribution of spectral regions to bitter/sweet discrimination in <italic>Lupinus</italic> seeds. Highlighted peaks suggest regions of delocalised molecular information linked to global compositional differences in the seed matrix (<xref ref-type="bibr" rid="B3">Be&#x00107; et al., 2025</xref>).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1745720-g0006.tif">
<alt-text content-type="machine-generated">Line graph showing the absolute logistic regression coefficient values on the y-axis and wavelength in nanometers, ranging from 350 to 2500, on the x-axis. Peaks are present at several wavelengths, with higher coefficients observed around 500, 1250, 1750, and 2000 nanometers. The graph title references a spectral selectivity profile using logistic regression with reflectance SNV plus first derivative preprocessing.</alt-text>
</graphic>
</fig>
<p>The resulting selectivity profile exhibits informative contributions distributed across the entire VIS&#x02013;NIR range. Local maxima of importance were identified in the visible region (approximately 413&#x02013;414 nm), associated with electronic transitions (<xref ref-type="bibr" rid="B61">Yang and Mouazen, 2012</xref>), and in three short-wave near-infrared regions (approximately 1,735&#x02013;1,795, 2,087&#x02013;2,088, and 2,375&#x02013;2,447 nm), corresponding to overtone and combination vibrations of C&#x02013;H functional groups (<xref ref-type="bibr" rid="B59">Weyer and Lo, 2006</xref>). This pattern indicates that discrimination does not rely on isolated spectral bands, but rather emerges from delocalised contributions along the wavelength axis, which is consistent with the multivariate nature of NIR signals and the complexity of non-fundamental vibrational transitions (<xref ref-type="bibr" rid="B3">Be&#x00107; et al., 2025</xref>; <xref ref-type="bibr" rid="B59">Weyer and Lo, 2006</xref>).</p></sec></sec>
<sec>
<label>3.2</label>
<title>Reflectance results</title>
<p>This subsection presents results for VIS&#x02013;NIR reflectance. Performance was evaluated using F1-score and AUC&#x02013;ROC. <xref ref-type="fig" rid="F7">Figure 7</xref> shows one plot per resampling method; bar colors encode the transformation techniques. The <italic>x</italic>-axis lists the classifiers, and the <italic>y</italic>-axis shows F1-score, computed as the mean over five folds. For each classifier, the maximum F1-score and the associated transformation are reported on the right.</p>
<fig position="float" id="F7">
<label>Figure 7</label>
<caption><p>Performance of ML classifiers on VIS&#x02013;NIR reflectance assessed by F1-score. Each plot corresponds to a resampling technique; the <italic>x</italic>-axis lists the classifiers and the <italic>y</italic>-axis shows F1-score averaged over five folds. Each transformation technique is represented with a bar and identified according to a color code, which is shown on the right. For each classifier, the best F1-score and the associated transformation are reported on the right.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1745720-g0007.tif">
<alt-text content-type="machine-generated">Six grouped bar charts compare F1-scores for five machine learning algorithms (LGR, MLP, RF, SVC, DTR) across six resampling strategies: No resampling, SMOTE-ENN, SMOTE-Tomek, SVM-SMOTE, BL-SMOTE, and ADASYN. Each algorithm&#x00027;s performance is shown for five feature sets: RAW, SNV, SNV 1D_2-3-3, SNV 1D_2-7-7, and BSS SNV 1D_2-7-7, with legends and F1-score values for each method provided in the lower right of each plot.</alt-text>
</graphic>
</fig>
<p>Examining per-classifier maxima, the highest F1-scores were obtained predominantly with hybrid transformations, notably SNV&#x0002B;1D (2-3-3) and SNV&#x0002B;1D (2-7-7). RF and SVC performed consistently well across transformations: for RF, maxima ranged from 88.5% (SVM&#x02013;SMOTE) to 89.9% (no resampling); for SVC, from 89.1% (ADASYN and SMOTE&#x02013;Tomek) to 91.8% (BL&#x02013;SMOTE). LGR achieved the overall best result (92.5%) with SMOTE&#x02013;Tomek; across other resampling methods it ranged from 87.4% (no resampling) to 91.9% (SVM&#x02013;SMOTE). Under reflectance, LGR emerges as the most effective option for sweet vs. bitter discrimination with four of five resampling methods.</p>
<p>In general, MLP and DTR classifiers showed the worst performance. However, while MLP exhibited acceptable scores ranging from 78.3% (BL-SMOTE) to 86.2% (SMOTE-Tomek), DTR reached the poorest scores, ranging from 72.2% (No resampling) to 84.9% (SMOTE-ENN). It is noteworthy that both showed remarkable improvements when hybrid data transformation techniques were applied, as illustrated by the results in <xref ref-type="fig" rid="F7">Figure 7</xref>.</p>
<p>As previously mentioned, the AUC-ROC metric was also computed. As LGR was the classifier with the highest scores using spectral reflectance data and using SMOTE-Tomek and SVM-SMOTE as resampling methods, the AUC-ROC analysis is focused on these two methods as they demonstrated similar levels of performance for all classifiers and spectral transformation techniques. First, <xref ref-type="table" rid="T4">Table 4</xref> outlines AUC values and standard deviation. <xref ref-type="fig" rid="F8">Figure 8</xref> plots ROC curves for LGR with both resampling methods and SNV&#x0002B;1D (2-3-3) and BSS&#x0002B;SNV&#x0002B;1D (2-7-7) as data transformation techniques. They were chosen as an example of the behavior of LGR in discriminating between bitter and sweet <italic>Lupinus</italic> material. ROC curves plot the True Positive rates (TPR) against the False Positive rates (FPR), which provides a graphical representation of the performance of a binary classifier at different classification thresholds. The summary of the performance is represented with the AUC value.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>AUC values (&#x000B1; standard deviation) of test for ML classifiers and data transformation techniques with SMOTE-Tomek and SVM-SMOTE as resampling methods on VIS&#x02013;NIR reflectance.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Transformation</bold></th>
<th valign="top" align="center" colspan="5"></th>
</tr>
<tr>
<th valign="top" align="left"><bold>technique</bold></th>
<th valign="top" align="center"><bold>LGR</bold></th>
<th valign="top" align="center"><bold>MLP</bold></th>
<th valign="top" align="center"><bold>RF</bold></th>
<th valign="top" align="center"><bold>SVC</bold></th>
<th valign="top" align="center"><bold>DTR</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" colspan="6"><bold>AUC values - SMOTE-Tomek</bold></td>
</tr>
<tr>
<td valign="top" align="left">RAW</td>
<td valign="top" align="center">0.965 (&#x000B1;0.020)</td>
<td valign="top" align="center">0.798 (&#x000B1;0.136)</td>
<td valign="top" align="center">0.959 (&#x000B1;0.021)</td>
<td valign="top" align="center">0.966 (&#x000B1;0.017)</td>
<td valign="top" align="center">0.894 (&#x000B1;0.113)</td>
</tr>
<tr>
<td valign="top" align="left">SNV</td>
<td valign="top" align="center">0.950 (&#x000B1;0.047)</td>
<td valign="top" align="center">0.858 (&#x000B1;0.053)</td>
<td valign="top" align="center">0.968 (&#x000B1;0.018)</td>
<td valign="top" align="center"><bold>0.981</bold> (&#x000B1;0.011)</td>
<td valign="top" align="center">0.959 (&#x000B1;0.027)</td>
</tr>
<tr>
<td valign="top" align="left">SNV&#x0002B;1D (2-3-3)</td>
<td valign="top" align="center">0.978 (&#x000B1;0.018)</td>
<td valign="top" align="center">0.880 (&#x000B1;0.062)</td>
<td valign="top" align="center">0.975 (&#x000B1;0.018)</td>
<td valign="top" align="center">0.961 (&#x000B1;0.022)</td>
<td valign="top" align="center">0.965 (&#x000B1;0.020)</td>
</tr>
<tr>
<td valign="top" align="left">SNV&#x0002B;1D (2-7-7)</td>
<td valign="top" align="center">0.975 (&#x000B1;0.003)</td>
<td valign="top" align="center"><bold>0.894</bold> (&#x000B1;0.023)</td>
<td valign="top" align="center"><bold>0.976</bold> (&#x000B1;0.003)</td>
<td valign="top" align="center">0.970 (&#x000B1;0.003)</td>
<td valign="top" align="center">0.964 (&#x000B1;0.005)</td>
</tr>
<tr>
<td valign="top" align="left">BSS&#x0002B;SNV&#x0002B;1D (2-7-7)</td>
<td valign="top" align="center"><bold>0.981</bold> (&#x000B1;0.024)</td>
<td valign="top" align="center"><bold>0.894</bold> (&#x000B1;0.053)</td>
<td valign="top" align="center"><bold>0.976</bold> (&#x000B1;0.018)</td>
<td valign="top" align="center">0.970 (&#x000B1;0.028)</td>
<td valign="top" align="center"><bold>0.966</bold> (&#x000B1;0.020)</td>
</tr>
<tr>
<td valign="top" align="left" colspan="6"><bold>AUC values - SVM-SMOTE</bold></td>
</tr>
<tr>
<td valign="top" align="left">RAW</td>
<td valign="top" align="center">0.963 (&#x000B1;0.023)</td>
<td valign="top" align="center">0.852 (&#x000B1;0.097)</td>
<td valign="top" align="center">0.961 (&#x000B1;0.018)</td>
<td valign="top" align="center">0.961 (&#x000B1;0.020)</td>
<td valign="top" align="center">0.939 (&#x000B1;0.045)</td>
</tr>
<tr>
<td valign="top" align="left">SNV</td>
<td valign="top" align="center">0.941 (&#x000B1;0.045)</td>
<td valign="top" align="center">0.868 (&#x000B1;0.088)</td>
<td valign="top" align="center">0.971 (&#x000B1;0.020)</td>
<td valign="top" align="center"><bold>0.978</bold> (&#x000B1;0.014)</td>
<td valign="top" align="center">0.948 (&#x000B1;0.034)</td>
</tr>
<tr>
<td valign="top" align="left">SNV&#x0002B;1D (2-3-3)</td>
<td valign="top" align="center">0.972 (&#x000B1;0.026)</td>
<td valign="top" align="center">0.877 (&#x000B1;0.049)</td>
<td valign="top" align="center">0.975 (&#x000B1;0.018)</td>
<td valign="top" align="center">0.976 (&#x000B1;0.017)</td>
<td valign="top" align="center"><bold>0.969</bold> (&#x000B1;0.020)</td>
</tr>
<tr>
<td valign="top" align="left">SNV&#x0002B;1D (2-7-7)</td>
<td valign="top" align="center"><bold>0.981</bold> (&#x000B1;0.016)</td>
<td valign="top" align="center"><bold>0.891</bold> (&#x000B1;0.041)</td>
<td valign="top" align="center"><bold>0.977</bold> (&#x000B1;0.018)</td>
<td valign="top" align="center">0.974 (&#x000B1;0.024)</td>
<td valign="top" align="center">0.965 (&#x000B1;0.017)</td>
</tr>
<tr>
<td valign="top" align="left">BSS&#x0002B;SNV&#x0002B;1D (2-7-7)</td>
<td valign="top" align="center">0.938 (&#x000B1;0.011)</td>
<td valign="top" align="center"><bold>0.891</bold> (&#x000B1;0.041)</td>
<td valign="top" align="center"><bold>0.977</bold> (&#x000B1;0.018)</td>
<td valign="top" align="center">0.974 (&#x000B1;0.024)</td>
<td valign="top" align="center">0.968 (&#x000B1;0.017)</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Both metrics provide similar performance results and behavior. The best values were highlighted in bold.</p>
</table-wrap-foot>
</table-wrap>
<fig position="float" id="F8">
<label>Figure 8</label>
<caption><p>ROC curves for LGR with spectral reflectance data for SMOTE-Tomek <bold>(A)</bold> and SVM-SMOTE <bold>(B)</bold> as resampling methods and SNV&#x0002B;1D (2-3-3) and BSS&#x0002B;SNV&#x0002B;1D (2-7-7) as data transformation techniques, respectively. Both were chosen for exhibiting the best performance scores.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1745720-g0008.tif">
<alt-text content-type="machine-generated">Two side-by-side receiver operating characteristic (ROC) curve plots compare classification performance for SMOTE-Tomek and SVM-SMOTE methods, each showing a solid blue curve with area under the curve (AUC) equal to zero point nine eight, and a dashed diagonal random classifier reference line. Both curves have axes labeled true positive rate versus false positive rate.</alt-text>
</graphic>
</fig></sec>
<sec>
<label>3.3</label>
<title>Absorbance results</title>
<p>This section analyses the experimental results obtained with VIS&#x02013;NIR absorbance data. <xref ref-type="fig" rid="F9">Figure 9</xref> shows a summary focused on the F1-score. This analysis includes an independent graph for each resampling method, where the evolution of the performance of the different classifiers and data transformation technique is outlined. All graphs provide on the right a summary with the best F1-score value for each classifier and transformation strategy.</p>
<fig position="float" id="F9">
<label>Figure 9</label>
<caption><p>Performance assessment for each ML classifier with spectral absorbance data using the F1-Score metric. Every plot shows the results for a resampling method. The <italic>X</italic>-axis represents the ML classifiers and the <italic>Y</italic>-axis is the F1-Score averaged over five folds. Each transformation technique is represented with a bar and identified according to a color code, which is shown on the right. The best score for each classifier and transformation strategy is depicted on the right.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1745720-g0009.tif">
<alt-text content-type="machine-generated">Six grouped bar charts comparing F1-scores for five machine learning algorithms (LGR, MLP, RF, SVC, DTR) across six resampling methods: No resampling, SMOTE-ENN, SMOTE-Tomek, SVM-SMOTE, BL-SMOTE, and ADASYN. Each bar represents different data preprocessing techniques (RAW, SNV, SNV 1D 2-3-3, SNV 1D 2-7-7, BSS SNV 1D 2-7-7). A legend and text box in each plot summarize corresponding scores and preprocessing combinations.</alt-text>
</graphic>
</fig>
<p>It is noteworthy that absorbance results confirmed the pattern observed with reflectance. In general, hybrid transformations yielded performance gains. The only exception was DTR, which deviated from this trend on the raw data.</p>
<p>Moreover, all experimental outcomes, when contrasted with the reflectance results, confirmed that RF and SVC exhibited the best performance and LGR presented a good performance, but it never came very close to RF and SVC. In this regard, RF exhibited its lowest performance with original data (89.3%) and ranged from 91.6% (BL-SMOTE) to 92.5% (ADASYN), which represent excellent scores for the research objective. SVC performance ranged from 90.6% (BL-SMOTE) to 93.2% (SVM-SMOTE). Therefore, SVC was the classifier with the highest F1-score. Both algorithms confirmed that they are the most appropriate classifiers for VIS&#x02013;NIR absorbance data, demonstrating consistent and relevant results with both original and transformed data, and throughout all resampling methods.</p>
<p>With respect to LGR, identified as the best classifier under reflectance, performance was solid across all experiments, with F1-score between 86.7% (BL&#x02013;SMOTE) and 90.4% (SMOTE&#x02013;ENN). Although these values were slightly lower than those of RF and SVC, LGR remains competitive on VIS&#x02013;NIR absorbance data.</p>
<p>In accordance with reflectance, MLP and DTR classifiers showed the worst performance with absorbance. However, while MLP performed reasonably well with scores ranging from 82.0% (ADASYN) to 87.4% (SVM-SMOTE), which were higher than the scores obtained with spectral reflectance, DTR reached the poorest scores ranging from 78.2% (ADASYN and BL-SMOTE) to 81.4% (SMOTE-Tomek). Once more, both exhibited enhancements when hybrid data transformation techniques were implemented, as outlined in <xref ref-type="fig" rid="F9">Figure 9</xref>. Nevertheless, only DTR with no resampling did not follow this trend, which is an interesting case to be further studied.</p>
<p>These results were also analyzed using the ROC-AUC metric to depict the discrimination ability of a binary classifier between bitter and sweet <italic>Lupinus</italic> materials/ecotypes. After comparing the results, SVM-SMOTE and ADASYN were selected as the resampling methods with the highest performance for RF and SVC. Therefore, <xref ref-type="table" rid="T5">Table 5</xref> summarizes the AUC values and the standard deviation for all classifiers and data transformation techniques for resampling methods chosen. Additionally, <xref ref-type="fig" rid="F10">Figure 10</xref> plots ROC curves for RF and SVC using ADASYN and SVM-SMOTE, respectively. Both were chosen because RF and SVC reached their highest performances with the application of these resampling methods.</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>AUC values (&#x000B1; standard deviation) for ML classifiers and data transformation techniques with ADASYN and SVM-SMOTE as resampling methods using spectral absorbance data.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Transformation</bold></th>
<th valign="top" align="center" colspan="5"></th>
</tr>
<tr>
<th valign="top" align="left"><bold>technique</bold></th>
<th valign="top" align="center"><bold>LGR</bold></th>
<th valign="top" align="center"><bold>MLP</bold></th>
<th valign="top" align="center"><bold>RF</bold></th>
<th valign="top" align="center"><bold>SVC</bold></th>
<th valign="top" align="center"><bold>DTR</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" colspan="6"><bold>AUC values - ADASYN</bold></td>
</tr>
<tr>
<td valign="top" align="left">RAW</td>
<td valign="top" align="center">0.966 (&#x000B1;0.026)</td>
<td valign="top" align="center">0.820 (&#x000B1;0.094)</td>
<td valign="top" align="center"><bold>0.977</bold> (&#x000B1;0.044)</td>
<td valign="top" align="center">0.976 (&#x000B1;0.027)</td>
<td valign="top" align="center">0.955 (&#x000B1;0.031)</td>
</tr>
<tr>
<td valign="top" align="left">SNV</td>
<td valign="top" align="center">0.944 (&#x000B1;0.056)</td>
<td valign="top" align="center">0.882 (&#x000B1;0.075)</td>
<td valign="top" align="center"><bold>0.977</bold> (&#x000B1;0.045)</td>
<td valign="top" align="center"><bold>0.982</bold> (&#x000B1;0.020)</td>
<td valign="top" align="center">0.958 (&#x000B1;0.030)</td>
</tr>
<tr>
<td valign="top" align="left">SNV&#x0002B;1D (2-3-3)</td>
<td valign="top" align="center">0.967 (&#x000B1;0.045)</td>
<td valign="top" align="center">0.801 (&#x000B1;0.087)</td>
<td valign="top" align="center">0.966 (&#x000B1;0.041)</td>
<td valign="top" align="center">0.976 (&#x000B1;0.046)</td>
<td valign="top" align="center">0.962 (&#x000B1;0.027)</td>
</tr>
<tr>
<td valign="top" align="left">SNV&#x0002B;1D (2-7-7)</td>
<td valign="top" align="center">0.969 (&#x000B1;0.043)</td>
<td valign="top" align="center"><bold>0.890</bold> (&#x000B1;0.023)</td>
<td valign="top" align="center"><bold>0.977</bold> (&#x000B1;0.041)</td>
<td valign="top" align="center">0.980 (&#x000B1;0.040)</td>
<td valign="top" align="center">0.968 (&#x000B1;0.029)</td>
</tr>
<tr>
<td valign="top" align="left">BSS&#x0002B;SNV&#x0002B;1D (2-7-7)</td>
<td valign="top" align="center"><bold>0.972</bold> (&#x000B1;0.032)</td>
<td valign="top" align="center">0.862 (&#x000B1;0.051)</td>
<td valign="top" align="center"><bold>0.977</bold> (&#x000B1;0.047)</td>
<td valign="top" align="center">0.972 (&#x000B1;0.051)</td>
<td valign="top" align="center"><bold>0.970</bold> (&#x000B1;0.034)</td>
</tr>
<tr>
<td valign="top" align="left" colspan="6"><bold>AUC values - SVM-SMOTE</bold></td>
</tr>
<tr>
<td valign="top" align="left">RAW</td>
<td valign="top" align="center">0.965 (&#x000B1;0.026)</td>
<td valign="top" align="center">0.820 (&#x000B1;0.089)</td>
<td valign="top" align="center"><bold>0.978</bold> (&#x000B1;0.044)</td>
<td valign="top" align="center">0.977 (&#x000B1;0.031)</td>
<td valign="top" align="center">0.957 (&#x000B1;0.031)</td>
</tr>
<tr>
<td valign="top" align="left">SNV</td>
<td valign="top" align="center">0.937 (&#x000B1;0.091)</td>
<td valign="top" align="center">0.817 (&#x000B1;0.055)</td>
<td valign="top" align="center">0.976 (&#x000B1;0.047)</td>
<td valign="top" align="center"><bold>0.982</bold> (&#x000B1;0.021)</td>
<td valign="top" align="center">0.958 (&#x000B1;0.035)</td>
</tr>
<tr>
<td valign="top" align="left">SNV&#x0002B;1D (2-3-3)</td>
<td valign="top" align="center">0.968 (&#x000B1;0.052)</td>
<td valign="top" align="center">0.848 (&#x000B1;0.062)</td>
<td valign="top" align="center">0.970 (&#x000B1;0.041)</td>
<td valign="top" align="center">0.978 (&#x000B1;0.042)</td>
<td valign="top" align="center"><bold>0.972</bold> (&#x000B1;0.031)</td>
</tr>
<tr>
<td valign="top" align="left">SNV&#x0002B;1D (2-7-7)</td>
<td valign="top" align="center">0.973 (&#x000B1;0.043)</td>
<td valign="top" align="center"><bold>0.908</bold> (&#x000B1;0.048)</td>
<td valign="top" align="center">0.976 (&#x000B1;0.045)</td>
<td valign="top" align="center">0.979 (&#x000B1;0.041)</td>
<td valign="top" align="center">0.971 (&#x000B1;0.031)</td>
</tr>
<tr>
<td valign="top" align="left">BSS&#x0002B;SNV&#x0002B;1D (2-7-7)</td>
<td valign="top" align="center"><bold>0.981</bold> (&#x000B1;0.029)</td>
<td valign="top" align="center">0.841 (&#x000B1;0.088)</td>
<td valign="top" align="center"><bold>0.978</bold> (&#x000B1;0.045)</td>
<td valign="top" align="center">0.979 (&#x000B1;0.042)</td>
<td valign="top" align="center"><bold>0.972</bold> (&#x000B1;0.029)</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>These were chosen for providing similar performance results and behavior. The best values were highlighted in bold.</p>
</table-wrap-foot>
</table-wrap>
<fig position="float" id="F10">
<label>Figure 10</label>
<caption><p>ROC curves for RF with ADASYN <bold>(A)</bold>, and SVC with SVM-SMOTE <bold>(B)</bold> and spectral absorbance data. SNV and SNV&#x0002B;1D (2-3-3) were used as data transformation techniques for RF and SVC, respectively. Both were chosen for exhibiting the best performance scores.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1745720-g0010.tif">
<alt-text content-type="machine-generated">Two side-by-side ROC curve charts compare SMOTE-Tomek and SVM-SMOTE with area under the curve values of zero point nine eight, showing strong model performance. X-axes represent false positive rates, Y-axes represent true positive rates.</alt-text>
</graphic>
</fig>
<p>The complementary spectral differentiation and selectivity analyses provide interpretative support for the classification results obtained. The presence of consistent spectral differences between bitter and sweet seeds across the VIS&#x02013;NIR range indicates that discrimination is not driven solely by algorithmic decisions, but is grounded on intrinsic spectral signals of the plant material. Moreover, the selectivity analysis based on an interpretable model shows that the information exploited by the classifier is distributed across multiple spectral regions, consistent with the combined contribution of overtone and combination bands associated with O&#x02013;H, N&#x02013;H and C&#x02013;H bonds. In near-infrared spectroscopy, these spectral features are commonly related to global compositional variations in complex biological matrices rather than to single compounds (<xref ref-type="bibr" rid="B35">Manley, 2014</xref>; <xref ref-type="bibr" rid="B45">Roggo et al., 2007</xref>; <xref ref-type="bibr" rid="B59">Weyer and Lo, 2006</xref>). In this context, spectral selectivity adds an interpretative layer to the predictive results and reinforces the physico-chemical coherence of the proposed approach, without implying a one-to-one assignment between individual spectral regions and specific chemical constituents.</p></sec>
<sec>
<label>3.4</label>
<title>Statistical tests</title>
<p>Once the results of the studies carried out in this work have been presented, a statistical analysis must be performed to evaluate the quality and robustness of the presented techniques.</p>
<p>Given the combined use of spectral transformations and resampling, normality and homoscedasticity were assessed per fold using Shapiro&#x02013;Wilk and Levene tests, respectively. For Shapiro&#x02013;Wilk, <italic>H</italic><sub>0</sub> assumes normality and <italic>H</italic><sub>1</sub> non-normality; for Levene, <italic>H</italic><sub>0</sub> assumes equal variances and <italic>H</italic><sub>1</sub> that at least one variance differs. Based on these diagnostics, the global test was selected: ANOVA (parametric) or Kruskal&#x02013;Wallis (non-parametric) to test <italic>H</italic><sub>0</sub> (all populations equal) vs. <italic>H</italic><sub>1</sub> (at least one different).</p>
<p>This more in-depth study will enable us to determine whether, given the results obtained in the tests for the F1-Score and AUC metrics, we can affirm that there are significant differences between the ML methods. Finally, we can conclude by performing a robustness test, which will allow us to determine the most robust model that has been optimized in the work presented.</p>
<p>The robustness test will facilitate the identification and quantification of the optimal solution to the problem addressed.</p>
<p>Statistical tables with the detailed results on VIS&#x02013;NIR spectral absorbance and reflectance data using F1-score are reported in the <xref ref-type="supplementary-material" rid="SM1">Supplementary material</xref> for clarity. Analyses with AUC yielded similar results. The statistical study was conducted for each of the leading algorithms. Overall, it confirms that, although minor performance differences can be observed among classifiers depending on the spectral transformation and resampling strategy, these differences are generally small (data presented in <xref ref-type="supplementary-material" rid="SM1">Supplementary Tables S1</xref>, <xref ref-type="supplementary-material" rid="SM1">S4</xref>). In both reflectance and absorbance datasets, SVC, RF and LGR consistently form the most robust group of classifiers, showing stable performance across preprocessing configurations and resampling methods. This result supports the use of these algorithms as reliable alternatives for sweet/bitter discrimination in <italic>Lupinus</italic> seeds.</p></sec>
<sec>
<label>3.5</label>
<title>Wavelength selection during sweet/bitter discrimination for a specific study case</title>
<p>This section presents a succinct analysis of the most relevant spectral bands that were employed by RandomForest to discriminate between sweet and bitter <italic>Lupinus</italic>. BSS-SNV-1D (1-7-7) and BL-SMOTE were selected as the transformation technique and the resampling method, respectively, for reflectance and absorbance spectra. Given the number of spectral variables (wavelengths), graphs in <xref ref-type="fig" rid="F11">Figure 11</xref> show the 20 most relevant wavelengths during the test phase. This provides a clear indication of which part of the spectrum was being used.</p>
<fig position="float" id="F11">
<label>Figure 11</label>
<caption><p>Feature importance during the test phase for reflectance <bold>(a)</bold> and absorbance <bold>(b)</bold> spectra. The most relevant 20 wavelengths were represented. The study case represents RandomForest as the ML algorithm, BSS-SNV-1D (1-7-7) and BL-SMOTE as the transformation technique and resampling method, respectively. The <italic>Y</italic>-axis indicates wavelength and the <italic>X</italic>-axis shows the SHAP value. Red color identifies bitterness and blue color means sweetness.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frai-09-1745720-g0011.tif">
<alt-text content-type="machine-generated">Paired summary plots displaying SHAP values for reflectance (left panel) and absorbance (right panel) features, each showing individual feature contributions to a model predicting bitterness or sweetness, with blue and pink dots representing low to high feature values, respectively, along a SHAP value x-axis.</alt-text>
</graphic>
</fig>
<p>The SHapley Additive exPlanations (SHAP) value was used. SHAP explains predictions by measuring the contribution of each feature to deviations from the average model output, and is grounded in cooperative game theory.</p>
<p>UV (100&#x02013;400 nm), VIS (400&#x02013;700 nm), NIR (700&#x02013;1,300 nm) and SWIR (1,300&#x02013;2,500 nm) spectral regions capture information related to chemical composition and structure.</p>
<p>According to these results, the 20 most relevant wavelengths are distributed across the VIS, NIR, and SWIR regions. However, for reflectance data in this interval, there is a lower representation of the VIS and SWIR regions, and the relevant NIR wavelengths are concentrated near the lower limit of the NIR range. In contrast, for absorbance data, the most relevant NIR wavelengths are located closer to the upper limit of the NIR range. This selection used by RF to discriminate sweet/bitter seeds is in line with the findings presented in Section 3.1.1.</p></sec></sec>
<sec sec-type="discussion" id="s4">
<label>4</label>
<title>Discussion</title>
<p>This study addressed the binary classification of <italic>Lupinus</italic> materials/ecotypes as sweet or bitter using VIS&#x02013;NIR reflectance and absorbance spectra from whole seeds. The experimental design aimed to assess discriminative ability and to provide a non-destructive, supervised-learning approach. Five ML algorithms were compared on raw spectra and after four spectral transformations, together with five resampling methods to address class imbalance.</p>
<p>An initial exploratory spectral comparison stated that despite their relatively low abundance, recent work has shown that low-alkaloid material can be successfully discriminated using NIR spectroscopy even on intact seeds, leveraging matrix-level compositional differences captured by the spectral signature (<xref ref-type="bibr" rid="B1">Barzaghi et al., 2025</xref>). Moreover, the patterns of spectral differentiation, expressed as broad multivariate contrasts rather than isolated peaks, align with current recommendations for interpretation in applied spectroscopy and spectral machine learning, which emphasize linking predictive performance to physically plausible spectral evidence (<xref ref-type="bibr" rid="B3">Be&#x00107; et al., 2025</xref>; <xref ref-type="bibr" rid="B53">Sem, 2021</xref>). Taken together, these findings support VIS&#x02013;NIR spectroscopy as a non-destructive and interpretable tool for rapid germplasm screening (<xref ref-type="bibr" rid="B1">Barzaghi et al., 2025</xref>).</p>
<p>Regarding the selectivity analysis, it is important to note it was performed using original samples without synthetic resampling. Although class-balancing techniques such as SMOTE can improve statistical classification performance, in spectroscopy they generate artificial instances that do not represent real biological states of the seed. This may disrupt the physical covariance structure between delocalised variables, thereby distorting biochemical interpretation and model transparency (<xref ref-type="bibr" rid="B3">Be&#x00107; et al., 2025</xref>; <xref ref-type="bibr" rid="B7">Contreras and Bocklitz, 2025</xref>).</p>
<p>In reference to the data transformation techniques, the combination of various transformations on the data available had a direct impact on the classifiers&#x00027; performance, which has been identified as a general trend across the experimental results for all resampling methods and ML algorithms. With respect to the resampling methods evaluated, the results were quite consistent for all SMOTE-based techniques and ADASYN, even with the raw data. Although some differences existed when the results of the classifiers were compared, variations in performance for the best classifiers were very narrow.</p>
<p>Regarding the strategy used to identify optimal hyperparameter values and the risk of overfitting, the performance metrics on the training and test sets for the majority of the experiments showed differences generally between 1 and 5%, suggesting that most models generalize well and did not exhibit overfitting. Slightly larger differences were observed in test sets with few instances of the minority class, likely due to the higher variability inherent in small sample sizes. Nevertheless, further analysis may be required to more thoroughly assess model robustness and potential overfitting effects, particularly in imbalanced or low-sample settings.</p>
<p>In this regard, other alternatives for hyperparameter selection can be addressed using approaches such as randomized search or Bayesian optimization, which enable the identification of promising parameter combinations without exhaustively exploring the entire search space and reduce the risk of overfitting.</p>
<p>Focusing on classifier performance, three methods stood out with VIS&#x02013;NIR reflectance and absorbance data: LGR, RF, and SVC. Under reflectance, LGR and SVC were prominent; under absorbance, RF and SVC were. In particular, LGR achieved the best result (92.5%) with SNV&#x0002B;1D (2-3-3) as the transformation and SMOTE&#x02013;Tomek as the resampling method, on reflectance data. SVC ranked second, especially with raw data (92.0%) and with BL&#x02013;SMOTE (91.8%) as the resampling method, in both cases using SNV&#x0002B;1D (2-7-7). RF exceeded 88.5% across all settings, constituting a robust alternative for the study objective.</p>
<p>Regarding VIS&#x02013;NIR absorbance results, SVC exhibited the best performance across all experiments (90.6%&#x02013;93.2%), consistently when hybrid transformation techniques were applied, demonstrating its potential to discriminate between sweet and bitter <italic>Lupinus</italic> material. RF ranked second, ranging from 89.3% on raw data without resampling to 92.5% with SNV and ADASYN as the transformation and resampling method, respectively. Although LGR did not achieve the top performance on absorbance, it achieved a good score (90.4%) with SNV&#x0002B;1D (2-3-3) and SMOTE&#x02013;ENN.</p>
<p>It should be acknowledged that the high dimensionality of VIS&#x02013;NIR spectra relative to sample size introduces an inherent risk of overfitting and may affect the stability of wavelength-level importance estimates. In this study, this risk was mitigated through stratified cross-validation, bounded hyperparameter search spaces with explicit complexity constraints, and the use of an interpretable linear model (logistic regression) for spectral selectivity. Moreover, spectral differentiation analyses were performed on raw data without model fitting, providing an independent, descriptive verification of structured class contrasts. Nevertheless, variable-importance patterns should be interpreted as indicative regions rather than unique causal wavelengths, and future work with larger and more balanced datasets, external validation, and/or formal wavelength selection will further strengthen the robustness of these interpretations.</p></sec>
<sec id="s5">
<label>5</label>
<title>Conclusions</title>
<p>According to the results with the data available in this study, we can confirm the potential of machine learning models as a non-destructive methodology to discriminate <italic>Lupinus</italic> sweet and bitter materials/ecotypes from VIS&#x02013;NIR reflectance and absorbance spectra. LGR, RF, and SVC were identified as robust ML algorithms for both types of spectral data, with a classification performance higher than 90.4, 88.5, and 90.6%, respectively. Furthermore, hybrid classification techniques and well-known resampling methods have demonstrated their ability to improve this classification ability.</p>
<p>In addition, the spectral differentiation and selectivity analyses performed confirm that the classification between bitter and sweet <italic>Lupinus</italic> seeds relies on reproducible spectral patterns across the VIS&#x02013;NIR range, further supporting the robustness and physical plausibility of the proposed non-destructive approach.</p>
<p>A partial analysis of the feature importance shows that although Random Forest feature importance does not provide a direct chemical interpretation, the results were consistent with those obtained from the spectral difference and selectivity analyses. This indicates that the classification between bitter and sweet <italic>Lupinus</italic> seeds relies on information distributed across the VIS&#x02013;NIR range, further supporting the robustness of the proposed non-destructive approach.</p>
<p>Nevertheless, although the results obtained are significant and open the way to a non-destructive method, they must be interpreted cautiously until further analyses with a more balanced dataset are conducted.</p>
<p>Finally, future research should focus on two main aspects. The first one will be to achieve a more balanced dataset by increasing the number and diversity of samples in the minority class, in order to further improve model robustness and generalization. The second research direction will be to move beyond the preliminary identification of informative spectral regions toward formal wavelength selection and/or dimensionality reduction strategies, with the aim of simplifying the models and facilitating their implementation in portable devices or online/at-line classification systems. In this context, the identification of spectrally relevant wavelength regions may also provide guidance for future developments in problem-specific spectral sensing, although such applied perspectives remain outside the scope of the present study.</p></sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>JD-&#x000C1;: Conceptualization, Funding acquisition, Investigation, Methodology, Project administration, Software, Validation, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. FG-G: Conceptualization, Data curation, Methodology, Resources, Validation, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. FC: Conceptualization, Funding acquisition, Investigation, Methodology, Project administration, Software, Validation, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. PS-L: Investigation, Software, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. FL: Data curation, Resources, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<ack><title>Acknowledgments</title><p>The authors also thank the FEDER project BIOLEG (FEDER EXT-BIOLEG) for its support and collaboration, and Mar&#x000ED;a Jos&#x000E9; Lozano L&#x000F3;pez, curator of the official grain legume collection at the CICYTEX Germplasm Bank, for her invaluable assistance during this study.</p>
</ack>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s9">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="s11">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/frai.2026.1745720/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/frai.2026.1745720/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Barzaghi</surname> <given-names>S.</given-names></name> <name><surname>Ferrari</surname> <given-names>B.</given-names></name> <name><surname>Biazzi</surname> <given-names>E.</given-names></name> <name><surname>Tava</surname> <given-names>A.</given-names></name> <name><surname>Annicchiarico</surname> <given-names>P.</given-names></name></person-group> (<year>2025</year>). <article-title>Quinolizidine alkaloid composition of white lupin landraces and breeding lines, and near-infrared spectroscopy-based discrimination of low-alkaloid material</article-title>. <source>Plants</source> <volume>14</volume>:<fpage>3327</fpage>. doi: <pub-id pub-id-type="doi">10.3390/plants14213327</pub-id><pub-id pub-id-type="pmid">41225876</pub-id></mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Batista</surname> <given-names>G. E.</given-names></name> <name><surname>Prati</surname> <given-names>R. C.</given-names></name> <name><surname>Monard</surname> <given-names>M. C.</given-names></name></person-group> (<year>2004</year>). <article-title>A study of the behavior of several methods for balancing machine learning training data</article-title>. <source>ACM SIGKDD Explor. Newsl</source>. <volume>6</volume>, <fpage>20</fpage>&#x02013;<lpage>29</lpage>. doi: <pub-id pub-id-type="doi">10.1145/1007730.1007735</pub-id></mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Be&#x00107;</surname> <given-names>K. B.</given-names></name> <name><surname>Grabska</surname> <given-names>J.</given-names></name> <name><surname>Huck</surname> <given-names>C. W.</given-names></name></person-group> (<year>2025</year>). <article-title>Interpretability in near-infrared (NIR) spectroscopy: current pathways to the long-standing challenge</article-title>. <source>Trends Anal. Chem</source>. <volume>189</volume>:<fpage>118254</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.trac.2025.118254</pub-id></mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Bian</surname> <given-names>X.</given-names></name></person-group> (<year>2022</year>). <source>Spectral Preprocessing Methods</source>. <publisher-loc>Singapore</publisher-loc>: <publisher-name>Springer Nature Singapore</publisher-name>, <fpage>111</fpage>&#x02013;<lpage>168</lpage>. doi: <pub-id pub-id-type="doi">10.1007/978-981-19-1625-0_4</pub-id></mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Boukid</surname> <given-names>F.</given-names></name> <name><surname>Pasqualone</surname> <given-names>A.</given-names></name></person-group> (<year>2022</year>). <article-title>Lupine (<italic>Lupinus</italic> spp.) proteins: characteristics, safety and food applications</article-title>. <source>Eur. Food Res. Technol</source>. <volume>248</volume>, <fpage>345</fpage>&#x02013;<lpage>356</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00217-021-03909-5</pub-id></mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Co&#x000EF;sson</surname> <given-names>J. D.</given-names></name> <name><surname>Arlorio</surname> <given-names>M.</given-names></name> <name><surname>Locatelli</surname> <given-names>M.</given-names></name> <name><surname>Garino</surname> <given-names>C.</given-names></name> <name><surname>Resta</surname> <given-names>D.</given-names></name> <name><surname>Sirtori</surname> <given-names>E.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>The artificial intelligence-based chemometrical characterisation of genotype/chemotype of <italic>Lupinus albus</italic> and <italic>Lupinus angustifolius</italic> permits their identification and potentially their traceability</article-title>. <source>Food Chem</source>. <volume>129</volume>, <fpage>1806</fpage>&#x02013;<lpage>1812</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.foodchem.2011.05.107</pub-id></mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Contreras</surname> <given-names>J.</given-names></name> <name><surname>Bocklitz</surname> <given-names>T.</given-names></name></person-group> (<year>2025</year>). <article-title>Explainable artificial intelligence for spectroscopy data: a review</article-title>. <source>Pfl&#x000FC;gers Arch</source>. <volume>477</volume>, <fpage>603</fpage>&#x02013;<lpage>615</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00424-024-02997-y</pub-id><pub-id pub-id-type="pmid">39088045</pub-id></mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Danilevicz</surname> <given-names>M. F.</given-names></name> <name><surname>Rocha</surname> <given-names>R. L.</given-names></name> <name><surname>Batley</surname> <given-names>J.</given-names></name> <name><surname>Bayer</surname> <given-names>P. E.</given-names></name> <name><surname>Bennamoun</surname> <given-names>M.</given-names></name> <name><surname>Edwards</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Segmentation of sandplain lupin weeds from morphologically similar narrow-leafed lupins in the field</article-title>. <source>Remote Sens</source>. <volume>15</volume>:<fpage>1817</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs15071817</pub-id></mixed-citation>
</ref>
<ref id="B9">
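<mixed-citation publication-type="journal"><person-group person-group-type="author"><collab>EFSA Panel on Contaminants in the Food Chain (CONTAM)</collab> <name><surname>Schrenk</surname> <given-names>D.</given-names></name> <name><surname>Bodin</surname> <given-names>L.</given-names></name> <name><surname>Chipman</surname> <given-names>J. K.</given-names></name> <name><surname>del Mazo</surname> <given-names>J.</given-names></name> <name><surname>Grasl-Kraupp</surname> <given-names>B.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>Scientific opinion on the risks for animal and human health related to the presence of quinolizidine alkaloids in feed and food, in particular in lupins and lupin-derived products</article-title>. <source>EFSA J</source>. <volume>17</volume>:<fpage>e05860</fpage>. doi: <pub-id pub-id-type="doi">10.2903/j.efsa.2019.5860</pub-id></mixed-citation>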
</ref>
<ref id="B10">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Elmasry</surname> <given-names>G.</given-names></name> <name><surname>Kamruzzaman</surname> <given-names>M.</given-names></name> <name><surname>Sun</surname> <given-names>D.-W.</given-names></name> <name><surname>Allen</surname> <given-names>P.</given-names></name></person-group> (<year>2012</year>). <article-title>Principles and applications of hyperspectral imaging in quality evaluation of agro-food products: a review</article-title>. <source>Crit. Rev. Food Sci. Nutr</source>. <volume>52</volume>, <fpage>999</fpage>&#x02013;<lpage>1023</lpage>. doi: <pub-id pub-id-type="doi">10.1080/10408398.2010.543495</pub-id><pub-id pub-id-type="pmid">22823348</pub-id></mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Elreedy</surname> <given-names>D.</given-names></name> <name><surname>Atiya</surname> <given-names>A. F.</given-names></name> <name><surname>Kamalov</surname> <given-names>F.</given-names></name></person-group> (<year>2024</year>). <article-title>A theoretical distribution analysis of synthetic minority oversampling technique (SMOTE) for imbalanced learning</article-title>. <source>Mach. Learn</source>. <volume>113</volume>, <fpage>4903</fpage>&#x02013;<lpage>4923</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10994-022-06296-4</pub-id></mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Engel</surname> <given-names>A. M.</given-names></name> <name><surname>Klevenhusen</surname> <given-names>F.</given-names></name> <name><surname>Moenning</surname> <given-names>J.-L.</given-names></name> <name><surname>Numata</surname> <given-names>J.</given-names></name> <name><surname>Fischer-Tenhagen</surname> <given-names>C.</given-names></name> <name><surname>Sachse</surname> <given-names>B.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Investigations on the transfer of quinolizidine alkaloids from <italic>Lupinus angustifolius</italic> into the milk of dairy cows</article-title>. <source>J. Agric. Food Chem</source>. <volume>70</volume>, <fpage>11749</fpage>&#x02013;<lpage>11758</lpage>. doi: <pub-id pub-id-type="doi">10.1021/acs.jafc.2c02517</pub-id><pub-id pub-id-type="pmid">36068017</pub-id></mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Estivi</surname> <given-names>L.</given-names></name> <name><surname>Brandolini</surname> <given-names>A.</given-names></name> <name><surname>Gasparini</surname> <given-names>A.</given-names></name> <name><surname>Hidalgo</surname> <given-names>A.</given-names></name></person-group> (<year>2023</year>). <article-title>Lupin as a source of bioactive antioxidant compounds for food products</article-title>. <source>Molecules</source> <volume>28</volume>:<fpage>7529</fpage>. doi: <pub-id pub-id-type="doi">10.3390/molecules28227529</pub-id><pub-id pub-id-type="pmid">38005249</pub-id></mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Eugelio</surname> <given-names>F.</given-names></name> <name><surname>Palmieri</surname> <given-names>S.</given-names></name> <name><surname>Fanti</surname> <given-names>F.</given-names></name> <name><surname>Messuri</surname> <given-names>L.</given-names></name> <name><surname>Pepe</surname> <given-names>A.</given-names></name> <name><surname>Compagnone</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Development of an HPLC-MS/MS method for the determination of alkaloids in lupins</article-title>. <source>Molecules</source> <volume>28</volume>:<fpage>1531</fpage>. doi: <pub-id pub-id-type="doi">10.3390/molecules28041531</pub-id><pub-id pub-id-type="pmid">36838519</pub-id></mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Eugelio</surname> <given-names>F.</given-names></name> <name><surname>Palmieri</surname> <given-names>S.</given-names></name> <name><surname>Mascini</surname> <given-names>M.</given-names></name> <name><surname>Della Valle</surname> <given-names>F.</given-names></name> <name><surname>Fanti</surname> <given-names>F.</given-names></name> <name><surname>Oliva</surname> <given-names>E.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Fingerprinting alkaloids for traceability: semi-untargeted UHPLC-MS/MS approach in raw lupins as a case study</article-title>. <source>Food Chem. X</source> <volume>23</volume>:<fpage>101769</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.fochx.2024.101769</pub-id><pub-id pub-id-type="pmid">39280210</pub-id></mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="web"><collab>Food Innovation</collab> (<year>2020</year>). <article-title>Lupin Protein Market &#x02013; Global Industry Analysis 2015-2019 and Opportunity Assessment 2020-2030</article-title>. Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.persistencemarketresearch.com/market-research/lupin-protein-market.asp">https://www.persistencemarketresearch.com/market-research/lupin-protein-market.asp</ext-link> (Accessed August 3, 2025).</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Freire Diaz</surname> <given-names>P.</given-names></name> <name><surname>L&#x000F3;pez-Mendoza</surname> <given-names>X.</given-names></name> <name><surname>Casignia</surname> <given-names>B.</given-names></name> <name><surname>Cisneros Barahona</surname> <given-names>A. S.</given-names></name> <name><surname>Uvidia Fassler</surname> <given-names>M. I.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Classification of Andean chocho (<italic>Lupinus mutabilis</italic> Sweet) by shape and color using artificial vision,&#x0201D;</article-title> in <source>Artificial Intelligence, Computer and Software Engineering Advances</source>, eds. M. Botto-Tobar, H. Cruz, and A. D&#x000ED;az Cadena (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer International Publishing</publisher-name>), <fpage>64</fpage>&#x02013;<lpage>78</lpage>. doi: <pub-id pub-id-type="doi">10.1007/978-3-030-68080-0_5</pub-id></mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Frick</surname> <given-names>K. M.</given-names></name> <name><surname>Kamphuis</surname> <given-names>L. G.</given-names></name> <name><surname>Siddique</surname> <given-names>K. H. M.</given-names></name> <name><surname>Singh</surname> <given-names>K. B.</given-names></name> <name><surname>Foley</surname> <given-names>R. C.</given-names></name></person-group> (<year>2017</year>). <article-title>Quinolizidine alkaloid biosynthesis in lupins and prospects for grain quality improvement</article-title>. <source>Front. Plant Sci</source>. <volume>8</volume>:<fpage>87</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fpls.2017.00087</pub-id><pub-id pub-id-type="pmid">28197163</pub-id></mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Galea-Gragera</surname> <given-names>F. A.</given-names></name> <name><surname>Llera Cid</surname> <given-names>F.</given-names></name></person-group> (<year>2025</year>). <source>Spain Germplasm Bank of Annual Forage Legumes Seeds at the Scientific and Technological Research Center of Extremadura (CICYTEX)</source>. <publisher-loc>Singapore</publisher-loc>: <publisher-name>Springer Nature Singapore</publisher-name>, <fpage>1</fpage>&#x02013;<lpage>68</lpage>. doi: <pub-id pub-id-type="doi">10.1007/978-981-99-4236-7_102-1</pub-id></mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="thesis"><person-group person-group-type="author"><name><surname>Gragera</surname> <given-names>F. A. G.</given-names></name></person-group> (<year>2015</year>). <source>Desarrollo de un modelo predictivo usando tecnolog&#x000ED;a NIRs para determinar las extracciones del triticale de doble aptitud (forraje y grano)</source> (Ph.D. thesis). Universidad de Extremadura, Badajoz, Spain.</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Grisanti</surname> <given-names>E.</given-names></name> <name><surname>Totska</surname> <given-names>M.</given-names></name> <name><surname>Huber</surname> <given-names>S.</given-names></name> <name><surname>Krick Calderon</surname> <given-names>C.</given-names></name> <name><surname>Hohmann</surname> <given-names>M.</given-names></name> <name><surname>Lingenfelser</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Dynamic localized SNV, peak SNV, and partial peak SNV: novel standardization methods for preprocessing of spectroscopic data used in predictive modeling</article-title>. <source>J. Spectrosc</source>. <volume>2018</volume>:<fpage>5037572</fpage>. doi: <pub-id pub-id-type="doi">10.1155/2018/5037572</pub-id></mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Han</surname> <given-names>H.</given-names></name> <name><surname>Wang</surname> <given-names>W.-Y.</given-names></name> <name><surname>Mao</surname> <given-names>B.-H.</given-names></name></person-group> (<year>2005</year>). <article-title>&#x0201C;Borderline-SMOTE: a new over-sampling method in imbalanced data sets learning,&#x0201D;</article-title> in <source>International Conference on Intelligent Computing</source> (<publisher-loc>Berlin</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>878</fpage>&#x02013;<lpage>887</lpage>. doi: <pub-id pub-id-type="doi">10.1007/11538059_91</pub-id></mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>He</surname> <given-names>H.</given-names></name> <name><surname>Bai</surname> <given-names>Y.</given-names></name> <name><surname>Garcia</surname> <given-names>E. A.</given-names></name> <name><surname>Li</surname> <given-names>S.</given-names></name></person-group> (<year>2008</year>). <article-title>&#x0201C;ADASYN: adaptive synthetic sampling approach for imbalanced learning,&#x0201D;</article-title> in <source>2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence)</source> (<publisher-loc>Hong Kong</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1322</fpage>&#x02013;<lpage>1328</lpage>. doi: <pub-id pub-id-type="doi">10.1109/IJCNN.2008.4633969</pub-id></mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Johnson</surname> <given-names>S. K.</given-names></name> <name><surname>Clements</surname> <given-names>J.</given-names></name> <name><surname>Villarino</surname> <given-names>C. B. J.</given-names></name> <name><surname>Coorey</surname> <given-names>R.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;Lupins: their unique nutritional and health-promoting attributes,&#x0201D;</article-title> in <source>Gluten-Free Ancient Grains</source>, eds. J. R. N. Taylor, and J. M. Awika (<publisher-loc>Amsterdam</publisher-loc>: <publisher-name>Elsevier</publisher-name>), <fpage>179</fpage>&#x02013;<lpage>221</lpage>. doi: <pub-id pub-id-type="doi">10.1016/B978-0-08-100866-9.00008-X</pub-id></mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kaur</surname> <given-names>H.</given-names></name> <name><surname>Pannu</surname> <given-names>H. S.</given-names></name> <name><surname>Malhi</surname> <given-names>A. K.</given-names></name></person-group> (<year>2019</year>). <article-title>A systematic review on imbalanced data challenges in machine learning: applications and solutions</article-title>. <source>ACM Comput. Surv</source>. <volume>52</volume>, <fpage>1</fpage>&#x02013;<lpage>36</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3343440</pub-id></mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Khedr</surname> <given-names>T.</given-names></name> <name><surname>Gao</surname> <given-names>L.-L.</given-names></name> <name><surname>Kamphuis</surname> <given-names>L. G.</given-names></name> <name><surname>Bose</surname> <given-names>U.</given-names></name> <name><surname>Juh&#x000E1;sz</surname> <given-names>A.</given-names></name> <name><surname>Colgrave</surname> <given-names>M. L.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Evaluation of alkaloid levels in commercial and wild genotypes of narrow-leafed lupin</article-title>. <source>J. Food Compos. Anal</source>. <volume>135</volume>:<fpage>106600</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jfca.2024.106600</pub-id></mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Khedr</surname> <given-names>T.</given-names></name> <name><surname>Juh&#x000E1;sz</surname> <given-names>A.</given-names></name> <name><surname>Singh</surname> <given-names>K. B.</given-names></name> <name><surname>Foley</surname> <given-names>R.</given-names></name> <name><surname>Nye-Wood</surname> <given-names>M. G.</given-names></name> <name><surname>Colgrave</surname> <given-names>M. L.</given-names></name></person-group> (<year>2023</year>). <article-title>Development and validation of a rapid and sensitive LC-MS/MS approach for alkaloid testing in different <italic>Lupinus</italic> species</article-title>. <source>J. Food Compos. Anal</source>. <volume>121</volume>:<fpage>105391</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jfca.2023.105391</pub-id></mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="thesis"><person-group person-group-type="author"><name><surname>Kniepkamp</surname> <given-names>K.</given-names></name></person-group> (<year>2024</year>). <source>Extraction and determination of valuable components from lupin beans</source> (Ph.D. thesis). Universit&#x000E4;t Oldenburg, Oldenburg, Germany.</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kotsiantis</surname> <given-names>S. B.</given-names></name> <name><surname>Kanellopoulos</surname> <given-names>D.</given-names></name> <name><surname>Pintelas</surname> <given-names>P. E.</given-names></name></person-group> (<year>2006</year>). <article-title>Data preprocessing for supervised leaning</article-title>. <source>Int. J. Comput. Sci</source>. <volume>1</volume>, <fpage>111</fpage>&#x02013;<lpage>117</lpage>. doi: <pub-id pub-id-type="doi">10.5281/zenodo.1082415</pub-id></mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kroc</surname> <given-names>M.</given-names></name> <name><surname>Rybi&#x00144;ski</surname> <given-names>W.</given-names></name> <name><surname>Wilczura</surname> <given-names>P.</given-names></name> <name><surname>Kamel</surname> <given-names>K.</given-names></name> <name><surname>Kaczmarek</surname> <given-names>Z.</given-names></name> <name><surname>Barzyk</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Quantitative and qualitative analysis of alkaloids composition in the seeds of a white lupin (<italic>Lupinus albus</italic> L.) collection</article-title>. <source>Genet. Resour. Crop Evol</source>. <volume>64</volume>, <fpage>1853</fpage>&#x02013;<lpage>1860</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10722-016-0473-1</pub-id></mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lee</surname> <given-names>H.-W.</given-names></name> <name><surname>Hwang</surname> <given-names>I.-M.</given-names></name> <name><surname>Lee</surname> <given-names>H. M.</given-names></name> <name><surname>Yang</surname> <given-names>J.-S.</given-names></name> <name><surname>Park</surname> <given-names>E. J.</given-names></name> <name><surname>Choi</surname> <given-names>J. W.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Validation and determination of quinolizidine alkaloids (QAs) in lupin products by gas chromatography with flame ionization detection (GC-FID)</article-title>. <source>Anal. Lett</source>. <volume>53</volume>, <fpage>606</fpage>&#x02013;<lpage>613</lpage>. doi: <pub-id pub-id-type="doi">10.1080/00032719.2019.1661423</pub-id></mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>L&#x000F3;pez</surname> <given-names>O. A. M.</given-names></name> <name><surname>L&#x000F3;pez</surname> <given-names>A. M.</given-names></name> <name><surname>Crossa</surname> <given-names>J.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;Overfitting, model tuning, and evaluation of prediction performance,&#x0201D;</article-title> in <source>Multivariate Statistical Machine Learning Methods for Genomic Prediction</source> (<publisher-name>Springer</publisher-name>), <fpage>109</fpage>&#x02013;<lpage>139</lpage>. doi: <pub-id pub-id-type="doi">10.1007/978-3-030-89010-0_4</pub-id></mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>L&#x000F3;pez</surname> <given-names>V.</given-names></name> <name><surname>Fern&#x000E1;ndez</surname> <given-names>A.</given-names></name> <name><surname>Garc&#x000ED;a</surname> <given-names>S.</given-names></name> <name><surname>Palade</surname> <given-names>V.</given-names></name> <name><surname>Herrera</surname> <given-names>F.</given-names></name></person-group> (<year>2013</year>). <article-title>An insight into classification with imbalanced data: empirical results and current trends on using data intrinsic characteristics</article-title>. <source>Inf. Sci</source>. <volume>250</volume>, <fpage>113</fpage>&#x02013;<lpage>141</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ins.2013.07.007</pub-id></mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Madelou</surname> <given-names>N. A.</given-names></name> <name><surname>Melliou</surname> <given-names>E.</given-names></name> <name><surname>Magiatis</surname> <given-names>P.</given-names></name></person-group> (<year>2024</year>). <article-title>Quantitation of <italic>Lupinus</italic> spp. quinolizidine alkaloids by qNMR and accelerated debittering with a resin-based protocol</article-title>. <source>Molecules</source> <volume>29</volume>:<fpage>582</fpage>. doi: <pub-id pub-id-type="doi">10.3390/molecules29030582</pub-id><pub-id pub-id-type="pmid">38338327</pub-id></mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Manley</surname> <given-names>M.</given-names></name></person-group> (<year>2014</year>). <article-title>Near-infrared spectroscopy and hyperspectral imaging: non-destructive analysis of biological materials</article-title>. <source>Chem. Soc. Rev</source>. <volume>43</volume>, <fpage>8200</fpage>&#x02013;<lpage>8214</lpage>. doi: <pub-id pub-id-type="doi">10.1039/C4CS00062E</pub-id><pub-id pub-id-type="pmid">25156745</pub-id></mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mazumder</surname> <given-names>K.</given-names></name> <name><surname>Aktar</surname> <given-names>A.</given-names></name> <name><surname>Kerr</surname> <given-names>P. G.</given-names></name> <name><surname>Dash</surname> <given-names>R.</given-names></name> <name><surname>Blanchard</surname> <given-names>C. L.</given-names></name> <name><surname>Gulzarul Aziz</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Insights into seed coats of nine cultivars of Australian lupin: unravelling LC-QTOF MS-based biochemical profiles, nutritional, functional, antioxidant, and antidiabetic properties together with rationalizing antidiabetic mechanism by <italic>in silico</italic> approaches</article-title>. <source>Food Res. Int</source>. <volume>195</volume>:<fpage>114970</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.foodres.2024.114970</pub-id><pub-id pub-id-type="pmid">39277267</pub-id></mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Miller</surname> <given-names>C. E.</given-names></name></person-group> (<year>2010</year>). <article-title>&#x0201C;Chemometrics in process analytical technology (PAT),&#x0201D;</article-title> in <source>Process Analytical Technology: Spectroscopic Tools and Implementation Strategies for the Chemical and Pharmaceutical Industries</source>, ed. K. A. Bakeev (<publisher-loc>Chichester</publisher-loc>: <publisher-name>John Wiley &#x00026; Sons, Ltd.</publisher-name>), <fpage>353</fpage>&#x02013;<lpage>438</lpage>. doi: <pub-id pub-id-type="doi">10.1002/9780470689592.ch12</pub-id></mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mirza</surname> <given-names>S.</given-names></name> <name><surname>Hussaini</surname> <given-names>A. A.</given-names></name> <name><surname>&#x000D6;zt&#x000FC;rk</surname> <given-names>G.</given-names></name> <name><surname>Turgut</surname> <given-names>M.</given-names></name> <name><surname>&#x000D6;zt&#x000FC;rk</surname> <given-names>T.</given-names></name> <name><surname>Tugay</surname> <given-names>O.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Photocatalytic and antibacterial activities of ZnO nanoparticles synthesized from <italic>Lupinus albus</italic> and <italic>Lupinus pilosus</italic> plant extracts via green synthesis approach</article-title>. <source>Inorg. Chem. Commun</source>. <volume>155</volume>:<fpage>111124</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.inoche.2023.111124</pub-id></mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mishra</surname> <given-names>P.</given-names></name> <name><surname>Biancolillo</surname> <given-names>A.</given-names></name> <name><surname>Roger</surname> <given-names>J. M.</given-names></name> <name><surname>Marini</surname> <given-names>F.</given-names></name> <name><surname>Rutledge</surname> <given-names>D. N.</given-names></name></person-group> (<year>2020</year>). <article-title>New data preprocessing trends based on ensemble of multiple preprocessing techniques</article-title>. <source>Trends Anal. Chem</source>. <volume>132</volume>:<fpage>116045</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.trac.2020.116045</pub-id></mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Namdar</surname> <given-names>D.</given-names></name> <name><surname>Mulder</surname> <given-names>P. P. J.</given-names></name> <name><surname>Ben-Simchon</surname> <given-names>E.</given-names></name> <name><surname>Hacham</surname> <given-names>Y.</given-names></name> <name><surname>Basheer</surname> <given-names>L.</given-names></name> <name><surname>Cohen</surname> <given-names>O.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>New analytical approach to quinolizidine alkaloids and their assumed biosynthesis pathways in lupin seeds</article-title>. <source>Toxins</source> <volume>16</volume>:<fpage>163</fpage>. doi: <pub-id pub-id-type="doi">10.3390/toxins16030163</pub-id><pub-id pub-id-type="pmid">38535829</pub-id></mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nguyen</surname> <given-names>H. M.</given-names></name> <name><surname>Cooper</surname> <given-names>E. W.</given-names></name> <name><surname>Kamei</surname> <given-names>K.</given-names></name></person-group> (<year>2011</year>). <article-title>Borderline over-sampling for imbalanced data classification</article-title>. <source>Int. J. Knowl. Eng. Soft Data Paradig</source>. <volume>3</volume>, <fpage>4</fpage>&#x02013;<lpage>21</lpage>. doi: <pub-id pub-id-type="doi">10.1504/IJKESDP.2011.039875</pub-id></mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Nicola&#x000EF;</surname> <given-names>B. M.</given-names></name> <name><surname>Beullens</surname> <given-names>K.</given-names></name> <name><surname>Bobelyn</surname> <given-names>E.</given-names></name> <name><surname>Peirs</surname> <given-names>A.</given-names></name> <name><surname>Saeys</surname> <given-names>W.</given-names></name> <name><surname>Theron</surname> <given-names>K. I.</given-names></name> <etal/></person-group>. (<year>2007</year>). <article-title>Nondestructive measurement of fruit and vegetable quality by means of NIR spectroscopy: a review</article-title>. <source>Postharvest Biol. Technol</source>. <volume>46</volume>, <fpage>99</fpage>&#x02013;<lpage>118</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.postharvbio.2007.06.024</pub-id></mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Petropoulos</surname> <given-names>T.</given-names></name> <name><surname>Benos</surname> <given-names>L.</given-names></name> <name><surname>Berruto</surname> <given-names>R.</given-names></name> <name><surname>Miserendino</surname> <given-names>G.</given-names></name> <name><surname>Marinoudi</surname> <given-names>V.</given-names></name> <name><surname>Busato</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Interpretable machine learning for legume yield prediction using satellite remote sensing data</article-title>. <source>Appl. Sci</source>. <volume>15</volume>:<fpage>7074</fpage>. doi: <pub-id pub-id-type="doi">10.3390/app15137074</pub-id></mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rinnan</surname> <given-names>&#x000C5;.</given-names></name> <name><surname>Van Den Berg</surname> <given-names>F.</given-names></name> <name><surname>Engelsen</surname> <given-names>S. B.</given-names></name></person-group> (<year>2009</year>). <article-title>Review of the most common pre-processing techniques for near-infrared spectra</article-title>. <source>Trends Anal. Chem</source>. <volume>28</volume>, <fpage>1201</fpage>&#x02013;<lpage>1222</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.trac.2009.07.007</pub-id></mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Roggo</surname> <given-names>Y.</given-names></name> <name><surname>Chalus</surname> <given-names>P.</given-names></name> <name><surname>Maurer</surname> <given-names>L.</given-names></name> <name><surname>Lema-Martinez</surname> <given-names>C.</given-names></name> <name><surname>Edmond</surname> <given-names>A.</given-names></name> <name><surname>Jent</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2007</year>). <article-title>A review of near infrared spectroscopy and chemometrics in pharmaceutical technologies</article-title>. <source>J. Pharm. Biomed. Anal</source>. <volume>44</volume>, <fpage>683</fpage>&#x02013;<lpage>700</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jpba.2007.03.023</pub-id><pub-id pub-id-type="pmid">17482417</pub-id></mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Romeo</surname> <given-names>F. V.</given-names></name> <name><surname>Fabroni</surname> <given-names>S.</given-names></name> <name><surname>Ballistreri</surname> <given-names>G.</given-names></name> <name><surname>Muccilli</surname> <given-names>S.</given-names></name> <name><surname>Spina</surname> <given-names>A.</given-names></name> <name><surname>Rapisarda</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Characterization and antimicrobial activity of alkaloid extracts from seeds of different genotypes of <italic>Lupinus</italic> spp</article-title>. <source>Sustainability</source> <volume>10</volume>:<fpage>788</fpage>. doi: <pub-id pub-id-type="doi">10.3390/su10030788</pub-id></mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sabat-Tomala</surname> <given-names>A.</given-names></name> <name><surname>Raczko</surname> <given-names>E.</given-names></name> <name><surname>Zagajewski</surname> <given-names>B.</given-names></name></person-group> (<year>2021</year>). <article-title>Mapping invasive plant species with hyperspectral data based on iterative accuracy assessment techniques</article-title>. <source>Remote Sens</source>. <volume>14</volume>:<fpage>64</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs14010064</pub-id></mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sabat-Tomala</surname> <given-names>A.</given-names></name> <name><surname>Raczko</surname> <given-names>E.</given-names></name> <name><surname>Zagajewski</surname> <given-names>B.</given-names></name></person-group> (<year>2024</year>). <article-title>Airborne hyperspectral images and machine learning algorithms for the identification of lupine invasive species in Natura 2000 meadows</article-title>. <source>Remote Sens</source>. <volume>16</volume>:<fpage>580</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs16030580</pub-id></mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Saini</surname> <given-names>A.</given-names></name> <name><surname>Seni</surname> <given-names>K.</given-names></name> <name><surname>Chawla</surname> <given-names>P. A.</given-names></name> <name><surname>Chawla</surname> <given-names>V.</given-names></name> <name><surname>Ganti</surname> <given-names>S. S.</given-names></name></person-group> (<year>2024</year>). <article-title>An insight into recent updates on analytical techniques for bioactive alkaloids</article-title>. <source>Phytochem. Anal</source>. <volume>35</volume>, <fpage>423</fpage>&#x02013;<lpage>444</lpage>. doi: <pub-id pub-id-type="doi">10.1002/pca.3338</pub-id><pub-id pub-id-type="pmid">38369684</pub-id></mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Salsano</surname> <given-names>E.</given-names></name> <name><surname>Rossi</surname> <given-names>O.</given-names></name> <name><surname>Rispoli</surname> <given-names>C.</given-names></name> <name><surname>Zuccarino</surname> <given-names>M.</given-names></name> <name><surname>Landi</surname> <given-names>E.</given-names></name> <name><surname>Carifi</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Lupin poisoning in a geriatric patient: a case report</article-title>. <source>Geriatr. Care</source> <volume>11</volume>:<fpage>13627</fpage>. doi: <pub-id pub-id-type="doi">10.4081/gc.2025.13627</pub-id></mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schulze-Br&#x000FC;ninghoff</surname> <given-names>D.</given-names></name> <name><surname>Wachendorf</surname> <given-names>M.</given-names></name> <name><surname>Astor</surname> <given-names>T.</given-names></name></person-group> (<year>2021</year>). <article-title>Potentials and limitations of WorldView-3 data for the detection of invasive <italic>Lupinus polyphyllus</italic> Lindl. in semi-natural grasslands</article-title>. <source>Remote Sens</source>. <volume>13</volume>:<fpage>4333</fpage>. doi: <pub-id pub-id-type="doi">10.3390/rs13214333</pub-id></mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schwertfirm</surname> <given-names>G.</given-names></name> <name><surname>Schneider</surname> <given-names>M.</given-names></name> <name><surname>Haase</surname> <given-names>F.</given-names></name> <name><surname>Riedel</surname> <given-names>C.</given-names></name> <name><surname>Lazzaro</surname> <given-names>M.</given-names></name> <name><surname>Ruge-Wehling</surname> <given-names>B.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Genome-wide association study revealed significant SNPs for anthracnose resistance, seed alkaloids and protein content in white lupin</article-title>. <source>Theor. Appl. Genet</source>. <volume>137</volume>:<fpage>155</fpage>. doi: <pub-id pub-id-type="doi">10.1007/s00122-024-04665-2</pub-id><pub-id pub-id-type="pmid">38858311</pub-id></mixed-citation>
</ref>
<ref id="B53">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sem</surname> <given-names>V.</given-names></name></person-group> (<year>2021</year>). <article-title>Interpretability of selected variables and performance comparison of variable selection methods in a polyethylene and polypropylene NIR classification task</article-title>. <source>Spectrochim. Acta A Mol. Biomol. Spectrosc</source>. <volume>258</volume>:<fpage>119850</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.saa.2021.119850</pub-id><pub-id pub-id-type="pmid">33957449</pub-id></mixed-citation>
</ref>
<ref id="B54">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sharma</surname> <given-names>B.</given-names></name> <name><surname>Bhatia</surname> <given-names>R.</given-names></name> <name><surname>Ganti</surname> <given-names>S. S.</given-names></name> <name><surname>Rangra</surname> <given-names>N. K.</given-names></name></person-group> (<year>2024</year>). <article-title>Recent trends in the detection of alkaloids through analytical, bioanalytical, and electrochemical techniques</article-title>. <source>Curr. Pharm. Anal</source>. <volume>20</volume>, <fpage>241</fpage>&#x02013;<lpage>263</lpage>. doi: <pub-id pub-id-type="doi">10.2174/0115734129307329240430071035</pub-id></mixed-citation>
</ref>
<ref id="B55">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Siche</surname> <given-names>R.</given-names></name> <name><surname>Vejarano</surname> <given-names>R.</given-names></name> <name><surname>Aredo</surname> <given-names>V.</given-names></name> <name><surname>Velasquez</surname> <given-names>L.</given-names></name> <name><surname>Saldana</surname> <given-names>E.</given-names></name> <name><surname>Quevedo</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Evaluation of food quality and safety with hyperspectral imaging (HSI)</article-title>. <source>Food Eng. Rev</source>. <volume>8</volume>, <fpage>306</fpage>&#x02013;<lpage>322</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s12393-015-9137-8</pub-id></mixed-citation>
</ref>
<ref id="B56">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Singh</surname> <given-names>K. K.</given-names></name> <name><surname>Surasinghe</surname> <given-names>T. D.</given-names></name> <name><surname>Frazier</surname> <given-names>A. E.</given-names></name></person-group> (<year>2024</year>). <article-title>Systematic review and best practices for drone remote sensing of invasive plants</article-title>. <source>Methods Ecol. Evol</source>. <volume>15</volume>, <fpage>998</fpage>&#x02013;<lpage>1015</lpage>. doi: <pub-id pub-id-type="doi">10.1111/2041-210X.14330</pub-id></mixed-citation>
</ref>
<ref id="B57">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Valente</surname> <given-names>I. M.</given-names></name> <name><surname>Monteiro</surname> <given-names>A.</given-names></name> <name><surname>Sousa</surname> <given-names>C.</given-names></name> <name><surname>Miranda</surname> <given-names>C.</given-names></name> <name><surname>Maia</surname> <given-names>M. R. G.</given-names></name> <name><surname>Castro</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Agronomic, nutritional traits, and alkaloids of <italic>Lupinus albus</italic>, <italic>Lupinus angustifolius</italic> and <italic>Lupinus luteus</italic> genotypes: effect of sowing dates and locations</article-title>. <source>ACS Agric. Sci. Technol</source>. <volume>4</volume>, <fpage>450</fpage>&#x02013;<lpage>462</lpage>. doi: <pub-id pub-id-type="doi">10.1021/acsagscitech.3c00581</pub-id><pub-id pub-id-type="pmid">38638685</pub-id></mixed-citation>
</ref>
<ref id="B58">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>L.</given-names></name> <name><surname>Han</surname> <given-names>M.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>N.</given-names></name> <name><surname>Cheng</surname> <given-names>H.</given-names></name></person-group> (<year>2021</year>). <article-title>Review of classification methods on unbalanced data sets</article-title>. <source>IEEE Access</source> <volume>9</volume>, <fpage>64606</fpage>&#x02013;<lpage>64628</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ACCESS.2021.3074243</pub-id></mixed-citation>
</ref>
<ref id="B59">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Weyer</surname> <given-names>L. G.</given-names></name> <name><surname>Lo</surname> <given-names>S.-C.</given-names></name></person-group> (<year>2006</year>). <article-title>&#x0201C;Spectra-structure correlations in the near-infrared,&#x0201D;</article-title> in <source>Handbook of Vibrational Spectroscopy</source>, eds. J. M. Chalmers, and P. R. Griffiths (<publisher-loc>Chichester</publisher-loc>: <publisher-name>John Wiley &#x00026; Sons, Ltd.</publisher-name>), <fpage>1817</fpage>&#x02013;<lpage>1837</lpage>.</mixed-citation>
</ref>
<ref id="B60">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wijesingha</surname> <given-names>J.</given-names></name> <name><surname>Astor</surname> <given-names>T.</given-names></name> <name><surname>Schulze-Br&#x000FC;ninghoff</surname> <given-names>D.</given-names></name> <name><surname>Wachendorf</surname> <given-names>M.</given-names></name></person-group> (<year>2020</year>). <article-title>Mapping invasive <italic>Lupinus polyphyllus</italic> Lindl. in semi-natural grasslands using object-based image analysis of UAV-borne images</article-title>. <source>J. Photogramm. Remote Sens. Geoinf. Sci</source>. <volume>88</volume>, <fpage>391</fpage>&#x02013;<lpage>406</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s41064-020-00121-0</pub-id></mixed-citation>
</ref>
<ref id="B61">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>H.</given-names></name> <name><surname>Mouazen</surname> <given-names>A. M.</given-names></name></person-group> (<year>2012</year>). <article-title>&#x0201C;Vis/near- and mid-infrared spectroscopy for predicting soil N and C at a farm scale,&#x0201D;</article-title> in <source>Infrared Spectroscopy - Life and Biomedical Sciences</source>, ed. T. Theophanides (<publisher-loc>London</publisher-loc>: <publisher-name>InTech</publisher-name>), <fpage>185</fpage>&#x02013;<lpage>210</lpage>. doi: <pub-id pub-id-type="doi">10.5772/36393</pub-id></mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2842862/overview">Yang Liu</ext-link>, Tarim University, China</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1800737/overview">Siddhant Ranjan Padhi</ext-link>, Indian Agricultural Research Institute (ICAR), India</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3280692/overview">Vladimir Henao C&#x000E9;spedes</ext-link>, Catholic University of Manizales, Colombia</p>
</fn>
</fn-group>
<fn-group>
<fn id="fn0003"><label>1</label><p><ext-link ext-link-type="uri" xlink:href="https://www.malvernpanalytical.com/">https://www.malvernpanalytical.com/</ext-link></p></fn>
</fn-group>
</back>
</article>