<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2024.1474207</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Efficient and accurate tobacco leaf maturity detection: an improved YOLOv10 model with DCNv3 and efficient local attention integration</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Shi</surname>
<given-names>Yi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Hong</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Fei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Yingkuan</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Liu</surname>
<given-names>Jianjun</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2772251"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Zhao</surname>
<given-names>Long</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2254152"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Hui</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Feng</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Cheng</surname>
<given-names>Qiongmin</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Qing</surname>
<given-names>Shunhao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2840407"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>College of Agricultural Equipment Engineering, Henan University of Science and Technology</institution>, <addr-line>Luoyang, Henan</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Henan Province Tobacco Company, Luoyang Company</institution>, <addr-line>Luoyang</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Academy of Agricultural Planning and Engineering, Ministry of Agriculture and Rural Affairs</institution>, <addr-line>Beijing</addr-line>, <country>China</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Henan Province Tobacco Company</institution>, <addr-line>Zhengzhou</addr-line>, <country>China</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>College of Horticulture and Plant Protection, Henan University of Science and Technology</institution>, <addr-line>Luoyang</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Zhao Zhang, China Agricultural University, China</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Ruiheng Zhang, Beijing Institute of Technology, China</p>
<p>Guoxu Liu, Weifang University, China</p>
<p>Muhammad Hilal Kabir, China Agricultural University, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Jianjun Liu, <email xlink:href="mailto:liujianjun20222024@163.com">liujianjun20222024@163.com</email>; Long Zhao, <email xlink:href="mailto:hkdzhaolong@haust.edu.cn">hkdzhaolong@haust.edu.cn</email>; Hui Wang, <email xlink:href="mailto:huiwang0524@163.com">huiwang0524@163.com</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>03</day>
<month>01</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1474207</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>08</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>05</day>
<month>12</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Shi, Wang, Wang, Wang, Liu, Zhao, Wang, Zhang, Cheng and Qing</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Shi, Wang, Wang, Wang, Liu, Zhao, Wang, Zhang, Cheng and Qing</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>The precise determination of tobacco leaf maturity is pivotal for safeguarding the taste and quality of tobacco products, augmenting the financial gains of tobacco growers, and propelling the industry&#x2019;s sustainable progression. This research addresses the inherent subjectivity and variability in conventional maturity evaluation techniques reliant on human expertise by introducing an innovative YOLOv10-based method for tobacco leaf maturity detection. This technique facilitates a rapid and non-invasive assessment of leaf maturity, significantly elevating the accuracy and efficiency of tobacco leaf quality evaluation. In our study, we have advanced the YOLOv10 framework by integrating DCNv3 with C2f to construct an enhanced neck network, designated as C2f-DCNv3. This integration is designed to augment the model&#x2019;s capability for feature integration, particularly concerning the morphological and edge characteristics of tobacco leaves. Furthermore, the incorporation of the Efficient Local Attention (ELA) mechanism at multiple stages of the model has substantially enhanced the efficiency and fidelity of feature extraction. The empirical results underscore the model&#x2019;s pronounced enhancement in performance across all maturity classifications. Notably, the overall precision (P) has been elevated from 0.939 to 0.973, the recall rate (R) has improved from 0.968 to 0.984, the mean average precision at 50% intersection over union (mAP50) has advanced from 0.984 to 0.994, and the mean average precision across the 50% to 95% intersection over union range (mAP50-95) has risen from 0.962 to 0.973. This research presents the tobacco industry with a novel rapid detection instrument for tobacco leaf maturity, endowed with substantial practical utility and broad prospects for application. 
Future research endeavors will be directed towards further optimization of the model&#x2019;s architecture to bolster its generalizability and to explore its implementation within the realm of actual tobacco cultivation and processing.</p>
</abstract>
<kwd-group>
<kwd>tobacco leaf maturity</kwd>
<kwd>YOLOv10</kwd>
<kwd>DCNv3</kwd>
<kwd>efficient local attention</kwd>
<kwd>targeted detection</kwd>
</kwd-group>
<counts>
<fig-count count="9"/>
<table-count count="5"/>
<equation-count count="5"/>
<ref-count count="38"/>
<page-count count="14"/>
<word-count count="6002"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Sustainable and Intelligent Phytoprotection</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>The maturity of tobacco leaves is a critical factor that directly influences their quality and, consequently, the taste and value of tobacco products (<xref ref-type="bibr" rid="B4">Cai et&#xa0;al., 2005</xref>; <xref ref-type="bibr" rid="B32">Yin et&#xa0;al., 2019</xref>). This is of paramount importance for the sustainability of the tobacco industry and the economic well-being of tobacco farmers (<xref ref-type="bibr" rid="B17">Kays, 2011</xref>). Achieving consistent and accurate assessments of tobacco leaf maturity is vital, as it enables more precise harvesting and curing methods that optimize both the aromatic profile and minimize harmful chemicals in the leaves (<xref ref-type="bibr" rid="B5">Cakir and Cebi, 2010</xref>). Traditionally, farmers have relied on subjective experience to assess leaf maturity, which can lead to inconsistent outcomes and missed opportunities for optimal harvest timing (<xref ref-type="bibr" rid="B6">Chen et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B26">Sun et&#xa0;al., 2023b</xref>).</p>
<p>Despite the progress in tobacco classification techniques, including the use of hyperspectral imaging and machine learning models, the practical adoption of these methods has been limited due to high equipment costs, complexity, and the need for specialized skills (<xref ref-type="bibr" rid="B7">Chen et&#xa0;al., 2021</xref>). These factors highlight a significant technical gap: the need for an accessible, non-destructive method for assessing tobacco leaf maturity in the field.</p>
<p>The advantage of object detection methods in maturity recognition lies in their ability to accurately localize and categorize each target within images, thereby enabling rapid and efficient identification and classification of agricultural products at various stages of ripeness. To meet the practical needs of farmers, our research proposes an innovative solution by leveraging advances in machine vision and object detection for real-time, accurate, and affordable field-based maturity detection of tobacco leaves. Specifically, we develop a lightweight YOLOv10-based algorithm integrated with Deformable Convolutional Networks (DCNv3) and an Efficient Local Attention (ELA) mechanism. Our approach emphasizes real-time processing, affordability, and accuracy, addressing the challenges in field conditions. The primary contributions of this study are as follows:</p>
<list list-type="order">
<list-item>
<p>We propose an advanced network structure combining YOLOv10 and DCNv3, enhancing feature aggregation and detection accuracy.</p>
</list-item>
<list-item>
<p>We introduce the ELA attention mechanism to replace the PSA module in the YOLOv10 backbone, improving feature representation.</p>
</list-item>
<list-item>
<p>We incorporate the ELA attention mechanism between the backbone and neck networks, further boosting overall model performance.</p>
</list-item>
<list-item>
<p>We conduct comprehensive experiments analyzing the influence of various network architectures and attention mechanisms on detection efficacy, aiming to optimize the lightweight performance of the model.</p>
</list-item>
</list>
<p>The remainder of this paper is organized as follows: Section 2 presents a detailed literature review of recent advancements in tobacco leaf classification and detection technologies. Section 3 describes our proposed method, including the YOLOv10 architecture and the ELA attention mechanism. Section 4 provides the experimental setup and results. Finally, Section 5 concludes the paper and outlines potential directions for future work.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Related work</title>
<p>In recent years, significant advances have been made in the use of spectral data and machine learning for the detection and classification of tobacco leaves. These technologies have proven effective in determining the maturity and quality of leaves, though challenges such as high costs and complex implementations remain.</p>
<p>Spectral imaging has emerged as a powerful tool for the classification of agricultural products, including tobacco leaves. Early efforts, such as those by <xref ref-type="bibr" rid="B21">Long et&#xa0;al. (2019)</xref>, utilized hyperspectral imaging combined with Savitzky-Golay smoothing filters and multiplicative scatter correction, achieving an impressive 99% classification accuracy of tobacco leaves and impurities. Similarly, <xref ref-type="bibr" rid="B22">Lu et&#xa0;al. (2023)</xref> refined the maturity assessment of flue-cured tobacco using Partial Least Squares Discriminant Analysis (PLS-DA), obtaining 99.32% accuracy on the validation set.</p>
<p>However, despite their high accuracy, these hyperspectral approaches face notable barriers, including the cost of spectrometers and their limited portability, making them less accessible to the average tobacco farmer. The reliance on specialized technical skills further complicates the wide adoption of such methods in practical farming scenarios (<xref ref-type="bibr" rid="B2">Be&#x107; et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B14">Hussain et&#xa0;al., 2018</xref>).</p>
<p>In response to the limitations of hyperspectral imaging, machine learning models have been increasingly applied to tobacco leaf classification and detection (<xref ref-type="bibr" rid="B34">Zhang et&#xa0;al., 2024</xref>). <xref ref-type="bibr" rid="B19">Li et&#xa0;al. (2021)</xref> designed a lightweight network based on MobileNetV2 for assessing tobacco leaf maturity. This model balanced accuracy with computational efficiency, making it more practical for real-world deployment. Similarly, <xref ref-type="bibr" rid="B15">Jia et&#xa0;al. (2023)</xref> proposed a model based on YOLOv7 and the LWC algorithm for detecting mixed tobacco strands. This model achieved a high detection accuracy (mAP@0.5&#xa0;=&#xa0;0.932) and fast processing speed, demonstrating the viability of real-time detection in agriculture. <xref ref-type="bibr" rid="B30">Xiong et&#xa0;al. (2024)</xref> introduced the DiffuCNN model, designed for detecting tobacco diseases in complex, low-resolution environments. This model incorporated a diffusion enhancement module and achieved a precision of 0.98 with a processing speed of 62 FPS, outperforming other models in accuracy and efficiency. Meanwhile, <xref ref-type="bibr" rid="B12">He et&#xa0;al. (2023)</xref> developed the FSWPNet model, combining pyramid feature fusion with shifted window self-attention for improved classification of tobacco leaves, achieving an average classification precision of 75.8%.</p>
<p>Deep learning models, particularly those based on convolutional neural networks (CNNs), have played a significant role in advancing agricultural object detection (<xref ref-type="bibr" rid="B3">Biradar and Hosalli, 2024</xref>; <xref ref-type="bibr" rid="B16">Kang and Chen, 2020</xref>; <xref ref-type="bibr" rid="B18">LeCun et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B37">Zhao et&#xa0;al., 2022</xref>). The You Only Look Once (YOLO) series (<xref ref-type="bibr" rid="B13">Hussain, 2023</xref>) and SSD (<xref ref-type="bibr" rid="B20">Liu et&#xa0;al., 2016</xref>) exemplify single-stage algorithms, which swiftly localize and classify objects in a unified forward pass, aligning with the needs of real-time detection tasks (<xref ref-type="bibr" rid="B25">Soviany and Ionescu, 2018</xref>). Conversely, two-stage algorithms, such as Faster R-CNN (<xref ref-type="bibr" rid="B24">Ren et&#xa0;al., 2016</xref>) and Sparse R-CNN (<xref ref-type="bibr" rid="B27">Sun et&#xa0;al., 2023a</xref>), initiate with a Region Proposal Network (RPN) to delineate potential object regions, proceeding with classifiers for nuanced classification and localization (<xref ref-type="bibr" rid="B9">Du et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B33">Zhang et&#xa0;al., 2023a</xref>, <xref ref-type="bibr" rid="B35">2021</xref>). Single-stage algorithms excel in their rapid and efficient processing, well-suited for high-speed application contexts (<xref ref-type="bibr" rid="B11">He et&#xa0;al., 2024</xref>). The YOLO series of models, such as YOLOv5, YOLOv6, and YOLOv7, have demonstrated their suitability for real-time detection tasks due to their single-stage nature, which allows for rapid localization and classification (<xref ref-type="bibr" rid="B13">Hussain, 2023</xref>). 
Although two-stage algorithms like Faster R-CNN offer higher precision, single-stage models are better suited for real-time applications due to their speed and reduced computational requirements (<xref ref-type="bibr" rid="B1">Bacea and Oniga, 2023</xref>).</p>
<p>Despite these advances, most research has focused on post-harvest tobacco leaf classification, a destructive process that may lead to waste. Few studies have explored non-destructive, field-based methods for detecting tobacco leaf maturity. This represents a critical gap in the literature, as non-destructive methods would allow for more accurate and timely harvesting decisions, ultimately benefiting both the quality of the tobacco and the economic returns for farmers (<xref ref-type="bibr" rid="B36">Zhang et&#xa0;al., 2023b</xref>).</p>
<p>Furthermore, the integration of attention mechanisms and deformable convolutions has been limited in the context of tobacco leaf detection. Recent studies have demonstrated the potential of these techniques to improve feature extraction and enhance model performance (<xref ref-type="bibr" rid="B8">Cheng et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B10">Du et&#xa0;al., 2025</xref>; <xref ref-type="bibr" rid="B23">Qing et&#xa0;al., 2024</xref>), suggesting that their incorporation into lightweight models like YOLOv10 could address both the accuracy and efficiency needs of practical agricultural applications.</p>
<p>The existing literature highlights several successful applications of spectral imaging and deep learning in tobacco leaf classification. However, the technical challenges associated with hyperspectral imaging and the lack of non-destructive methods for assessing tobacco leaf maturity underscore the need for new approaches. Our research builds upon these prior studies by introducing a YOLOv10-based lightweight model that incorporates DCNv3 and the ELA attention mechanism, addressing both the accuracy and computational constraints of field-based tobacco leaf maturity detection.</p>
</sec>
<sec id="s3" sec-type="materials|methods">
<label>3</label>
<title>Materials and methods</title>
<sec id="s3_1">
<label>3.1</label>
<title>Data collection and dataset construction</title>
<p>The research utilized a dataset of tobacco leaf maturity images, which was established from the collection of leaves in the tobacco cultivation region of Luoning County, Luoyang City, within Henan Province. For the acquisition of field data, the study employed the rear camera of a Huawei Honor 20 smartphone, featuring a 32-megapixel high-resolution sensor. To minimize the impact of lighting conditions on the leaf maturity recognition, the data was collected exclusively during daylight and under clear skies. To further augment the complexity of the dataset and enhance the robustness of our model, we employed data augmentation techniques such as rotation, scaling, flipping, and the addition of noise. The tobacco leaves were classified into three distinct maturity stages: immature, mature, and over-mature. Immature leaves, characterized by their green color, are not harvest-ready. Mature leaves are identified as the optimal stage for harvesting without compromising the final product&#x2019;s quality. Over-mature leaves, indicative of an excessive degree of maturity, are prone to significant losses during the harvesting and subsequent processing stages. In this study, the dataset was randomly partitioned following an 8:1:1 ratio into training, validation, and test sets, respectively. The training set consists of 1,752 images, the validation set contains 370 images, and the test set comprises 373 images. Representative images from the developed tobacco leaf maturity dataset are depicted in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>The sample image of the tobacco maturity dataset.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1474207-g001.tif"/>
</fig>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Constructing the tobacco maturity detection model</title>
<sec id="s3_2_1">
<label>3.2.1</label>
<title>The basic network structure of YOLOv10n</title>
<p>YOLOv10, the state-of-the-art real-time, end-to-end object detection model from the research team at Tsinghua University (<xref ref-type="bibr" rid="B28">Wang et&#xa0;al., 2024</xref>), stands as the pinnacle of the YOLO series. It preserves the real-time detection performance while substantially increasing the accuracy and efficiency of detection through a series of innovative advancements. The principal network framework is elegantly portrayed in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>The structure of YOLOv10.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1474207-g002.tif"/>
</fig>
<p>YOLOv10 has discarded the traditional Non-Maximum Suppression (NMS), facilitating an end-to-end training paradigm that forgoes NMS through a coherent dual-task assignment strategy, which in turn minimizes inference latency and expedites detection rates. The architecture of YOLOv10 is distinguished by its refined Backbone, Neck, and Head structures. The Backbone benefits from an advanced Cross Stage Partial Network that amplifies feature extraction prowess, while the Neck adeptly merges multi-scale features via the Path Aggregation Network layer. YOLOv10 introduces the pioneering One-to-Many Head to generate a spectrum of predictions during training, and the One-to-One Head to yield the most refined prediction during inference, all of which contribute to the model&#x2019;s enhanced performance. In pursuit of superior mobile deployment, YOLOv10n has been designated as the foundational detection model for our endeavors.</p>
</sec>
<sec id="s3_2_2">
<label>3.2.2</label>
<title>C2f-DCNv3</title>
<p>DCNv3 is a sophisticated convolutional core operator that enriches the standard convolutional process with the introduction of learnable offsets, enabling the kernels to adjust their sampling positions and conform to the intricacies of the input feature maps. This adaptive capability significantly improves the network&#x2019;s ability to discern the contours and shapes of targets within an image (<xref ref-type="bibr" rid="B29">Wang et&#xa0;al., 2023</xref>). Evolving from its predecessors, DCNv3 has undergone substantial refinements, offering enhanced performance and efficiency (<xref ref-type="bibr" rid="B38">Zhu et&#xa0;al., 2019</xref>). The procedural flow of the DCNv3 module is illustrated in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>. The input feature map is partitioned into g groups, each subjected to a convolutional operation to generate a corresponding set of offsets and modulation factors for the kernels. The final output feature map is then meticulously constructed from these predictive elements. The mathematical expression defining the deformable convolution v3 is articulated in <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>The structure of the DCNv3 module.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1474207-g003.tif"/>
</fig>
<disp-formula id="eq1">
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>G</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>K</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>g</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mi>g</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>g</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mstyle>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mi>&#x394;</mml:mi>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Where, <inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the pixel under consideration, G represents the number of groups, and K is the overall count of sampling points. The matrix <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>g</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is defined over <italic>RC&#xd7;C&#x2032;</italic>, where the group dimension is given by <inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:mtext>C</mml:mtext>
<mml:mo>&#x2032;</mml:mo>
<mml:mo>=</mml:mo>
<mml:mi>C</mml:mi>
<mml:mo stretchy="false">/</mml:mo>
<mml:mi>G</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The modulation scalar <inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:msub>
<mml:mi>m</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> for the k-th sampling point in the g-th group is subjected to normalization via a softmax function. The input feature map is denoted by <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>g</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> in the space <italic>RC&#xd7;H&#xd7;W</italic>. The term <inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> corresponds to the k-th position sampled by the network, and <inline-formula>
<mml:math display="inline" id="im7">
<mml:mrow>
<mml:mi>&#x394;</mml:mi>
<mml:msub>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the displacement related to the k-th grid sampling location.</p>
<p>In this study, the DCNv3 module is employed to replace the convolutions within the C2f module, capturing spatial and channel information of the targets more effectively during the feature extraction phase, thereby enhancing the performance of the C2f module. The structure of the improved C2f-DCNv3 module is shown in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>The structure of the C2f-DCNv3 module.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1474207-g004.tif"/>
</fig>
</sec>
<sec id="s3_2_3">
<label>3.2.3</label>
<title>Efficient local attention</title>
<p>The Efficient Local Attention (ELA) mechanism represents a cutting-edge innovation in attention mechanisms, crafted to escalate the efficacy and exactitude of feature extraction within the purview of deep learning models (<xref ref-type="bibr" rid="B31">Xu and Wan, 2024</xref>). Across the disciplines of Natural Language Processing and Computer Vision, attention mechanisms have become instrumental in advancing model capabilities. Despite the substantial computational demands and memory footprints of conventional global attention mechanisms, especially with extensive datasets, ELA offers a sophisticated solution. It harnesses self-attention on localized features, targeting discrete regions within the input feature maps, thereby substantially curtailing the computational and storage requisites.</p>
<p>The essence of ELA&#x2019;s superiority is its localized approach, as illustrated in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5</bold>
</xref>. By partitioning the input feature map into an array of compact windows and meticulously applying self-attention within the confines of each, ELA narrows its focus to local interactions, considerably attenuating the computational load. Moreover, ELA refines the computational expenditure by leveraging sparse sampling points to approximate the interrelatedness of local features, all without a detrimental impact on performance.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>The structure of the ELA attention mechanism.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1474207-g005.tif"/>
</fig>
<p>This research has implemented the ELA attention mechanism in place of the PSA attention mechanism within the YOLOv10n framework, aiming to bolster the model&#x2019;s efficacy. Additionally, the integration of the ELA attention mechanism at the nexus of the backbone and neck network is intended to augment the model&#x2019;s overall performance.</p>
</sec>
<sec id="s3_2_4">
<label>3.2.4</label>
<title>Tobacco leaf detection network architecture</title>
<p>In this research, we have engineered a tobacco leaf maturity detection model predicated on the YOLOv10n framework. To amplify the model&#x2019;s efficacy, we have innovatively combined the DCNv3 with the C2f module, resulting in an enhanced C2f_DCNv3 module. Moreover, we have introduced the ELA attention mechanism as a substitute for the PSA attention mechanism originally present in YOLOv10n. In addition to these modifications, we have strategically integrated the ELA attention mechanism at the interface between the backbone and the neck networks to further augment the model&#x2019;s performance. The schematic representation of the tobacco leaf maturity detection network crafted in this study is illustrated in <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref>.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>The structure of tobacco maturity network detection.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1474207-g006.tif"/>
</fig>
</sec>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Evaluation indicator</title>
<p>The present investigation applies Precision (P), Recall (R), mAP50, and mAP50-95 as the evaluative metrics for the tobacco leaf maturity detection model. Precision delineates the proportion of tobacco leaves that are accurately classified by the model into a specific maturity stage, signifying the model&#x2019;s trustworthiness in predicting particular maturity levels. Recall measures the model&#x2019;s effectiveness in identifying all instances of a given maturity stage, representing the ratio of correctly detected leaves to the total actual instances. mAP50 emerges as a holistic benchmark in the evaluation of tobacco leaf maturity, encapsulating the model&#x2019;s aggregate proficiency in distinguishing among various stages. It is calculated by averaging the AP values across stages, thereby assessing the model&#x2019;s comprehensive accuracy in classifying tobacco leaf maturity. mAP50-95 expands the IoU threshold scope, pivotal for nuanced visual feature differentiation across maturity stages. This metric furnishes an encompassing view of the model&#x2019;s efficacy across a spectrum of matching stringencies. The respective computational formulas are articulated in <xref ref-type="disp-formula" rid="eq2">Equations 2</xref>-<xref ref-type="disp-formula" rid="eq5">5</xref>.</p>
<disp-formula id="eq2">
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:mtext>Precision</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq3">
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:mtext>Recall</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq4">
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mn>50</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>N</mml:mi>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq5">
<label>(5)</label>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mn>50</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>95</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>N</mml:mi>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mn>91</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mstyle>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>91</mml:mn>
</mml:mrow>
</mml:munderover>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Where TP denotes the number of true positive instances, FP the number of false positive instances, and FN the number of false negative instances. <italic>N</italic> is the total number of categories. <inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the mean precision for the i-th category at an IoU threshold of precisely 0.5. <inline-formula>
<mml:math display="inline" id="im9">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> pertains to the mean precision for the i-th category at an IoU threshold incrementing from 0.5 by increments of 0.05 for each successive j, ranging up to 0.95. The term &#x2018;91&#x2019; embodies the methodical computation of AP across this continuum, spaced into 91 uniform intervals for a meticulous assessment of AP.</p>
</sec>
</sec>
<sec id="s4" sec-type="results">
<label>4</label>
<title>Results and discussion</title>
<sec id="s4_1">
<label>4.1</label>
<title>Experimental environment</title>
<p>The experimental procedures described herein were undertaken within a Windows 11 environment, leveraging the PyTorch deep learning framework at version 2.0.1, with Python 3.9 serving as the programming language of choice and PyCharm acting as the IDE for coding endeavors. The computational experiments were powered by an Intel Core i5-13500h CPU, complemented by 16 GB of system memory. The GPU designated for this research is the NVIDIA GeForce RTX 4050, endowed with 6 GB of graphics memory and 2560 CUDA cores for parallel processing capabilities. To ensure the reliability of our model, we adopted a consistent set of hyperparameters for all training runs. The hyperparameters for model training were sourced from <ext-link ext-link-type="uri" xlink:href="https://github.com/THU-MIG/yolov10/blob/main/ultralytics/cfg/default.yaml">https://github.com/THU-MIG/yolov10/blob/main/ultralytics/cfg/default.yaml</ext-link>. The specific values are summarized in the <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Model training hyperparameter values.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Hyperparameter</th>
<th valign="top" align="left">Value</th>
<th valign="top" align="left">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">lr0</td>
<td valign="top" align="left">0.01</td>
<td valign="top" align="left">Initial learning rate</td>
</tr>
<tr>
<td valign="top" align="left">lrf</td>
<td valign="top" align="left">0.01</td>
<td valign="top" align="left">Final learning rate (lr0 * lrf)</td>
</tr>
<tr>
<td valign="top" align="left">momentum</td>
<td valign="top" align="left">0.937</td>
<td valign="top" align="left">SGD momentum/Adam beta1</td>
</tr>
<tr>
<td valign="top" align="left">weight_decay</td>
<td valign="top" align="left">0.0005</td>
<td valign="top" align="left">Optimizer weight decay</td>
</tr>
<tr>
<td valign="top" align="left">warmup_epochs</td>
<td valign="top" align="left">3</td>
<td valign="top" align="left">Warmup epochs (fractions ok)</td>
</tr>
<tr>
<td valign="top" align="left">warmup_momentum</td>
<td valign="top" align="left">0.8</td>
<td valign="top" align="left">Warmup initial momentum</td>
</tr>
<tr>
<td valign="top" align="left">warmup_bias_lr</td>
<td valign="top" align="left">0.1</td>
<td valign="top" align="left">Warmup initial bias lr</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Evaluation of the C2f-DCNv3 integration at distinct phases</title>
<p>In order to better evaluate the impact of C2f-DCNv3 on different parts of the model, this study utilizes C2f-DCNv3 to replace the C2f module in the backbone network and the neck network, respectively, to enhance the performance of the model. The outcomes from integrating C2f-DCNv3 at these distinct phases are delineated in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>The accuracy of the model for different stages of applying C2f-DCNv3.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" colspan="2" align="center">Model</th>
<th valign="top" align="center">P</th>
<th valign="top" align="center">R</th>
<th valign="top" align="center">mAP50</th>
<th valign="top" align="center">mAP50-95</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="4" align="center">YOLOv10n</td>
<td valign="middle" align="center">All</td>
<td valign="middle" align="center">0.939</td>
<td valign="middle" align="center">0.968</td>
<td valign="middle" align="center">0.984</td>
<td valign="middle" align="center">0.962</td>
</tr>
<tr>
<td valign="middle" align="center">Over-Mature</td>
<td valign="middle" align="center">0.905</td>
<td valign="middle" align="center">0.949</td>
<td valign="middle" align="center">0.975</td>
<td valign="middle" align="center">0.946</td>
</tr>
<tr>
<td valign="middle" align="center">Mature</td>
<td valign="middle" align="center">0.923</td>
<td valign="middle" align="center">0.966</td>
<td valign="middle" align="center">0.984</td>
<td valign="middle" align="center">0.972</td>
</tr>
<tr>
<td valign="middle" align="center">Immature</td>
<td valign="middle" align="center">0.991</td>
<td valign="middle" align="center">0.989</td>
<td valign="middle" align="center">0.995</td>
<td valign="middle" align="center">0.968</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">YOLOv10n+C2f-DCNv3(backbone)</td>
<td valign="middle" align="center">All</td>
<td valign="middle" align="center">0.97</td>
<td valign="middle" align="center">0.974</td>
<td valign="middle" align="center">0.991</td>
<td valign="middle" align="center">0.962</td>
</tr>
<tr>
<td valign="middle" align="center">Over-Mature</td>
<td valign="middle" align="center">0.952</td>
<td valign="middle" align="center">0.96</td>
<td valign="middle" align="center">0.989</td>
<td valign="middle" align="center">0.956</td>
</tr>
<tr>
<td valign="middle" align="center">Mature</td>
<td valign="middle" align="center">0.959</td>
<td valign="middle" align="center">0.971</td>
<td valign="middle" align="center">0.989</td>
<td valign="middle" align="center">0.968</td>
</tr>
<tr>
<td valign="middle" align="center">Immature</td>
<td valign="middle" align="center">0.999</td>
<td valign="middle" align="center">0.991</td>
<td valign="middle" align="center">0.995</td>
<td valign="middle" align="center">0.961</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">YOLOv10n+C2f-DCNv3(head)</td>
<td valign="middle" align="center">All</td>
<td valign="middle" align="center">0.973</td>
<td valign="middle" align="center">0.968</td>
<td valign="middle" align="center">0.991</td>
<td valign="middle" align="center">0.972</td>
</tr>
<tr>
<td valign="middle" align="center">Over-Mature</td>
<td valign="middle" align="center">0.96</td>
<td valign="middle" align="center">0.933</td>
<td valign="middle" align="center">0.987</td>
<td valign="middle" align="center">0.963</td>
</tr>
<tr>
<td valign="middle" align="center">Mature</td>
<td valign="middle" align="center">0.967</td>
<td valign="middle" align="center">0.972</td>
<td valign="middle" align="center">0.991</td>
<td valign="middle" align="center">0.979</td>
</tr>
<tr>
<td valign="middle" align="center">Immature</td>
<td valign="middle" align="center">0.991</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">0.995</td>
<td valign="middle" align="center">0.974</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">YOLOv10n+C2f-DCNv3</td>
<td valign="middle" align="center">All</td>
<td valign="middle" align="center">0.971</td>
<td valign="middle" align="center">0.967</td>
<td valign="middle" align="center">0.987</td>
<td valign="middle" align="center">0.958</td>
</tr>
<tr>
<td valign="middle" align="center">Over-Mature</td>
<td valign="middle" align="center">0.952</td>
<td valign="middle" align="center">0.934</td>
<td valign="middle" align="center">0.98</td>
<td valign="middle" align="center">0.949</td>
</tr>
<tr>
<td valign="middle" align="center">Mature</td>
<td valign="middle" align="center">0.962</td>
<td valign="middle" align="center">0.966</td>
<td valign="middle" align="center">0.987</td>
<td valign="middle" align="center">0.969</td>
</tr>
<tr>
<td valign="middle" align="center">Immature</td>
<td valign="middle" align="center">0.998</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">0.995</td>
<td valign="middle" align="center">0.957</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As indicated in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>, the overall model accuracy improved from 0.939 to 0.970, marking a 3.3% increase, when the C2f module in the backbone network was replaced in isolation. The mAP50 metric also saw a slight rise from 0.984 to 0.991, amounting to a 0.7% increase. Notably, within the &#x201c;Immature&#x201d; category, there was a significant leap in accuracy, with mAP50 and mAP50-95 experiencing boosts of 1.5% and 3.3%, respectively. Following the replacement of the neck network, the overall precision was further enhanced to 0.973, a 3.7% increase. The mAP50 metric mirrored the initial rise, while the mAP50-95 improved from 0.962 to 0.972, reflecting a 1.0% increase. Conversely, replacing the C2f modules in both the backbone and neck networks concurrently resulted in an overall precision of 0.971, yet the mAP50-95 dipped slightly to 0.958.</p>
<p>The incorporation of the C2f-DCNv3 module has notably enhanced the YOLOv10 model&#x2019;s performance, particularly within the neck network structure. The C2f-DCNv3&#x2019;s design amalgamates the profound feature extraction capabilities of Convolutional Neural Networks (CNNs) with the adaptability of Deformable Convolutional Networks (DCNs), thus enabling the model to adeptly adjust to the variability in target shapes and spatial configurations. Acting as a conduit between the backbone and detection head, the neck network&#x2019;s efficacy is pivotal to the detection precision. Replacing the C2f module with C2f-DCNv3 in the neck network has bolstered the model&#x2019;s target recognition by enriching feature representation. However, the decline in mAP50-95 when both networks are updated with C2f-DCNv3 could be attributed to potential issues. It may stem from overfitting due to heightened model complexity, especially with limited data. Alternatively, suboptimal feature integration strategies between the backbone and neck networks could lead to information loss or redundancy.</p>
<p>In this research, the strategy of replacing the C2f module in the neck network with C2f-DCNv3 has been selected from the outcomes of employing C2f-DCNv3 at various stages, as it demonstrated the most substantial benefit in enhancing model performance. Consequently, the C2f-DCNv3 module is chosen to replace the C2f module in the neck network to augment the model&#x2019;s capabilities.</p>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Model results with attention mechanisms added at different stages</title>
<p>In this research, we have made significant improvements to the YOLOv10n object detection model by incorporating the ELA (Efficient Local Attention) module to enhance the precision and efficiency of tobacco leaf maturation identification. Initially, we substituted the PSA (Partial Self-Attention) mechanism in YOLOv10n with the ELA, creating the YOLOv10n+ELA1 model. Subsequently, we introduced an additional ELA module at the juncture of the backbone and neck networks within the YOLOv10n+ELA model to potentially elevate the model&#x2019;s performance further. The precision of models with attention mechanisms modified at various stages is detailed in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Accuracy of the improved model for different stages of the attention mechanism.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" colspan="2" align="center">Model</th>
<th valign="middle" align="center">P</th>
<th valign="middle" align="center">R</th>
<th valign="middle" align="center">mAP50</th>
<th valign="middle" align="center">mAP50-95</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="4" align="center">YOLOv10n</td>
<td valign="middle" align="center">All</td>
<td valign="middle" align="center">0.939</td>
<td valign="middle" align="center">0.968</td>
<td valign="middle" align="center">0.984</td>
<td valign="middle" align="center">0.962</td>
</tr>
<tr>
<td valign="middle" align="center">Over-Mature</td>
<td valign="middle" align="center">0.905</td>
<td valign="middle" align="center">0.949</td>
<td valign="middle" align="center">0.975</td>
<td valign="middle" align="center">0.946</td>
</tr>
<tr>
<td valign="middle" align="center">Mature</td>
<td valign="middle" align="center">0.923</td>
<td valign="middle" align="center">0.966</td>
<td valign="middle" align="center">0.984</td>
<td valign="middle" align="center">0.972</td>
</tr>
<tr>
<td valign="middle" align="center">Immature</td>
<td valign="middle" align="center">0.991</td>
<td valign="middle" align="center">0.989</td>
<td valign="middle" align="center">0.995</td>
<td valign="middle" align="center">0.968</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">YOLOv10n+ELA1</td>
<td valign="middle" align="center">All</td>
<td valign="middle" align="center">0.964</td>
<td valign="middle" align="center">0.971</td>
<td valign="middle" align="center">0.986</td>
<td valign="middle" align="center">0.965</td>
</tr>
<tr>
<td valign="middle" align="center">Over-Mature</td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">0.967</td>
<td valign="middle" align="center">0.983</td>
<td valign="middle" align="center">0.958</td>
</tr>
<tr>
<td valign="middle" align="center">Mature</td>
<td valign="middle" align="center">0.951</td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">0.981</td>
<td valign="middle" align="center">0.97</td>
</tr>
<tr>
<td valign="middle" align="center">Immature</td>
<td valign="middle" align="center">0.991</td>
<td valign="middle" align="center">0.998</td>
<td valign="middle" align="center">0.995</td>
<td valign="middle" align="center">0.966</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">YOLOv10n+ELA</td>
<td valign="middle" align="center">All</td>
<td valign="middle" align="center">0.972</td>
<td valign="middle" align="center">0.97</td>
<td valign="middle" align="center">0.992</td>
<td valign="middle" align="center">0.966</td>
</tr>
<tr>
<td valign="middle" align="center">Over-Mature</td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">0.96</td>
<td valign="middle" align="center">0.989</td>
<td valign="middle" align="center">0.96</td>
</tr>
<tr>
<td valign="middle" align="center">Mature</td>
<td valign="middle" align="center">0.975</td>
<td valign="middle" align="center">0.965</td>
<td valign="middle" align="center">0.992</td>
<td valign="middle" align="center">0.976</td>
</tr>
<tr>
<td valign="middle" align="center">Immature</td>
<td valign="middle" align="center">0.991</td>
<td valign="middle" align="center">0.986</td>
<td valign="middle" align="center">0.995</td>
<td valign="middle" align="center">0.961</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>From <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>, it is clear that the enhanced model has shown significant performance improvements across all maturity categories. Specifically, for the &#x201c;Over-Mature&#x201d; category, the YOLOv10n+ELA1 model&#x2019;s accuracy has increased from 0.905 to 0.95, and the mAP50 has improved from 0.975 to 0.983. In the &#x201c;Immature&#x201d; category, both accuracy and mAP50 have reached 0.991 and 0.995, respectively, demonstrating an exceptionally high recognition rate. Moreover, the YOLOv10n+ELA model has achieved an overall precision and mAP50 of 0.972 and 0.992 for the &#x201c;All&#x201d; categories, which is a 3.3% and 0.8% increase compared to the original YOLOv10n model.</p>
<p>The incorporation of the ELA module has notably bolstered the model&#x2019;s capability to capture features indicative of tobacco leaf maturity. The ELA&#x2019;s design, leveraging localized attention mechanisms, effectively enhances the interconnectivity of feature maps, thus improving the model&#x2019;s differentiation between tobacco leaves of varying maturities. Additionally, by incorporating ELA at the interface of the backbone and neck networks, we have further strengthened the conveyance and integration of features, enabling the model to sustain high recognition accuracy even when dealing with images of tobacco leaves against complex backgrounds and under diverse lighting conditions.</p>
<p>However, we have also noted a decrease in mAP50-95 for the &#x201c;Over-Mature&#x201d; category in the YOLOv10n+ELA1 model compared to the original model. This may indicate that the model&#x2019;s ability to recognize extreme cases of tobacco leaf maturity has been somewhat compromised during the enhancement process. This could be attributed to the introduction of the attention mechanism, which may have altered the distribution of features, potentially diminishing the model&#x2019;s generalization capabilities in certain scenarios.</p>
</sec>
<sec id="s4_4">
<label>4.4</label>
<title>Enhanced YOLOv10 model results through multi-stage fusion improvements</title>
<p>In this research, a comprehensive set of enhancements has been strategically applied to substantially elevate the performance of the YOLOv10 model. These improvements encompass the innovative replacement of the C2f module with the C2f-DCNv3 within the neck structure, alongside the sophisticated transition from the PSA (Partial Self-Attention) mechanism to the ELA (Efficient Local Attention) mechanism within the backbone network. The seamless integration of an additional ELA attention mechanism at the interface of the backbone and neck networks has culminated in the development of a model that excels in the sophisticated recognition of tobacco leaf maturity. The model accuracy of the multi-stage improved fusion is shown in <xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref>.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Model accuracy for multi-stage improved fusion.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" colspan="2" align="center">Model</th>
<th valign="middle" align="center">P</th>
<th valign="middle" align="center">R</th>
<th valign="middle" align="center">mAP50</th>
<th valign="middle" align="center">mAP50-95</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="4" align="center">YOLOv10n</td>
<td valign="middle" align="center">All</td>
<td valign="middle" align="center">0.939</td>
<td valign="middle" align="center">0.968</td>
<td valign="middle" align="center">0.984</td>
<td valign="middle" align="center">0.962</td>
</tr>
<tr>
<td valign="middle" align="center">Over-Mature</td>
<td valign="middle" align="center">0.905</td>
<td valign="middle" align="center">0.949</td>
<td valign="middle" align="center">0.975</td>
<td valign="middle" align="center">0.946</td>
</tr>
<tr>
<td valign="middle" align="center">Mature</td>
<td valign="middle" align="center">0.923</td>
<td valign="middle" align="center">0.966</td>
<td valign="middle" align="center">0.984</td>
<td valign="middle" align="center">0.972</td>
</tr>
<tr>
<td valign="middle" align="center">Immature</td>
<td valign="middle" align="center">0.991</td>
<td valign="middle" align="center">0.989</td>
<td valign="middle" align="center">0.995</td>
<td valign="middle" align="center">0.968</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">ours</td>
<td valign="middle" align="center">All</td>
<td valign="middle" align="center">0.973</td>
<td valign="middle" align="center">0.984</td>
<td valign="middle" align="center">0.994</td>
<td valign="middle" align="center">0.973</td>
</tr>
<tr>
<td valign="middle" align="center">Over-Mature</td>
<td valign="middle" align="center">0.973</td>
<td valign="middle" align="center">0.969</td>
<td valign="middle" align="center">0.991</td>
<td valign="middle" align="center">0.97</td>
</tr>
<tr>
<td valign="middle" align="center">Mature</td>
<td valign="middle" align="center">0.97</td>
<td valign="middle" align="center">0.992</td>
<td valign="middle" align="center">0.995</td>
<td valign="middle" align="center">0.981</td>
</tr>
<tr>
<td valign="middle" align="center">Immature</td>
<td valign="middle" align="center">0.975</td>
<td valign="middle" align="center">0.991</td>
<td valign="middle" align="center">0.995</td>
<td valign="middle" align="center">0.968</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As demonstrated in <xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref>, the enhanced model from this study surpasses the original YOLOv10n model in multiple indicators. In general, the precision (P) of our model across all categories has seen a rise from 0.939 to 0.973, which is a 3.4% increase; the recall (R) has also seen an improvement, increasing from 0.968 to 0.984, a 1.6% increase. The Mean Average Precision at 50% intersection over union (mAP50) has increased from 0.984 to 0.994, a 1.0% improvement; and the mAP50-95 has also shown an increase, moving from 0.962 to 0.973, a 1.1% increase.</p>
<p>In the granularity of specific categories, our model exhibits considerable improvement within the &#x201c;Over-Mature&#x201d; classification, with accuracy escalating from 0.905 to 0.973, reflecting a 6.8% enhancement; the recall rate has also witnessed an uptick from 0.949 to 0.969, a 2.0% gain; mAP50 has seen a boost from 0.975 to 0.991, a 1.6% advancement; and mAP50-95 has climbed from 0.946 to 0.970, a 2.4% escalation. Within the &#x201c;Mature&#x201d; classification, accuracy has surged from 0.923 to 0.970, amounting to a 4.7% enhancement; the recall rate has spiked from 0.966 to 0.992, a 2.6% augmentation; mAP50 has risen from 0.984 to 0.995, a 1.1% increment; and mAP50-95 has inched up from 0.972 to 0.981, a 0.9% increase. For the &#x201c;Immature&#x201d; classification, accuracy has decreased slightly from 0.991 to 0.975; the recall rate has marginally improved from 0.989 to 0.991, a 0.2% increment; mAP50 has sustained its level at 0.995; and mAP50-95 has maintained its steadiness at 0.968.</p>
<p>The ELA demonstrates excellent performance in terms of computational efficiency and the enhancement of model capabilities. By adeptly capturing local features and providing advanced feature representation, ELA markedly boosts the model&#x2019;s precision and generalization ability. Its primary strengths are the efficient capture of local features, optimization of channel dimensions, and a simplified structure, circumventing the redundancy and increased computational complexity inherent in global feature extraction. These attributes render ELA especially fitting for compact models and real-time applications, thus augmenting overall computational efficiency.</p>
<p>The C2f-DCNv3 module, a fusion of DCNv3 and the C2f module, strengthens the model&#x2019;s adaptability to varied shape changes and spatial configurations. It leverages the adaptability of DCNv3 and the profound feature extraction capabilities of convolutional neural networks to further refine the model&#x2019;s detection precision and robustness. The integration of the C2f-DCNv3 module into the neck network facilitates superior integration of multi-scale features, enhancing the accuracy of target recognition. Additionally, employing the ELA attention mechanism in conjunction with the C2f-DCNv3 module not only enhances detection precision but also bolsters the model&#x2019;s robustness and generalization capabilities.</p>
</sec>
<sec id="s4_5">
<label>4.5</label>
<title>The results of the model comparison experiment</title>
<p>To better demonstrate the capabilities of our model, this study compared it against four existing YOLO series models (specifically, YOLOv5n, YOLOv6n, YOLOv8n, and YOLOv10n). The comparative accuracy of these models is detailed in <xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref>. The results of tobacco maturity detection for different models are shown in <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref>.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>The experimental results of different models.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" colspan="2" align="center">Model</th>
<th valign="middle" align="center">P</th>
<th valign="middle" align="center">R</th>
<th valign="middle" align="center">mAP50</th>
<th valign="middle" align="center">mAP50-95</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="4" align="center">YOLOv5n</td>
<td valign="middle" align="center">All</td>
<td valign="middle" align="center">0.919</td>
<td valign="middle" align="center">0.966</td>
<td valign="middle" align="center">0.983</td>
<td valign="middle" align="center">0.933</td>
</tr>
<tr>
<td valign="middle" align="center">Over-Mature</td>
<td valign="middle" align="center">0.871</td>
<td valign="middle" align="center">0.948</td>
<td valign="middle" align="center">0.978</td>
<td valign="middle" align="center">0.932</td>
</tr>
<tr>
<td valign="middle" align="center">Mature</td>
<td valign="middle" align="center">0.898</td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">0.975</td>
<td valign="middle" align="center">0.934</td>
</tr>
<tr>
<td valign="middle" align="center">Immature</td>
<td valign="middle" align="center">0.988</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">0.995</td>
<td valign="middle" align="center">0.934</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">YOLOv6n</td>
<td valign="middle" align="center">All</td>
<td valign="middle" align="center">0.928</td>
<td valign="middle" align="center">0.909</td>
<td valign="middle" align="center">0.962</td>
<td valign="middle" align="center">0.931</td>
</tr>
<tr>
<td valign="middle" align="center">Over-Mature</td>
<td valign="middle" align="center">0.904</td>
<td valign="middle" align="center">0.867</td>
<td valign="middle" align="center">0.942</td>
<td valign="middle" align="center">0.92</td>
</tr>
<tr>
<td valign="middle" align="center">Mature</td>
<td valign="middle" align="center">0.887</td>
<td valign="middle" align="center">0.861</td>
<td valign="middle" align="center">0.948</td>
<td valign="middle" align="center">0.924</td>
</tr>
<tr>
<td valign="middle" align="center">Immature</td>
<td valign="middle" align="center">0.993</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">0.995</td>
<td valign="middle" align="center">0.95</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">YOLOv8n</td>
<td valign="middle" align="center">All</td>
<td valign="middle" align="center">0.949</td>
<td valign="middle" align="center">0.948</td>
<td valign="middle" align="center">0.984</td>
<td valign="middle" align="center">0.951</td>
</tr>
<tr>
<td valign="middle" align="center">Over-Mature</td>
<td valign="middle" align="center">0.951</td>
<td valign="middle" align="center">0.903</td>
<td valign="middle" align="center">0.978</td>
<td valign="middle" align="center">0.945</td>
</tr>
<tr>
<td valign="middle" align="center">Mature</td>
<td valign="middle" align="center">0.903</td>
<td valign="middle" align="center">0.942</td>
<td valign="middle" align="center">0.978</td>
<td valign="middle" align="center">0.951</td>
</tr>
<tr>
<td valign="middle" align="center">Immature</td>
<td valign="middle" align="center">0.992</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">0.995</td>
<td valign="middle" align="center">0.958</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">YOLOv10n</td>
<td valign="middle" align="center">All</td>
<td valign="middle" align="center">0.939</td>
<td valign="middle" align="center">0.968</td>
<td valign="middle" align="center">0.984</td>
<td valign="middle" align="center">0.962</td>
</tr>
<tr>
<td valign="middle" align="center">Over-Mature</td>
<td valign="middle" align="center">0.905</td>
<td valign="middle" align="center">0.949</td>
<td valign="middle" align="center">0.975</td>
<td valign="middle" align="center">0.946</td>
</tr>
<tr>
<td valign="middle" align="center">Mature</td>
<td valign="middle" align="center">0.923</td>
<td valign="middle" align="center">0.966</td>
<td valign="middle" align="center">0.984</td>
<td valign="middle" align="center">0.972</td>
</tr>
<tr>
<td valign="middle" align="center">Immature</td>
<td valign="middle" align="center">0.991</td>
<td valign="middle" align="center">0.989</td>
<td valign="middle" align="center">0.995</td>
<td valign="middle" align="center">0.968</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">ours</td>
<td valign="middle" align="center">All</td>
<td valign="middle" align="center">0.973</td>
<td valign="middle" align="center">0.984</td>
<td valign="middle" align="center">0.994</td>
<td valign="middle" align="center">0.973</td>
</tr>
<tr>
<td valign="middle" align="center">Over-Mature</td>
<td valign="middle" align="center">0.973</td>
<td valign="middle" align="center">0.969</td>
<td valign="middle" align="center">0.991</td>
<td valign="middle" align="center">0.97</td>
</tr>
<tr>
<td valign="middle" align="center">Mature</td>
<td valign="middle" align="center">0.97</td>
<td valign="middle" align="center">0.992</td>
<td valign="middle" align="center">0.995</td>
<td valign="middle" align="center">0.981</td>
</tr>
<tr>
<td valign="middle" align="center">Immature</td>
<td valign="middle" align="center">0.975</td>
<td valign="middle" align="center">0.991</td>
<td valign="middle" align="center">0.995</td>
<td valign="middle" align="center">0.968</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Tobacco maturity detection results.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1474207-g007.tif"/>
</fig>
<p>As depicted in <xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref>, our model exhibited superior performance in the task of recognizing the maturity of tobacco leaves. Specifically, it achieved a P of 0.973, a R of 0.984, and mAP50 and mAP50-95 of 0.994 and 0.973, respectively, indicating exceptionally high detection accuracy. Within the subcategories representing different stages of maturity, our model continued to excel, maintaining mAP50-95 values above 0.968 for Over-Mature, Mature, and Immature categories, thereby highlighting the model&#x2019;s high accuracy and robustness in identifying tobacco leaves at various stages of maturity.</p>
<p>In the horizontal analysis of the tobacco leaf maturity recognition models, our model demonstrated significant superiority across all four key performance metrics for all categories. For instance, when compared to the YOLOv10n model, our model showed improvements of 3.4% in precision, 1.6% in recall, 1.0% in mAP50, and 1.1% in mAP50-95. The performance gains were even more pronounced when compared to the YOLOv5n model, with increases of 5.4%, 1.8%, 1.1%, and 4.0% in these metrics, respectively. Similarly, when compared to the YOLOv6n model, our model&#x2019;s improvements were 4.5% in precision, 7.5% in recall, 3.2% in mAP50, and 4.2% in mAP50-95. Although the YOLOv5n and YOLOv6n models showed good performance in certain metrics&#x2014;YOLOv5n, for example, achieved an mAP50 of 0.995 for the immature category&#x2014;our model overall exhibited a more outstanding comprehensive performance across all categories. While the YOLOv8n model was comparable to ours in some subcategories, such as a mAP50-95 of 0.945 for the Over-Mature category, our model showed higher consistency and stability across all maturity categories.</p>
<p>To visually represent the performance of the models, a confusion matrix was employed to directly illustrate the detection capabilities. As shown in <xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8</bold>
</xref>, our model had the fewest misclassifications across the three maturity stages, followed by YOLOv10n, which had a lower total number of misclassifications. The YOLOv6n model performed the poorest, with the highest total number of misclassifications across all categories.</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>The confusion matrix of the detection results of different models.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1474207-g008.tif"/>
</fig>
<p>To provide a more comprehensive evaluation of the model, this study employs the PR curve to assess the overall performance of the model in terms of recall and precision. The PR curves for different models are illustrated in <xref ref-type="fig" rid="f9">
<bold>Figure&#xa0;9</bold>
</xref>.</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Precision-Recall graphs for different models.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1474207-g009.tif"/>
</fig>
<p>As shown in <xref ref-type="fig" rid="f9">
<bold>Figure&#xa0;9</bold>
</xref>, our model achieves a mAP50 of 0.994 across all categories, significantly surpassing the other models. Specifically, YOLOv10n reaches a mAP50 of 0.984, while YOLOv5n and YOLOv6n achieve a mAP50 of 0.983 and 0.962, respectively. This indicates that our model has a distinct advantage in precision and recall, particularly maintaining a high level of precision in the high-recall region. Furthermore, our model also demonstrates outstanding performance in specific categories, achieving a mAP50 of 0.991 in the Over-Mature category, compared to YOLOv10n&#x2019;s 0.975. In the Immature and Mature categories, our model achieves an mAP of 0.995 at a threshold of 0.5, showcasing its robust performance in these areas. Overall, our model exhibits excellent performance across multiple evaluation metrics, particularly with its overall performance of 0.994 mAP at 0.5, which is markedly higher than that of other models, underscoring its exceptional capabilities and potential in object detection tasks.</p>
<p>In conclusion, our model offers an efficient and precise solution in the domain of tobacco leaf maturity recognition. Its exceptional performance in key performance metrics, coupled with its clear advantages over existing models, underscores its significant potential for practical applications in agriculture. Future efforts will focus on further optimizing the model to minimize computational resource consumption and exploring its applicability in a broader range of agricultural monitoring tasks.</p>
</sec>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusion</title>
<p>This research successfully developed a lightweight and efficient model for detecting the maturity of tobacco leaves by integrating DCNv3 to enhance the neck network of the YOLOv10 algorithm. We managed to optimize the model&#x2019;s architecture without sacrificing detection precision, resulting in a reduction of parameter count and computational complexity. The experimental outcomes indicate that the application of the C2f-DCNv3 module in the backbone network elevated the overall precision from 0.939 to 0.970, and the mAP50 score from 0.984 to 0.991. Subsequent integration of the C2f-DCNv3 in the neck network achieved an overall precision of 0.973, with the mAP50 score sustained at 0.991, and a notable improvement in mAP50-95 from 0.962 to 0.972. Moreover, the incorporation of the ELA attention mechanism led to a significant boost in precision and mAP50 for the &#x201c;Over-Mature&#x201d; category, and an overall enhancement in the model&#x2019;s performance across &#x201c;All&#x201d; categories, with accuracy and mAP50 scores increasing to 0.972 and 0.992, respectively. This study offers the tobacco industry a potent detection tool that can enhance the precision and efficiency of tobacco leaf harvesting, which is instrumental for improving tobacco leaf quality and the economic returns of tobacco farmers. Future endeavors will concentrate on further refining the model&#x2019;s architecture to bolster its generalization capabilities and on investigating its practical application in field settings to ensure wider real-world utility and contribute to the sustainable growth of the tobacco industry.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>YS: Conceptualization, Formal analysis, Resources, Supervision, Validation, Writing &#x2013; review &amp; editing. HoW: Investigation, Validation, Writing &#x2013; review &amp; editing. SQ: Conceptualization, Methodology, Software, Writing &#x2013; original draft. JL: Data curation, Writing &#x2013; review &amp; editing. LZ: Funding acquisition, Project administration, Validation, Writing &#x2013; review &amp; editing. HuW: Visualization, Writing &#x2013; review &amp; editing. FZ: Data curation, Formal analysis, Writing &#x2013; review &amp; editing. QC: Resources, Validation, Writing &#x2013; review &amp; editing. FW: Formal analysis, Resources, Writing &#x2013; review &amp; editing. YW: Formal analysis, Supervision, Writing &#x2013; review &amp; editing.</p>
</sec>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This work was supported by the China Tobacco Corporation Henan Province company innovation project (No. 2024410000240026).</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>We are very grateful to Luoning County, Henan Province, for supporting our data collection in the tobacco field.</p>
</ack>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>HoW, HuW, FZ and QC were employed by Henan Province Tobacco Company, Luoyang Company. JL was employed by Henan Province Tobacco Company.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bacea</surname> <given-names>D.-S.</given-names>
</name>
<name>
<surname>Oniga</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Single stage architecture for improved accuracy real-time object detection on mobile devices</article-title>. <source>Image Vision Computing</source> <volume>130</volume>, <fpage>104613</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.imavis.2022.104613</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Be&#x107;</surname> <given-names>K. B.</given-names>
</name>
<name>
<surname>Grabska</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Huck</surname> <given-names>C. W.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Principles and applications of miniaturized near-infrared (NIR) spectrometers</article-title>. <source>Chemistry&#x2013;A Eur. J.</source> <volume>27</volume>, <fpage>1514</fpage>&#x2013;<lpage>1532</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/chem.202002838</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Biradar</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Hosalli</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Segmentation and detection of crop pests using novel U-Net with hybrid deep learning mechanism</article-title>. <source>Pest Manage. Sci</source>. <volume>80</volume> (<issue>8</issue>), <page-range>3795&#x2013;807</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/ps.v80.8</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cai</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Yin</surname> <given-names>Q.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Study on the quantitative relationship between maturity and quality of tobacco leaf</article-title>. <source>Acta Tabacaria Sin.</source> <volume>11</volume>, <fpage>42</fpage>&#x2013;<lpage>46</lpage>.</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cakir</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Cebi</surname> <given-names>U.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>The effect of irrigation scheduling and water stress on the maturity and chemical composition of Virginia tobacco leaf</article-title>. <source>Field Crops Res.</source> <volume>119</volume>, <fpage>269</fpage>&#x2013;<lpage>276</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.fcr.2010.07.017</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Bin</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Kang</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Application of machine vision and convolutional neural networks in discriminating tobacco leaf maturity on mobile devices</article-title>. <source>Smart Agric. Technol.</source> <volume>5</volume>, <fpage>100322</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.atech.2023.100322</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Bin</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zou</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Discrimination of fresh tobacco leaves with different maturity levels by near-infrared (NIR) spectroscopy and deep learning</article-title>. <source>J. Analytical Methods Chem</source>. 9912589, <fpage>11</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2021/9912589</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Xiao</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Enhancing remote sensing object detection with K-CBST YOLO: integrating CBAM and swin-transformer</article-title>. <source>Remote Sens.</source> <volume>16</volume>, <fpage>2885</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs16162885</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Du</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Overview of two-stage object detection algorithms</article-title>. <source>J. Physics: Conf. Ser.</source> <volume>1544</volume>, <fpage>012033</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1088/1742-6596/1544/1/012033</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Du</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Xi</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Qiu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Lv</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2025</year>). <article-title>Motor imagery EEG signal classification based on deformable convolution v3 and adaptive spatial attention mechanism</article-title>. <source>Biomed. Signal Process. Control</source> <volume>99</volume>, <fpage>106905</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.bspc.2024.106905</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>He</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Pang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>W.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). &#x201c;<article-title>The survey of one-stage anchor-free real-time object detection algorithms</article-title>,&#x201d; in <conf-name>Sixth Conference on Frontiers in Optical Imaging and Technology: Imaging Detection and Target Recognition</conf-name>. <fpage>1315602</fpage> (<publisher-name>SPIE</publisher-name>).</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>He</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>C.</given-names>
</name>
<name>
<surname>He</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Pyramid feature fusion through shifted window self-attention for tobacco leaf classification</article-title>. <source>Expert Syst. Appl.</source> <volume>230</volume>, <fpage>120601</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.eswa.2023.120601</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hussain</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>YOLO-v1 to YOLO-v8, the rise of YOLO and its complementary nature toward digital manufacturing and industrial defect detection</article-title>. <source>Machines</source> <volume>11</volume>, <fpage>677</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/machines11070677</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hussain</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Pu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>D.-W.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Innovative nondestructive imaging techniques for ripening and maturity of fruits&#x2013;a review of recent applications</article-title>. <source>Trends Food Sci. Technol.</source> <volume>72</volume>, <fpage>144</fpage>&#x2013;<lpage>152</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.tifs.2017.12.010</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jia</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Niu</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Niu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A new efficient multi-object detection and size calculation for blended tobacco shreds using an improved YOLOv7 network and LWC algorithm</article-title>. <source>Sensors</source> <volume>23</volume>, <fpage>8380</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s23208380</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Fast implementation of real-time fruit detection in apple orchards using deep learning</article-title>. <source>Comput. Electron. Agric.</source> <volume>168</volume>, <fpage>105108</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2019.105108</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Kays</surname> <given-names>S. J.</given-names>
</name>
</person-group> (<year>2011</year>). <source>Cultivated vegetables of the world: a multilingual onomasticon</source> (<publisher-loc>Wageningen</publisher-loc>: <publisher-name>Wageningen Academic Publishers</publisher-name>), <fpage>828</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3920/978-90-8686-720-2</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>LeCun</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Bengio</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Hinton</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Deep learning</article-title>. <source>nature</source> <volume>521</volume>, <fpage>436</fpage>&#x2013;<lpage>444</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nature14539</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Miao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>An improved lightweight network architecture for identifying tobacco leaf maturity based on Deep learning</article-title>. <source>J. Intelligent Fuzzy Syst.</source> <volume>41</volume>, <fpage>4149</fpage>&#x2013;<lpage>4158</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3233/JIFS-210640</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Anguelov</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Erhan</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Szegedy</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Reed</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>C.-Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>). &#x201c;<article-title>Ssd: Single shot multibox detector</article-title>,&#x201d; in <conf-name>Computer Vision&#x2013;ECCV 2016: 14th European Conference</conf-name>, <conf-loc>Amsterdam, The Netherlands</conf-loc>, <conf-date>October 11&#x2013;14, 2016</conf-date>. <fpage>21</fpage>&#x2013;<lpage>37</lpage>.</citation>
</ref>
<ref id="B21">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Long</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Xiaoyu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zhigang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Yong</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Application of hyperspectral imaging technology in classification of tobacco leaves and impurities</article-title>,&#x201d; in <conf-name>2019 2nd International Conference on Safety Produce Informatization (IICSPI)</conf-name>, <conf-loc>Chongqing, China</conf-loc>, <fpage>157</fpage>&#x2013;<lpage>160</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/IICSPI48186.2019.9095975</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Qin</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>The application of hyperspectral images in the classification of fresh leaves&#x2019; maturity for flue-curing tobacco</article-title>. <source>Processes</source> <volume>11</volume>, <fpage>1249</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/pr11041249</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qing</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Qiu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Jin</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Improved YOLO-FastestV2 wheat spike detection model based on a multi-stage attention mechanism with a LightFPN detection head</article-title>. <source>Front. Plant Sci.</source> <volume>15</volume>, <elocation-id>1411510</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2024.1411510</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ren</surname> <given-names>S.</given-names>
</name>
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Faster R-CNN: Towards real-time object detection with region proposal networks</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell.</source> <volume>39</volume>, <fpage>1137</fpage>&#x2013;<lpage>1149</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TPAMI.2016.2577031</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Soviany</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Ionescu</surname> <given-names>R. T.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Optimizing the trade-off between single-stage and two-stage deep object detectors using image difficulty prediction</article-title>,&#x201d; in <conf-name>2018 20th International Symposium on Symbolic and Numeric Algorithms for Scientific Computing (SYNASC)</conf-name>, <conf-loc>Timisoara, Romania</conf-loc>, <fpage>209</fpage>&#x2013;<lpage>214</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/SYNASC.2018.00041</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Xue</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Qiao</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>b). <article-title>Cost-effective identification of the field maturity of tobacco leaves based on deep semi-supervised active learning and smartphone photograph</article-title>. <source>Comput. Electron. Agric.</source> <volume>215</volume>, <fpage>108373</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2023.108373</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Kong</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Zhan</surname> <given-names>W.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>a). &#x201c;<article-title>Sparse R-CNN: End-to-End Object Detection with Learnable Proposals</article-title>,&#x201d; in <conf-name>2021 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>, <conf-loc>Nashville, TN, USA</conf-loc>, <fpage>14449</fpage>&#x2013;<lpage>14458</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TPAMI.2023.3292030</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>YOLOv10: real-time end-to-end object detection</article-title>. <source>arXiv preprint arXiv:2405.14458</source>.</citation>
</ref>
<ref id="B29">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Dai</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>X.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). &#x201c;<article-title>Internimage: Exploring large-scale vision foundation models with deformable convolutions</article-title>,&#x201d; in <conf-name>2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>, <conf-loc>Vancouver, BC, Canada</conf-loc>, <fpage>14408</fpage>&#x2013;<lpage>14419</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR52729.2023.01385</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>N.</given-names>
</name>
<name>
<surname>He</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Tang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>DiffuCNN: tobacco disease identification and grading model in low-resolution complex agricultural scenes</article-title>. <source>Agriculture</source> <volume>14</volume>, <fpage>318</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture14020318</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wan</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>ELA: efficient local attention for deep convolutional neural networks</article-title>. <source>arXiv preprint arXiv:2403.01123</source>.</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yin</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Karangwa</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Song</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Duhoranimana</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Cui</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>Contribution of tobacco composition compounds to characteristic aroma of Chinese faint-scent cigarettes through chromatography analysis and partial least squares regression</article-title>. <source>J. Chromatogr. B</source> <volume>1105</volume>, <fpage>217</fpage>&#x2013;<lpage>227</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jchromb.2018.12.001</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>B.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>a). <article-title>Differential feature awareness network within antagonistic learning for infrared-visible object detection</article-title>. <source>IEEE Trans. Circuits Syst. Video Technol.</source> <volume>34</volume> (<issue>8</issue>), <fpage>6735</fpage>&#x2013;<lpage>6748</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TCSVT.2023.3289142</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Tan</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Si</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Part-aware correlation networks for few-shot learning</article-title>. <source>IEEE Trans. Multimedia</source> <volume>26</volume>, <fpage>9527</fpage>&#x2013;<lpage>9538</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TMM.2024.3394681</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Mu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Deep-IRTarget: An automatic target detector in infrared imagery using dual-domain feature extraction and allocation</article-title>. <source>IEEE Trans. Multimedia</source> <volume>24</volume>, <fpage>1735</fpage>&#x2013;<lpage>1749</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TMM.2021.3070138</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>X.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>b). <article-title>In-field tobacco leaf maturity detection with an enhanced mobileNetV1: incorporating a feature pyramid network and attention mechanism</article-title>. <source>Sensors</source> <volume>23</volume>, <fpage>5964</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s23135964</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Taha</surname> <given-names>M. F.</given-names>
</name>
<name>
<surname>He</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Qiu</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Development of an automatic pest monitoring system using a deep learning model of DPeNet</article-title>. <source>Measurement</source> <volume>203</volume>, <fpage>111970</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.measurement.2022.111970</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Dai</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Deformable convnets v2: More deformable, better results</article-title>,&#x201d; in <conf-name>2019 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>, <conf-loc>Long Beach, CA, USA</conf-loc>, <fpage>9308</fpage>&#x2013;<lpage>9316</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR.2019.00953</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>