<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2025.1730683</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>An improved YOLOv8n model for in-field detection of pests and diseases in pakchoi</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Zhu</surname><given-names>Yi</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Han</surname><given-names>Yanlu</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Yin</surname><given-names>Yilu</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zhao</surname><given-names>Shuo</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3250829/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Lan</surname><given-names>Yubin</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Huang</surname><given-names>Danfeng</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1577010/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>College of Agricultural Engineering and Food Science, Shandong University of Technology</institution>, <city>Zibo</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>Institute of Modern Agricultural Equipment, Shandong University of Technology</institution>, <city>Zibo</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff3"><label>3</label><institution>Zibo Digital Agriculture and Rural Development Center</institution>, <city>Zibo</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff4"><label>4</label><institution>School of Agriculture and Biology, Shanghai Jiao Tong University</institution>, <city>Shanghai</city>,&#xa0;<country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Shuo Zhao, <email xlink:href="mailto:zhs0704@sdut.edu.cn">zhs0704@sdut.edu.cn</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-01-22">
<day>22</day>
<month>01</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1730683</elocation-id>
<history>
<date date-type="received">
<day>23</day>
<month>10</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>31</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>31</day>
<month>12</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Zhu, Han, Yin, Zhao, Lan and Huang.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Zhu, Han, Yin, Zhao, Lan and Huang</copyright-holder>
<license>
<ali:license_ref start_date="2026-01-22">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>As an important leafy vegetable, pakchoi (<italic>Brassica chinensis</italic> L.) frequently suffers from pests and diseases in field environments. These symptoms are often localized on specific leaf regions, resulting in substantial losses in yield and quality. To achieve efficient and accurate detection of pakchoi pests and diseases, this study proposes an improved lightweight object detection model, termed YOLOv8n-DBW, based on the YOLOv8n framework. First, the original C2f module in the backbone network is replaced with a novel C2f-PE module, which integrates Partial Convolution (PConv) and an Efficient Multi-Scale Attention (EMA) mechanism to enhance high-level semantic feature extraction and multi-scale information fusion. Second, a Weighted Bidirectional Feature Pyramid Network (BiFPN) is introduced into the neck network to strengthen multi-scale feature fusion while improving model generalization and lightweight performance. Finally, the original CIoU loss in the regression branch is replaced with the Wise-IoU (Weighted Interpolation of Sequential Evidence for Intersection over Union) bounding box loss function, which improves bounding box regression accuracy and significantly enhances the detection of small and irregular pest and disease targets. Experimental results on a field-collected pakchoi pest and disease dataset demonstrate that the proposed YOLOv8n-DBW model reduces the number of parameters and model size by 33.3% and 31.8%, respectively, while improving precision and mean average precision (mAP) by 5.0% and 7.5% compared with the baseline YOLOv8n model. Overall, the proposed method outperforms several mainstream object detection algorithms and provides an efficient and accurate solution for real-time pakchoi pest and disease detection, showing strong potential for deployment on embedded systems and mobile devices.</p>
</abstract>
<kwd-group>
<kwd>loss function</kwd>
<kwd>object detection</kwd>
<kwd>pakchoi</kwd>
<kwd>pest and disease recognition</kwd>
<kwd>YOLO model</kwd>
</kwd-group>
<funding-group>
<award-group id="gs1">
<funding-source id="sp1">
<institution-wrap>
<institution>National Natural Science Foundation of China</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100001809</institution-id>
</institution-wrap>
</funding-source>
<award-id rid="sp1">32402557</award-id>
</award-group>
<award-group id="gs2">
<funding-source id="sp2">
<institution-wrap>
<institution>Natural Science Foundation of Shandong Province</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100007129</institution-id>
</institution-wrap>
</funding-source>
<award-id rid="sp2">ZR2023QC213</award-id>
</award-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This research was funded by the Key R&amp;D Program of Ningxia Hui Autonomous Region (2025BBF1004), the National Natural Science Foundation of China (32402557), and the Natural Science Foundation of Shandong Province (ZR2023QC213).</funding-statement>
</funding-group>
<counts>
<fig-count count="12"/>
<table-count count="9"/>
<equation-count count="12"/>
<ref-count count="54"/>
<page-count count="18"/>
<word-count count="9229"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Technical Advances in Plant Science</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Pakchoi (<italic>Brassica chinensis</italic> L.) is a leafy vegetable of significant economic and nutritional importance in Asia, particularly in China. It is widely cultivated due to high consumer demand (<xref ref-type="bibr" rid="B40">Wu et&#xa0;al., 2025</xref>). In 2022, the cultivation area of pakchoi in China reached approximately 300,000 hectares, yielding a total production of around 18 million tonnes, thereby making a substantial contribution to the stability of the vegetable supply. However, the continuous expansion of cultivation and increasing diversification of varieties have intensified the challenges posed by pests and diseases (<xref ref-type="bibr" rid="B9">Hou et&#xa0;al., 2018</xref>). Frequent occurrences of pests and diseases&#x2014;including the Diamondback Moth (<xref ref-type="bibr" rid="B12">Levere and Bresnahan, 2024</xref>), Downy Mildew (<xref ref-type="bibr" rid="B16">Liu et&#xa0;al., 2024</xref>), Leaf Miner (<xref ref-type="bibr" rid="B35">Vilela et&#xa0;al., 2023</xref>), Alternaria Leaf Spot (<xref ref-type="bibr" rid="B26">Olmez et&#xa0;al., 2023</xref>), Black Rot (<xref ref-type="bibr" rid="B11">Kellner et&#xa0;al., 2022</xref>), White Rust (<xref ref-type="bibr" rid="B2">Awika et&#xa0;al., 2019</xref>), and White Spot (<xref ref-type="bibr" rid="B24">Mamede et&#xa0;al., 2022</xref>)&#x2014;severely threaten the efficiency and sustainability of pakchoi production. Consequently, the rapid and accurate identification of these pests and diseases (<xref ref-type="bibr" rid="B50">Zhang et&#xa0;al., 2024</xref>), followed by the formulation and implementation of precise integrated management strategies, has become a critical scientific challenge and an urgent priority in agricultural research.</p>
<p>In the research field of pakchoi pest and disease recognition, traditional identification approaches have long been heavily reliant on manual identification, which not only requires substantial human resources, but also suffers from inconsistent recognition results and low overall accuracy. To address these limitations and substantially improve recognition precision and efficiency, early-stage automated research primarily explored digital image processing techniques (<xref ref-type="bibr" rid="B32">Song et&#xa0;al., 2022</xref>) and conventional machine learning algorithms (<xref ref-type="bibr" rid="B15">Liu, 2022</xref>). However, models constructed through such methods predominantly depended on manually engineered features and architectures, resulting in severely constrained flexibility and scalability that hindered their adaptation to complex and dynamic real-world scenarios. Globally, substantial research achievements have been accumulated in the field of crop pest and disease recognition. Driven by the rapid advancements in artificial intelligence, the applications of deep learning have gradually expanded from the early-stage identification of characteristic diseases in single crops to complex scenarios involving multiple crops and pathogen types (<xref ref-type="bibr" rid="B1">Ai et&#xa0;al., 2020</xref>). For instance, the implementation of diverse deep learning models in crops such as maize (<xref ref-type="bibr" rid="B6">Chenrui et&#xa0;al., 2022</xref>), tomato (<xref ref-type="bibr" rid="B31">Saeed et&#xa0;al., 2023</xref>), wheat (<xref ref-type="bibr" rid="B7">Genaev et&#xa0;al., 2021</xref>), and cotton (<xref ref-type="bibr" rid="B3">Caldeira et&#xa0;al., 2021</xref>) has provided robust technical support for the precision prevention and control of pests and diseases during crop cultivation (<xref ref-type="bibr" rid="B25">Mu and Zeng, 2019</xref>; <xref ref-type="bibr" rid="B44">Xin and Wang, 2021</xref>; <xref ref-type="bibr" rid="B36">Wang, 2022</xref>).</p>
<p>In recent years, detection methods based on the YOLO deep learning model have been increasingly applied due to their high speed and accuracy. For instance, to address the insufficient feature extraction efficiency of the original YOLOv5l model in cucumber pest and disease detection, researchers replaced the C3 modules in both the backbone and neck with Bottleneck CSP modules, constructing a more efficient feature learning pathway. The improved model achieved a mean average precision (mAP) of 80.1% (<xref ref-type="bibr" rid="B27">Omer et&#xa0;al., 2024</xref>). In tomato pest and disease recognition, an improved YOLO-FMDI deep learning algorithm demonstrated significantly enhanced accuracy compared to the original YOLOv8n (<xref ref-type="bibr" rid="B33">Sun et&#xa0;al., 2024</xref>). To tackle core issues in vegetable disease detection, such as missed detection of small targets, insufficient feature fusion, and imbalance between detection accuracy and speed, the YOLOv8n-vegetable improved model was proposed. This model achieved an mAP of 91.4%, a 6.46% improvement over the original YOLOv8n (<xref ref-type="bibr" rid="B38">Wang and Liu, 2024</xref>). Similarly, <xref ref-type="bibr" rid="B52">Zheng et&#xa0;al. (2024)</xref> proposed the YOLOPC model based on YOLOv5s for pakchoi pest detection, achieving an mAP of 91.4%, representing a 12.9% increase over the original YOLOv5s. Collectively, these studies have achieved precise, intelligent recognition of a range of vegetable pests and diseases. Nevertheless, several limitations remain. Because crops and their pests and diseases differ markedly in characteristics, recognition models must be designed in a targeted manner to achieve precise identification. Moreover, for pests and diseases with many species and highly variable symptoms, existing methods still face challenges in practical application: detection results are susceptible to environmental factors, and both recognition efficiency and accuracy require improvement. In addition, recognition models specifically targeting leafy vegetables remain relatively scarce.</p>
<p>To address the aforementioned issues, and leveraging the outstanding performance of the YOLO series networks in object detection&#x2014;particularly the advantages of YOLOv8 in detection accuracy, speed, and model size (<xref ref-type="bibr" rid="B37">Wang et&#xa0;al., 2023</xref>)&#x2014;this work constructs the YOLOv8-DBW model for pest and disease detection in pakchoi, building upon the YOLOv8n framework. Furthermore, the YOLOv8-DBW model is compared with classical object detection models, namely SSD (<xref ref-type="bibr" rid="B49">Zhai et&#xa0;al., 2020</xref>), Faster R-CNN (<xref ref-type="bibr" rid="B51">Zhao and Liu, 2024</xref>), YOLOv5n (<xref ref-type="bibr" rid="B23">Ma et&#xa0;al., 2023</xref>), YOLOv5s (<xref ref-type="bibr" rid="B43">Xie et&#xa0;al., 2024</xref>), YOLOv7-tiny (<xref ref-type="bibr" rid="B5">Cheng et&#xa0;al., 2024</xref>), YOLOv10n (<xref ref-type="bibr" rid="B14">Li et&#xa0;al., 2024</xref>), YOLOv11n (<xref ref-type="bibr" rid="B54">Zhou and Jiang, 2025</xref>), and YOLOv12n (<xref ref-type="bibr" rid="B47">Yin et&#xa0;al., 2025</xref>), to evaluate its efficiency and accuracy. The proposed model not only significantly enhances detection performance for pests and diseases but also offers a reliable technical solution for lightweight, real-time diagnosis in complex field conditions. These advancements hold substantial practical implications for promoting the intelligent development of precision agriculture, and the findings can provide efficient and accurate technical support for pakchoi pest and disease detection.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Data collection</title>
<p>Seven types of pakchoi pests and diseases that frequently occur in production were selected as research targets: Diamondback Moth, Leaf Miner, Downy Mildew, Alternaria Leaf Spot, Black Rot, White Rust, and White Spot. Detailed visual characteristics of these pests and diseases are summarized in <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>, and representative image samples are shown in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>. To minimize selection bias and ensure representative coverage of different infection stages, a systematic scanning protocol was adopted during image acquisition. Images of pakchoi plants were captured sequentially along cultivation rows to include early-stage, mild, atypical, and late-stage symptomatic samples. Shooting distances were standardized between 20 and 50&#xa0;cm to balance feature resolution and field of view. Images were primarily captured from a vertical top-down perspective, with additional 45&#xb0; oblique views to account for leaf overlap and variations in plant morphology. Image acquisition was conducted from March 21 to May 8, 2025, in Cao County (Shandong Province), Wujiang District (Jiangsu Province), and Jiading District (Shanghai). Images were collected using multiple commonly used smartphone models, including the Xiaomi 12X, OPPO Reno8 Pro, and Samsung Galaxy A53. The corresponding shooting resolutions were 4000&#xd7;3000, 3024&#xd7;4032, and 5632&#xd7;4224 pixels, respectively.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>The common diseases and pests of pakchoi and key characteristics.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Names of pests and diseases</th>
<th valign="middle" align="center">Feature description</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Downy Mildew</td>
<td valign="middle" align="left">On the leaf adaxial surface (upper side), irregular chlorotic to yellowish-brown lesions appear. Under humid conditions, a downy mildew growth (white or grayish-purple in color) develops on the corresponding areas of the leaf abaxial surface (lower side). As the lesions expand, the affected leaves become chlorotic, yellow, curled, and withered.</td>
</tr>
<tr>
<td valign="middle" align="center">Alternaria Leaf Spot</td>
<td valign="middle" align="left">Circular or subcircular, brown to dark brown lesions appear on the leaves, often exhibiting distinct concentric rings. As the disease progresses severely, lesions coalesce and expand, ultimately causing leaf necrosis.</td>
</tr>
<tr>
<td valign="middle" align="center">Black Rot</td>
<td valign="middle" align="left">V-shaped yellowish-brown lesions initially appear at the leaf margins and progressively expand inward, with characteristic vein blackening forming a net-like pattern at lesion boundaries.</td>
</tr>
<tr>
<td valign="middle" align="center">White Rust</td>
<td valign="middle" align="left">On the abaxial leaf surface, cream-colored, slightly raised pustules (sori) develop, which rupture to release powdery white spores. Correspondingly, the adaxial surface exhibits indistinct chlorotic spots ranging from pale yellow to yellowish-green. As the disease progresses, lesions coalesce, leading to extensive chlorosis and necrosis of the foliage.</td>
</tr>
<tr>
<td valign="middle" align="center">White Spot</td>
<td valign="middle" align="left">Initial lesions manifest as small smoky grey-brown specks that subsequently undergo radial expansion, forming circular to subcircular dichromatic lesions characterized by ash-white centers surrounded by distinct lemon-yellow margins. Under humid conditions, the abaxial surface corresponding to these lesions develops effuse, pale grey fungal growth.</td>
</tr>
<tr>
<td valign="middle" align="center">Leaf Miner</td>
<td valign="middle" align="left">Larvae tunnel endophagously through the mesophyll tissue, forming characteristic serpentine mines exhibiting a whitish, serpentine trajectory through the foliar layers.</td>
</tr>
<tr>
<td valign="middle" align="center">Diamondback Moth</td>
<td valign="middle" align="left">Larvae feed on the mesophyll, early-instar larvae leave translucent feeding spots, while late-instar larvae bore holes; in severe infestations, only the vascular veins remain.</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Image examples of the data set. <bold>(a)</bold> is the Diamondback Moth, <bold>(b)</bold> is the Leaf Miner, <bold>(c)</bold> is the Downy Mildew, <bold>(d)</bold> is the Alternaria Leaf Spot, <bold>(e)</bold> is the Black Rot, <bold>(f)</bold> is the White Rust, <bold>(g)</bold> is the White Spot.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1730683-g001.tif">
<alt-text content-type="machine-generated">Seven images showing various leaf conditions. (a) is the Diamondback Moth. (b) Leaf with white trails from leaf miner larvae. (c) Leaf with yellow spots indicating possible disease. (d) Leaf with brown spots suggesting bacterial or fungal infection. (e) Leaf edge with dark brown rot. (f) Leaf with yellow and brown areas, possibly disease symptoms. (g) Leaf with small brown lesions possibly from pests or disease.</alt-text>
</graphic></fig>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Data enhancement</title>
<p>In this study, we collected images of seven common pests and diseases of pakchoi from three different field areas, covering various growth stages and disease manifestations. Given the significant variability in illumination and meteorological conditions in open-field environments, the data acquisition process was designed to encompass multiple diurnal phases (morning, noon, afternoon) and diverse weather scenarios (sunny, overcast, post-precipitation periods). The original dataset consisted of 1,782 images. To enhance the model&#x2019;s generalization ability, we performed data augmentation on these images (as shown in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>), resulting in a final dataset of 6,110 images. The sample distribution across the seven pest and disease categories is as follows: 1,085 images of Diamondback Moth damage; 842 images of Leaf Miner damage; 992 images of Downy Mildew; 753 images of Alternaria Leaf Spot; 745 images of Black Rot; 855 images of White Rust; and 838 images of White Spot. All images in the final dataset were standardized to a resolution of 640&#xd7;640 pixels in JPG format. Furthermore, the dataset included images captured during &#x201c;post-precipitation&#x201d; periods, which naturally contained samples with water droplet reflections and soil splashes. Meanwhile, variations in handheld movement during image capture introduced natural motion blur, which helps improve the model&#x2019;s robustness to complex field conditions.</p>
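<p>For illustration, the augmentation operations shown in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref> (rotation, brightness adjustment, and cropping) can be sketched in Python with the torchvision library; the parameter ranges below are assumptions for illustration rather than the exact settings used in this study, and in a detection setting the bounding-box annotations must be transformed together with the images (e.g., with a box-aware library such as Albumentations).</p>
<code language="python">
# Minimal sketch of the image-level augmentations illustrated in Figure 2.
# Parameter ranges are illustrative assumptions, not the study's settings.
from torchvision import transforms

augment = transforms.Compose([
    transforms.RandomRotation(degrees=30),                # random rotation
    transforms.ColorJitter(brightness=0.3),               # brightness adjustment
    transforms.RandomResizedCrop(640, scale=(0.7, 1.0)),  # crop and resize to 640x640
])

# Usage: augmented = augment(pil_image)  # pil_image is a PIL.Image instance
</code>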
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Data enhancement.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1730683-g002.tif">
<alt-text content-type="machine-generated">Three panels illustrate image editing processes applied to a plant photo. The first shows the original image, followed by a rotated version. The second shows the original image with adjusted brightness. The third shows the original image cropped. Each modification is visually linked to its original with arrows.</alt-text>
</graphic></fig>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Data labeling</title>
<p>The dataset images were manually annotated using LabelImg software (<ext-link ext-link-type="uri" xlink:href="https://github.com/HumanSignal/labelImg">https://github.com/HumanSignal/labelImg</ext-link>). The following categorical labels were assigned: &#x201c;Backmoth&#x201d; for Diamondback Moth, &#x201c;Leafminer&#x201d; for Leaf Miner, &#x201c;Mildew&#x201d; for Downy Mildew, &#x201c;ALTERNARIA&#x201d; for Alternaria Leaf Spot, &#x201c;BLACK-ROT&#x201d; for Black Rot, &#x201c;WHITE-RUST&#x201d; for White Rust, and &#x201c;WHITE-SPOT&#x201d; for White Spot. All annotations were saved in TXT files, each containing the corresponding object class and bounding box coordinates. Multi-instance annotations were preserved where applicable, as individual images could contain multiple pests or diseases simultaneously. The dataset was subsequently partitioned into training, validation, and test sets with an 8:1:1 ratio, resulting in 4,888 images for training, 611 for validation, and 611 for testing.</p>
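<p>As a point of reference, the 8:1:1 partition described above can be reproduced with a short script of the following form; the directory layout, file naming, and random seed are assumptions made for illustration only.</p>
<code language="python">
# Illustrative 8:1:1 train/validation/test split for a YOLO-format dataset
# (one TXT label file per image). Paths and the seed are assumed values.
import random
import shutil
from pathlib import Path

images = sorted(Path("dataset/images").glob("*.jpg"))
random.seed(0)
random.shuffle(images)

n = len(images)
splits = {
    "train": images[: int(0.8 * n)],
    "val":   images[int(0.8 * n): int(0.9 * n)],
    "test":  images[int(0.9 * n):],
}

for split, files in splits.items():
    for img in files:
        label = Path("dataset/labels") / (img.stem + ".txt")  # YOLO TXT annotation
        for sub, src in (("images", img), ("labels", label)):
            dst = Path("dataset") / split / sub
            dst.mkdir(parents=True, exist_ok=True)
            shutil.copy(src, dst / src.name)
</code>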
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>The network structure of the YOLOv8 deep-learning model</title>
<p>As a next-generation end-to-end object detection algorithm, YOLOv8 significantly enhances detection performance in complex scenarios through architectural refinements and technical innovations (<xref ref-type="bibr" rid="B22">Ma et&#xa0;al., 2024</xref>), while inheriting the computational efficiency characteristic of the YOLO series. The model employs a four-module architecture: Input &#x2192; Backbone &#x2192; Neck &#x2192; Head (<xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>), and its core design demonstrates in-depth optimizations for real-time performance, adaptability to multi-scale targets, and model lightweighting. The model has five scaled versions (n, s, m, l, x), which satisfy the adaptation requirements of diversified application scenarios (<xref ref-type="bibr" rid="B17">Liu et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B21">Ma and Pang, 2023</xref>; <xref ref-type="bibr" rid="B39">Wang et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B4">Chen et&#xa0;al., 2025</xref>; <xref ref-type="bibr" rid="B20">Long and Lin, 2025</xref>).</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>YOLOv8 model network structure. Conv is the convolution module, C2f is the cross-stage partial feature fusion module, SPPF is the spatial pyramid pooling-fast layer, Concat is the feature concatenation module, Upsample is the upsampling layer, Detect is the detection head, Conv2d is the two-dimensional convolution, BatchNorm2d is the batch normalization layer, SiLU is the activation function, MaxPool2d is the max pooling layer, Bottleneck is the convolution module that includes a residual connection, n denotes the number of Bottleneck modules, Split is the feature splitting operation, Bbox refers to the bounding box.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1730683-g003.tif">
<alt-text content-type="machine-generated">Diagram of an object detection model architecture. The model has three sections: Backbone, Neck, and Head. The Backbone consists of Conv and C2f layers for feature extraction with an SPPF block. The Neck connects with additional Conv, C2f, and Concatenation layers. The Head section has detection layers at resolutions 80x80, 40x40, and 20x20, outputting images of a leaf with a caterpillar. The Conv and detection processes are depicted with arrows and blocks, detailing the model flow from input to output.</alt-text>
</graphic></fig>
<p>This study adopts YOLOv8 as the baseline network. The model follows the canonical &#x201c;Backbone-Neck-Head&#x201d; hierarchical design paradigm, in which three core functional modules cooperate to form an efficient object detection framework (<xref ref-type="bibr" rid="B10">Huang et&#xa0;al., 2024</xref>). Among these, the backbone network, serving as the primary structure for feature extraction, incorporates the Basic Convolution (Conv) module, the Cross-Stage Partial Feature Fusion (C2f) unit, and the Spatial Pyramid Pooling Fast (SPPF) module (<xref ref-type="bibr" rid="B41">Xiao et&#xa0;al., 2024</xref>). The neck network adopts a bidirectional architecture that integrates the Feature Pyramid Network (FPN) (<xref ref-type="bibr" rid="B42">Xie et&#xa0;al., 2023</xref>) and the Path Aggregation Network (PAN) (<xref ref-type="bibr" rid="B30">Roy et&#xa0;al., 2022</xref>). Via a bidirectional connection mechanism involving top-down semantic feature transmission and bottom-up detailed feature feedback, it realizes cross-scale fusion of feature maps across different levels. The detection head employs an Anchor-Free detection approach, eliminating the reliance of traditional anchor-based mechanisms on prior target sizes (<xref ref-type="bibr" rid="B37">Wang et&#xa0;al., 2023</xref>).</p>
<p>However, during training on the pakchoi pest/disease dataset, the original YOLOv8 model exhibited insufficient detection accuracy and a low target recognition rate (<xref ref-type="bibr" rid="B48">Yue et&#xa0;al., 2024</xref>). To evaluate the performance differences among various model versions, comparative experiments were conducted on the YOLOv8 series (n/s/m/l/x). Mean Average Precision (mAP) served as the core evaluation metric to assess the detection performance variations across models under conditions of multi-scale target distribution and leaf occlusion scenarios. The results are detailed in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Performance results of the YOLOv8 series version.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Model</th>
<th valign="middle" align="center">Mean average precision @0.5(mAP@0.5)/%</th>
<th valign="middle" align="center">Mean average precision @0.5:0.95(mAP@0.5:0.95)/%</th>
<th valign="middle" align="center">Parameters/M</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">YOLOv8n</td>
<td valign="middle" align="center">77.8</td>
<td valign="middle" align="center">59.5</td>
<td valign="middle" align="center">3.0</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv8s</td>
<td valign="middle" align="center">75.4</td>
<td valign="middle" align="center">58.9</td>
<td valign="middle" align="center">11.1</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv8m</td>
<td valign="middle" align="center">73.8</td>
<td valign="middle" align="center">57.7</td>
<td valign="middle" align="center">25.8</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv8l</td>
<td valign="middle" align="center">77.6</td>
<td valign="middle" align="center">60.6</td>
<td valign="middle" align="center">43.6</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv8x</td>
<td valign="middle" align="center">78.9</td>
<td valign="middle" align="center">61.9</td>
<td valign="middle" align="center">68.1</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>mAP@0.5 denotes the mean average precision computed at an intersection-over-union (IoU) threshold of 0.5. mAP@0.5:0.95 denotes the mean average precision averaged over IoU thresholds from 0.5 to 0.95 in steps of 0.05.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>, under the unified training configuration (200 epochs, RTX 4090 GPU, and identical hyperparameters), despite their larger parameter counts and more complex architectures, the YOLOv8m and YOLOv8l models exhibited lower detection accuracy on the pakchoi pest/disease dataset compared to the lightweight YOLOv8n model. This indicates that merely increasing model complexity failed to yield accuracy gains on this specific dataset, while significantly increasing the computational burden and inference time. Although YOLOv8x achieved the highest accuracy, its substantial parameter count resulted in excessively slow inference speeds, rendering it impractical for low-cost, high-efficiency real-world applications. In addition, the YOLOv8s model also demonstrated slightly lower accuracy than YOLOv8n. This phenomenon may be attributed to the fact that larger models with higher parameter counts typically require larger datasets or different convergence schedules to avoid redundancy and potential overfitting in specific agricultural scenarios. Considering the balance between detection accuracy, computational cost, and inference speed, YOLOv8n was selected as the baseline model for subsequent improvements. This model maintains relatively high detection accuracy while possessing the lowest parameter count and highest inference efficiency, serving as a solid foundation for algorithmic optimization.</p>
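<p>For reference, the unified training configuration used for the comparison in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref> could be reproduced along the following lines with the Ultralytics Python API; the dataset configuration file name is an assumption, and hyperparameters not stated above are left at their defaults.</p>
<code language="python">
# Sketch of the YOLOv8 n/s/m/l/x comparison under a unified configuration
# (200 epochs, 640x640 input); "pakchoi.yaml" is an assumed dataset config file.
from ultralytics import YOLO

for variant in ("yolov8n", "yolov8s", "yolov8m", "yolov8l", "yolov8x"):
    model = YOLO(f"{variant}.pt")                            # load pretrained weights
    model.train(data="pakchoi.yaml", epochs=200, imgsz=640)  # unified training setup
    metrics = model.val()                                    # mAP@0.5 and mAP@0.5:0.95
    print(variant, metrics.box.map50, metrics.box.map)
</code>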
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Improved YOLOv8 model network structure</title>
<p>In natural settings, pakchoi exhibits high-density planting, leading to challenges such as mutual leaf occlusion, weed interference, and overlapping leaves across different growth stages. Concurrently, pest and disease regions display high diversity in characteristics: pathogen infection manifests as lesions with distinct textures, morphologies, and colors, while insect damage presents as mechanical injuries like mines and holes. This complex background interference coupled with significant morphological variations in pests and diseases complicates the precise identification and localization of target regions by detection models. To address these challenges, this study proposes the YOLOv8-DBW model, with the improvement strategy comprising the following three key aspects:</p>
<list list-type="order">
<list-item>
<p>Backbone Network Enhancement: An efficient multi-scale attention mechanism and partial convolution structure are introduced to enhance the model&#x2019;s ability to extract small-target features in complex field environments, thereby improving recognition accuracy.</p></list-item>
<list-item>
<p>Neck Network Enhancement: The BiFPN module is introduced to strengthen feature fusion capability while notably reducing model parameter count and computational cost, thus achieving lightweight design.</p></list-item>
<list-item>
<p>Loss Function Optimization: The Wise-IoU (Weighted Interpolation of Sequential Evidence for Intersection over Union) loss function is introduced, which incorporates classification information into the Intersection over Union (IoU) computation to enhance the model&#x2019;s bounding box regression performance. The refinement improves learning precision for pest/disease features, thereby boosting detection stability and accuracy. The architecture of the enhanced YOLOv8-DBW model is illustrated in <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>.</p></list-item>
</list>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Network structure of the improved YOLOv8n model.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1730683-g004.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a neural network architecture with labeled sections: Backbone, Neck, and Head. Backbone includes sequential Conv and C2f layers, ending with C2f-PE and SPPF. Neck features Conv, BiFPN, and Upsample layers, looping through BiFPN multiple times. Head contains C2f, Conv, BiFPN, and Detect layers. Arrows indicate data flow through the network.</alt-text>
</graphic></fig>
<sec id="s2_5_1">
<label>2.5.1</label>
<title>C2f-PE module integrating efficient multi-scale attention and partial convolution</title>
<sec id="s2_5_1_1">
<label>2.5.1.1</label>
<title>Efficient multi-scale attention mechanism</title>
<p>In the task of pest and disease identification in pakchoi, challenges such as severe occlusion, complex background interference, and poor image quality often hinder the effective extraction of features from small targets. To address this issue, this paper introduces the Efficient Multi-scale Attention (EMA) mechanism. The EMA mechanism employs cross-spatial learning to group channels without reducing their dimensionality, thereby preserving information across each channel while minimizing computational overhead (<xref ref-type="bibr" rid="B18">Liu et&#xa0;al., 2024</xref>). The network structure of the EMA attention mechanism is illustrated in <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Network structure diagram of EMA attention mechanism. X denotes the input feature map, C, H and W represent the number of channels, height, and width of the input image, respectively, G represents the number of groups, C/G represents the number of channels contained in each group, and <inline-formula>
<mml:math display="inline" id="im1"><mml:mrow><mml:msub><mml:mi>Z</mml:mi><mml:mi>C</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> represents the feature map of the c-th channel after two-dimensional global average pooling, <inline-formula>
<mml:math display="inline" id="im2"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> represents the sub-feature map, &#x201c;X Avg Pool&#x201d; and &#x201c;Y Avg Pool&#x201d; denote one-dimensional average pooling operations along different directions, while &#x201c;Avg Pool&#x201d; refers to two-dimensional average pooling, &#x201c;Group Norm&#x201d; represents group normalization, Sigmoid refers to the activation function, Softmax denotes the normalization function.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1730683-g005.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a neural network architecture. It features two main branches: &#x201c;3x3 branch&#x201d; with convolution, pooling, and softmax layers, and &#x201c;1x1 branch&#x201d; with average pooling, sigmoid, group normalization, and softmax. Both branches integrate through addition and a final sigmoid operation, ending with an output cube marked &#x201c;C, H, W."</alt-text>
</graphic></fig>
<p>When EMA operates, first, it takes the feature map <inline-formula>
<mml:math display="inline" id="im3"><mml:mrow><mml:mtext>X</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mtext>R</mml:mtext><mml:mrow><mml:mtext>C</mml:mtext><mml:mo>&#xd7;</mml:mo><mml:mtext>H</mml:mtext><mml:mo>&#xd7;</mml:mo><mml:mtext>W</mml:mtext></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> extracted by the backbone network as input, and partitions the feature map into G groups of sub-feature maps along the channel dimension of X: <inline-formula>
<mml:math display="inline" id="im4"><mml:mrow><mml:mtext>X</mml:mtext><mml:mo>=</mml:mo><mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mrow><mml:msub><mml:mtext>X</mml:mtext><mml:mn>0</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mtext>X</mml:mtext><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x22ef;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mtext>X</mml:mtext><mml:mtext>i</mml:mtext></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x22ef;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mtext>X</mml:mtext><mml:mrow><mml:mtext>G</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">]</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, where each sub-feature map <inline-formula>
<mml:math display="inline" id="im5"><mml:mrow><mml:msub><mml:mtext>X</mml:mtext><mml:mtext>i</mml:mtext></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mtext>R</mml:mtext><mml:mrow><mml:mfrac><mml:mtext>C</mml:mtext><mml:mrow><mml:mtext>G</mml:mtext></mml:mrow></mml:mfrac><mml:mo>&#xd7;</mml:mo><mml:mtext>H</mml:mtext><mml:mo>&#xd7;</mml:mo><mml:mtext>W</mml:mtext></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>. Subsequently, in the 1&#xd7;1 branch, two one-dimensional global average pooling operations are performed along the horizontal and vertical axes to encode channels, establishing interactions between channel and spatial location information, while generating two spatial encoding feature maps that are concatenated along the vertical direction. This operation is computed as follows (<xref ref-type="disp-formula" rid="eq1">Equations 1</xref> and <xref ref-type="disp-formula" rid="eq2">2</xref>):</p>
<disp-formula id="eq1"><label>(1)</label>
<mml:math display="block" id="M1"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msubsup><mml:mtext>Z</mml:mtext><mml:mrow><mml:mtext>C</mml:mtext></mml:mrow><mml:mtext>H</mml:mtext></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mtext>H</mml:mtext><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mtext>W</mml:mtext></mml:mfrac><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mtext>i</mml:mtext><mml:mo>&#x2264;</mml:mo><mml:mtext>W</mml:mtext></mml:mrow></mml:munder><mml:msub><mml:mtext>X</mml:mtext><mml:mtext>C</mml:mtext></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>H</mml:mtext><mml:mo>,</mml:mo><mml:mtext>i</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq2"><label>(2)</label>
<mml:math display="block" id="M2"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msubsup><mml:mtext>Z</mml:mtext><mml:mrow><mml:mtext>C</mml:mtext></mml:mrow><mml:mtext>W</mml:mtext></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mtext>W</mml:mtext><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mtext>W</mml:mtext></mml:mfrac><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>&#x2264;</mml:mo><mml:mtext>j</mml:mtext><mml:mo>&#x2264;</mml:mo><mml:mtext>H</mml:mtext></mml:mrow></mml:munder><mml:msub><mml:mtext>X</mml:mtext><mml:mtext>C</mml:mtext></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mtext>W</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>In the formula, H and W are the height and width of the feature map, respectively; <inline-formula>
<mml:math display="inline" id="im6"><mml:mrow><mml:msubsup><mml:mtext>Z</mml:mtext><mml:mrow><mml:mtext>C</mml:mtext></mml:mrow><mml:mtext>H</mml:mtext></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mtext>H</mml:mtext><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im7"><mml:mrow><mml:msubsup><mml:mtext>Z</mml:mtext><mml:mrow><mml:mtext>C</mml:mtext></mml:mrow><mml:mtext>W</mml:mtext></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mtext>W</mml:mtext><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> are the axis-specific pooling outputs generated along the horizontal axis and vertical axis, respectively; i and j are the width and height of the input of the C-th channel, respectively; <inline-formula>
<mml:math display="inline" id="im8"><mml:mrow><mml:msub><mml:mtext>X</mml:mtext><mml:mtext>C</mml:mtext></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>j</mml:mtext><mml:mo>,</mml:mo><mml:mtext>W</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im9"><mml:mrow><mml:msub><mml:mtext>X</mml:mtext><mml:mtext>C</mml:mtext></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>H</mml:mi><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> are the input features at the spatial positions (j, W) and (H, i) in the C-th channel, respectively.</p>
<p>Subsequently, a nonlinear Sigmoid activation function is adopted to aggregate the two spatial encoding feature maps processed by 1&#xd7;1 convolution in each group. Then, through group normalization, 2D average pooling, and Softmax operation in sequence, an intermediate feature map with a dimension of C/G&#xd7;1 is generated (<xref ref-type="bibr" rid="B18">Liu et&#xa0;al., 2024</xref>). The 2D global average pooling operation applied to the processed feature is described by <xref ref-type="disp-formula" rid="eq3">Equation 3</xref>:</p>
<disp-formula id="eq3"><label>(3)</label>
<mml:math display="block" id="M3"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>Z</mml:mi><mml:mi>C</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mtext>H</mml:mtext><mml:mo>&#xd7;</mml:mo><mml:mtext>W</mml:mtext></mml:mrow></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mtext>j</mml:mtext><mml:mrow><mml:mtext>H</mml:mtext></mml:mrow></mml:munderover><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mtext>j</mml:mtext><mml:mrow><mml:mtext>W</mml:mtext></mml:mrow></mml:munderover><mml:msub><mml:mtext>X</mml:mtext><mml:mrow><mml:mtext>C</mml:mtext></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>i</mml:mtext><mml:mo>,</mml:mo><mml:mtext>j</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>In the formula, <inline-formula>
<mml:math display="inline" id="im10"><mml:mrow><mml:msub><mml:mi>Z</mml:mi><mml:mi>C</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> represents the feature map of the C-th channel after 2D global average pooling, and <inline-formula>
<mml:math display="inline" id="im11"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mi>C</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> represents the processed feature at the spatial position (i,j) in the C-th channel after 1&#xd7;1 convolution and Sigmoid activation. The intermediate feature map after the Softmax operation is subjected to matrix multiplication with the sub-feature map processed by 3&#xd7;3 convolution, resulting in the first spatial attention weight map with a dimension of 1&#xd7;H&#xd7;W.</p>
<p>The output from the 3&#xd7;3 branch, after 2D average pooling and Softmax operation, undergoes matrix multiplication with the feature map from the 1&#xd7;1 branch, generating the second spatial attention weight map with dimensions 1&#xd7;H&#xd7;W. Finally, the two spatial attention weight maps are summed and then normalized via the Sigmoid function to obtain the final attention weight map. This weight map is subsequently mapped with the original feature map, enabling the model to focus attention on key regions.</p>
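<p>To make the directional encoding in <xref ref-type="disp-formula" rid="eq1">Equations 1</xref>&#x2013;<xref ref-type="disp-formula" rid="eq3">3</xref> concrete, the following PyTorch fragment sketches the three pooling operations on a grouped sub-feature map; it is an illustrative excerpt rather than the authors&#x2019; implementation of the full EMA module.</p>
<code language="python">
# Sketch of the EMA pooling steps in Equations 1-3 (PyTorch); illustrative only.
import torch
import torch.nn as nn

x = torch.randn(1, 32, 80, 80)            # grouped sub-feature map X_i: (B, C/G, H, W)

pool_h = nn.AdaptiveAvgPool2d((None, 1))  # Eq. 1: average over W, output (B, C/G, H, 1)
pool_w = nn.AdaptiveAvgPool2d((1, None))  # Eq. 2: average over H, output (B, C/G, 1, W)
gap    = nn.AdaptiveAvgPool2d(1)          # Eq. 3: 2D global average pooling

z_h = pool_h(x)                           # directional encoding along the height axis
z_w = pool_w(x)                           # directional encoding along the width axis
z_c = gap(x)                              # per-channel global context
</code>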
</sec>
<sec id="s2_5_1_2">
<label>2.5.1.2</label>
<title>Partial convolution module</title>
<p>PConv is an efficient convolutional structure that offers greater flexibility and better adaptability to missing data than traditional convolution. Rather than applying the same convolution kernel to all input data, PConv dynamically determines the effective scope of the kernel according to the validity of the data, that is, whether the data features are missing or damaged, thereby suppressing interference from irrelevant factors. This design minimizes unnecessary computation and memory access, markedly improving the real-time processing efficiency of the model. It reduces floating-point operations while maintaining strong feature extraction capability, allowing effective processing of images with irregularly missing or occluded features (<xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref>).</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Structure diagram of the partial convolution module.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1730683-g006.tif">
<alt-text content-type="machine-generated">Diagram illustrating an identity transformation in a neural network. A rectangular input with dimensions h by w and channel c is transformed into an identical output. Two smaller cubes represent intermediate operations that maintain height h. Arrows indicate the flow direction, labeled as identity transformation.</alt-text>
</graphic></fig>
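<p>Consistent with the identity pass-through depicted in <xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref>, a partial convolution can be sketched as follows, where only a subset of channels is convolved and the remaining channels pass through unchanged; the fraction of convolved channels (here one quarter) is an assumption for illustration.</p>
<code language="python">
# Minimal sketch of a partial convolution (PConv): a 3x3 convolution is applied to a
# subset of the channels, and the remaining channels are passed through unchanged.
import torch
import torch.nn as nn

class PConv(nn.Module):
    def __init__(self, channels: int, ratio: float = 0.25, kernel_size: int = 3):
        super().__init__()
        self.conv_ch = max(1, int(channels * ratio))  # channels that are convolved
        self.conv = nn.Conv2d(self.conv_ch, self.conv_ch, kernel_size,
                              padding=kernel_size // 2, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x1, x2 = x[:, : self.conv_ch], x[:, self.conv_ch :]
        return torch.cat((self.conv(x1), x2), dim=1)  # identity on the untouched part
</code>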
</sec>
<sec id="s2_5_1_3">
<label>2.5.1.3</label>
<title>Fusion and structure of the C2f-PE module</title>
<p>To enhance the model performance, this study integrates the EMA attention mechanism and PConv into the C2f module to construct a novel C2f-PE module (whose structure is shown in <xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7</bold></xref>). Specifically, the EMA attention mechanism is first inserted after the first Conv in the C2f module to dynamically allocate the weights of input features; meanwhile, the 3&#xd7;3 standard convolution in the Bottleneck is replaced by 3&#xd7;3 PConv for lightweighting. Based on pre-experiments, replacing the 4th C2f module in the backbone network with C2f-PE achieves the optimal effect. This replacement ensures the stability of the input and output dimensions of each layer in the network and ultimately enables the model to show stronger detection capability for pakchoi pest and disease images with irregular missing or leaf occlusion.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Structure diagram of C2f-PE module.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1730683-g007.tif">
<alt-text content-type="machine-generated">Diagram illustrating two deep learning components: on the left, a Bottleneck module with PConv3&#xd7;3, Conv1&#xd7;1, BN, ReLU, and Conv1&#xd7;1 layers connected sequentially; on the right, a C2f-PE module featuring Conv, EMA, Split, Bottleneck, Contact, and another Conv, showing a detailed flow of operations.</alt-text>
</graphic></fig>
</sec>
</sec>
<sec id="s2_5_2">
<label>2.5.2</label>
<title>Feature fusion network BiFPN</title>
<p>YOLOv8n employs a feature pyramid structure composed of FPN (Feature Pyramid Network) and PAN (Path Aggregation Network) (<xref ref-type="bibr" rid="B19">Liu et&#xa0;al., 2018</xref>) to achieve cross-scale feature fusion: as shown in <xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8a</bold></xref>, the Feature Pyramid Network (FPN) transmits high-level semantic features from top to bottom. Conversely, the Path Aggregation Network (PAN) enhances low-level localization features through bottom-up paths, and the two jointly establish multi-scale feature correlations (<xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8b</bold></xref>). However, due to its feature aggregation mechanism, PANet has inherent limitations in pakchoi pest and disease detection. First, the multi-scale downsampling and fusion processes of PANet lead to the gradual attenuation and loss of detailed features layer by layer, resulting in the loss of integrity of small target features. Second, the feature fusion strategy of PANet has insufficient robustness to background noise and illumination interference, making it difficult to separate features of occluded targets and easily causing feature confusion and detection deviations. Third, the feature aggregation paths of PANet are relatively complex with large computational overhead, making it difficult to meet the requirements of real-time detection tasks. To address the above issues, this study introduces the Bidirectional Feature Pyramid Network (BiFPN) (<xref ref-type="bibr" rid="B34">Tan et al., 2020</xref>), which possesses bidirectional feature flow and a dynamic weight learning mechanism, as the core feature fusion module. Its advantages are mainly reflected in three aspects: First, structural optimization. The network prunes redundant nodes to reduce ineffective computations and adds cross-layer connections to enhance direct feature interaction. Second, dynamic weighting. It employs learnable weights with fast normalization to fuse features across different scales, adaptively focusing on highly discriminative regions. The weighted fusion is defined by <xref ref-type="disp-formula" rid="eq4">Equation 4</xref> as follows:</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>FPN, PANet, and BiFPN structure diagrams.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1730683-g008.tif">
<alt-text content-type="machine-generated">Flowchart comparing two network architectures: (b) PANet and (c) BiFPN. Both diagrams feature stacked nodes labeled as input (\(P_{in}\)) and output (\(P_{out}\)), connected by arrows representing data flow. PANet (left) has a linear path with direct vertical connections, while BiFPN (right) shows more complex, repeated blocks with multiple connections between non-adjacent layers. Each diagram highlights a different approach to feature pyramid networks.</alt-text>
</graphic></fig>
<disp-formula id="eq4"><label>(4)</label>
<mml:math display="block" id="M4"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mtext>O</mml:mtext><mml:mo>=</mml:mo><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mtext>i</mml:mtext></mml:munder><mml:mfrac><mml:mrow><mml:msub><mml:mtext>W</mml:mtext><mml:mtext>i</mml:mtext></mml:msub></mml:mrow><mml:mrow><mml:mi>&#x3f5;</mml:mi><mml:mo>+</mml:mo><mml:msub><mml:mo>&#x2211;</mml:mo><mml:mtext>j</mml:mtext></mml:msub><mml:msub><mml:mtext>W</mml:mtext><mml:mtext>j</mml:mtext></mml:msub></mml:mrow></mml:mfrac><mml:msub><mml:mtext>I</mml:mtext><mml:mtext>i</mml:mtext></mml:msub></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>In the formula: <inline-formula>
<mml:math display="inline" id="im12"><mml:mrow><mml:msub><mml:mtext>W</mml:mtext><mml:mtext>i</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> denotes the learnable weight. After calculating <inline-formula>
<mml:math display="inline" id="im13"><mml:mrow><mml:msub><mml:mtext>W</mml:mtext><mml:mtext>i</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, it is processed by a ReLU activation function to ensure <inline-formula>
<mml:math display="inline" id="im14"><mml:mrow><mml:msub><mml:mtext>W</mml:mtext><mml:mtext>i</mml:mtext></mml:msub><mml:mo>&#x2265;</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula>. <inline-formula>
<mml:math display="inline" id="im15"><mml:mi>&#x3f5;</mml:mi></mml:math></inline-formula> is a constant, usually set to 0.0001 to avoid numerical instability. <inline-formula>
<mml:math display="inline" id="im16"><mml:mrow><mml:msub><mml:mtext>I</mml:mtext><mml:mtext>i</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> denotes the input feature.</p>
<p>Third, efficiency enhancement. By simplifying computational pathways and reducing model complexity, BiFPN achieves a synergistic optimization of accuracy and inference speed. This study leverages its capability for precise weighted fusion of cross-scale features to enhance the retention of small target details and feature discriminability in complex backgrounds. Through accurate weighting, BiFPN preserves fine details of small objects and strengthens feature discrimination in dense/occluded scenarios, while its high efficiency readily adapts to the computational constraints of edge devices, making it an ideal solution for improving the accuracy and practical efficiency of pest and disease detection. The structure diagram of BiFPN is shown in <xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8</bold></xref>.</p>
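<p>A minimal sketch of the fast normalized weighted fusion in <xref ref-type="disp-formula" rid="eq4">Equation 4</xref> is given below; the class name and tensor shapes are illustrative assumptions.</p>
<code language="python">
# A minimal sketch of BiFPN's fast normalized fusion (Equation 4): learnable
# per-input weights passed through ReLU and normalized by their sum plus a small
# epsilon before the weighted sum of the input feature maps. Names are illustrative.
import torch
import torch.nn as nn


class WeightedFusion(nn.Module):
    def __init__(self, num_inputs, eps=1e-4):
        super().__init__()
        self.w = nn.Parameter(torch.ones(num_inputs))  # learnable weights w_i
        self.eps = eps

    def forward(self, inputs):
        # ReLU keeps every weight non-negative, as described for Equation 4
        w = torch.relu(self.w)
        w = w / (w.sum() + self.eps)                   # fast normalization
        return sum(wi * x for wi, x in zip(w, inputs))


if __name__ == "__main__":
    fuse = WeightedFusion(num_inputs=2)
    p_td = torch.randn(1, 64, 40, 40)  # top-down feature (illustrative shape)
    p_in = torch.randn(1, 64, 40, 40)  # same-level input feature
    print(fuse([p_td, p_in]).shape)    # torch.Size([1, 64, 40, 40])
</code>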
</sec>
<sec id="s2_5_3">
<label>2.5.3</label>
<title>Wise-IoU loss function</title>
<p>YOLOv8n employs CIoU (<xref ref-type="bibr" rid="B53">Zheng et&#xa0;al., 2020</xref>) as its bounding box regression loss function, whose calculation formula is shown in  <xref ref-type="disp-formula" rid="eq5">Equation 5</xref>:</p>
<disp-formula id="eq5"><label>(5)</label>
<mml:math display="block" id="M5"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mtext>L</mml:mtext><mml:mrow><mml:mtext>C</mml:mtext><mml:mrow><mml:mtext>I</mml:mtext></mml:mrow><mml:mtext>o</mml:mtext><mml:mrow><mml:mtext>U</mml:mtext></mml:mrow></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mtext>I</mml:mtext><mml:mrow><mml:mtext>o</mml:mtext></mml:mrow><mml:mtext>U</mml:mtext><mml:mo>+</mml:mo><mml:mfrac><mml:mrow><mml:msup><mml:mtext>p</mml:mtext><mml:mn>2</mml:mn></mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>b</mml:mtext><mml:mo>,</mml:mo><mml:msup><mml:mtext>b</mml:mtext><mml:mrow><mml:mtext>g</mml:mtext><mml:mrow><mml:mtext>t</mml:mtext></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:msup><mml:mtext>c</mml:mtext><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mfrac><mml:mo>+</mml:mo><mml:mo>&#x3b1;</mml:mo><mml:mo>&#x3bd;</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>In the formula, <inline-formula>
<mml:math display="inline" id="im17"><mml:mtext>&#x3b1;</mml:mtext></mml:math></inline-formula> is the balanced weight coefficient; <inline-formula>
<mml:math display="inline" id="im18"><mml:mtext>&#x3bd;</mml:mtext></mml:math></inline-formula> is a term for calculating the consistency of the aspect ratio between the predicted bounding box and the ground truth box; b and <inline-formula>
<mml:math display="inline" id="im19"><mml:mrow><mml:msup><mml:mi>b</mml:mi><mml:mrow><mml:mi>g</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> are the center coordinates of the predicted bounding box and the ground truth box, respectively; c denotes the diagonal length of the minimum enclosing rectangle of the predicted bounding box and the ground truth box; <inline-formula>
<mml:math display="inline" id="im20"><mml:mrow><mml:msup><mml:mi>p</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula> denotes the squared Euclidean distance between the center points of the predicted bounding box and the ground truth bounding box.</p>
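<p>As an illustration of <xref ref-type="disp-formula" rid="eq5">Equation 5</xref>, the following sketch computes the CIoU loss for boxes in center format; it is a simplified stand-in, not the implementation used inside YOLOv8n.</p>
<code language="python">
# A minimal sketch of the CIoU loss in Equation 5 for boxes given as (cx, cy, w, h).
# A simplified illustration; not the Ultralytics implementation.
import math
import torch


def ciou_loss(pred, target, eps=1e-7):
    px, py, pw, ph = pred.unbind(-1)
    gx, gy, gw, gh = target.unbind(-1)
    # corners of both boxes
    p_x1, p_y1, p_x2, p_y2 = px - pw / 2, py - ph / 2, px + pw / 2, py + ph / 2
    g_x1, g_y1, g_x2, g_y2 = gx - gw / 2, gy - gh / 2, gx + gw / 2, gy + gh / 2
    # intersection and union give the IoU term
    inter_w = (torch.min(p_x2, g_x2) - torch.max(p_x1, g_x1)).clamp(0)
    inter_h = (torch.min(p_y2, g_y2) - torch.max(p_y1, g_y1)).clamp(0)
    inter = inter_w * inter_h
    union = pw * ph + gw * gh - inter + eps
    iou = inter / union
    # squared center distance and squared diagonal of the minimum enclosing box
    rho2 = (px - gx) ** 2 + (py - gy) ** 2
    cw = torch.max(p_x2, g_x2) - torch.min(p_x1, g_x1)
    ch = torch.max(p_y2, g_y2) - torch.min(p_y1, g_y1)
    c2 = cw ** 2 + ch ** 2 + eps
    # aspect-ratio consistency term v and its balancing weight alpha
    v = (4 / math.pi ** 2) * (torch.atan(gw / gh) - torch.atan(pw / ph)) ** 2
    alpha = v / (1 - iou + v + eps)
    return 1 - iou + rho2 / c2 + alpha * v


if __name__ == "__main__":
    pred = torch.tensor([[50.0, 50.0, 20.0, 30.0]])
    gt = torch.tensor([[52.0, 48.0, 22.0, 28.0]])
    print(ciou_loss(pred, gt))
</code>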
<p>To specifically improve the model&#x2019;s detection performance for small target pests and diseases, this study introduces the Wise-IoU (WIoU) loss function based on a dynamic non-monotonic focusing mechanism to balance samples. This strategy not only reduces the competitiveness of high-quality anchor boxes but also mitigates the harmful gradients generated by low-quality examples. This enables WIoU to focus on anchor boxes of average quality and improve the overall performance of the detector, as shown in <xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9</bold></xref>. <inline-formula>
<mml:math display="inline" id="im21"><mml:mrow><mml:msub><mml:mtext>W</mml:mtext><mml:mtext>t</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im22"><mml:mrow><mml:msub><mml:mtext>H</mml:mtext><mml:mtext>t</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> denote the width and height of the overlapping region between the ground-truth bounding box and the predicted bounding box; (x, y) denotes the center coordinates of the predicted bounding box; (<inline-formula>
<mml:math display="inline" id="im23"><mml:mrow><mml:msub><mml:mtext>x</mml:mtext><mml:mrow><mml:mi>gt</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula>
<mml:math display="inline" id="im24"><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>g</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>) denotes the center coordinates of the ground truth box; w and h indicate the width and height of the prediction box; <inline-formula>
<mml:math display="inline" id="im25"><mml:mrow><mml:msub><mml:mtext>W</mml:mtext><mml:mrow><mml:mi>gt</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im26"><mml:mrow><mml:msub><mml:mtext>H</mml:mtext><mml:mrow><mml:mi>gt</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> indicate the width and height of the target box; <inline-formula>
<mml:math display="inline" id="im27"><mml:mrow><mml:msub><mml:mtext>W</mml:mtext><mml:mtext>g</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im28"><mml:mrow><mml:msub><mml:mi>H</mml:mi><mml:mi>g</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> indicate the width and height of the minimum enclosing box.</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Wise-IoU loss function.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1730683-g009.tif">
<alt-text content-type="machine-generated">Illustration showing two overlapping rectangles with dimensions labeled. The orange rectangle has width \( W_{gt} \) and height \( H_{gt} \), centered at \((x_{gt}, y_{gt})\). The blue rectangle has width \( W_t \) and height \( H_t \), centered at \((x, y)\). A red line connects the centers. The enclosing green rectangle has width \( W_g \) and height \( H_g \).</alt-text>
</graphic></fig>
<p>Since training data inevitably contain low-quality examples, the dynamic non-monotonic focusing mechanism evaluates the quality of anchor boxes using the outlier degree instead of IoU, thereby avoiding excessive penalties caused by geometric factors (e.g., distance and aspect ratio), as shown in <xref ref-type="disp-formula" rid="eq6">Equations 6</xref>-<xref ref-type="disp-formula" rid="eq8">8</xref>.</p>
<disp-formula id="eq6"><label>(6)</label>
<mml:math display="block" id="M6"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mtext>L</mml:mtext><mml:mrow><mml:mtext>W</mml:mtext><mml:mrow><mml:mtext>I</mml:mtext></mml:mrow><mml:mtext>o</mml:mtext><mml:mrow><mml:mtext>U</mml:mtext></mml:mrow></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mtext>r</mml:mtext><mml:mo>&#xb7;</mml:mo><mml:msub><mml:mtext>R</mml:mtext><mml:mrow><mml:mtext>W</mml:mtext><mml:mrow><mml:mtext>I</mml:mtext></mml:mrow><mml:mtext>o</mml:mtext><mml:mrow><mml:mtext>U</mml:mtext></mml:mrow></mml:mrow></mml:msub><mml:msub><mml:mtext>L</mml:mtext><mml:mrow><mml:mtext>I</mml:mtext><mml:mrow><mml:mtext>o</mml:mtext></mml:mrow><mml:mtext>U</mml:mtext></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>r</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mtext>&#x3b2;</mml:mtext><mml:mrow><mml:mtext>&#x3b4;</mml:mtext><mml:msup><mml:mtext>&#x3b1;</mml:mtext><mml:mrow><mml:mtext>&#x3b2;</mml:mtext><mml:mo>-</mml:mo><mml:mtext>&#x3b4;</mml:mtext></mml:mrow></mml:msup></mml:mrow></mml:mfrac></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq7"><label>(7)</label>
<mml:math display="block" id="M7"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mtext>&#x3b2;</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msubsup><mml:mtext>L</mml:mtext><mml:mrow><mml:mtext>I</mml:mtext><mml:mrow><mml:mtext>o</mml:mtext></mml:mrow><mml:mtext>U</mml:mtext></mml:mrow><mml:mo>*</mml:mo></mml:msubsup></mml:mrow><mml:mrow><mml:mover accent="true"><mml:mrow><mml:msub><mml:mtext>L</mml:mtext><mml:mrow><mml:mtext>I</mml:mtext><mml:mrow><mml:mtext>o</mml:mtext></mml:mrow><mml:mtext>U</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="true">&#xaf;</mml:mo></mml:mover></mml:mrow></mml:mfrac><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mo>,</mml:mo><mml:mrow><mml:mrow><mml:mo>+</mml:mo><mml:mo>&#x221e;</mml:mo></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq8"><label>(8)</label>
<mml:math display="block" id="M8"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mtext>R</mml:mtext><mml:mrow><mml:mtext>W</mml:mtext><mml:mrow><mml:mtext>I</mml:mtext></mml:mrow><mml:mtext>o</mml:mtext><mml:mrow><mml:mtext>U</mml:mtext></mml:mrow></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mtext>e</mml:mtext><mml:mrow><mml:mtext>x</mml:mtext></mml:mrow><mml:mtext>p</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>x</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mtext>x</mml:mtext><mml:mrow><mml:mtext>g</mml:mtext><mml:mrow><mml:mtext>t</mml:mtext></mml:mrow></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>y</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mtext>y</mml:mtext><mml:mrow><mml:mtext>g</mml:mtext><mml:mrow><mml:mtext>t</mml:mtext></mml:mrow></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow><mml:mrow><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mtext>c</mml:mtext><mml:mrow><mml:mtext>w</mml:mtext></mml:mrow><mml:mn>2</mml:mn></mml:msubsup><mml:mo>+</mml:mo><mml:msubsup><mml:mtext>c</mml:mtext><mml:mrow><mml:mtext>h</mml:mtext></mml:mrow><mml:mn>2</mml:mn></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>*</mml:mo></mml:msup></mml:mrow></mml:mfrac></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>In the formula, <inline-formula>
<mml:math display="inline" id="im29"><mml:mrow><mml:msub><mml:mtext>L</mml:mtext><mml:mrow><mml:mi>IoU</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">]</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> denotes the IoU loss, which weakens the penalty term for high-quality anchor boxes and shifts the focus to the distance between center points when the overlap between the anchor box and the ground-truth box is high; <inline-formula>
<mml:math display="inline" id="im30"><mml:mrow><mml:msub><mml:mtext>R</mml:mtext><mml:mrow><mml:mi>WIoU</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mi>e</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> denotes the penalty term of Wise-IoU, which amplifies the loss for anchor boxes of average quality. Superscript <inline-formula>
<mml:math display="inline" id="im31"><mml:mo>*</mml:mo></mml:math></inline-formula> denotes that it does not participate in backpropagation, which effectively prevents the network model from generating non-convergent gradients. <inline-formula>
<mml:math display="inline" id="im32"><mml:mrow><mml:mover accent="true"><mml:mrow><mml:msub><mml:mtext>L</mml:mtext><mml:mrow><mml:mi>IoU</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="true">&#xaf;</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula> serves as a normalization factor, denoting the running mean of the IoU loss maintained during training. <inline-formula>
<mml:math display="inline" id="im33"><mml:mtext>&#x3b2;</mml:mtext></mml:math></inline-formula> denotes the outlier degree: the smaller its value, the higher the anchor box quality; the larger its value, the lower the anchor box quality. Based on this, a non-monotonic gradient gain adjustment strategy is applied: high-quality anchor boxes with low <inline-formula>
<mml:math display="inline" id="im34"><mml:mtext>&#x3b2;</mml:mtext></mml:math></inline-formula> receive small gradient gains, and low-quality anchor boxes with high <inline-formula>
<mml:math display="inline" id="im35"><mml:mtext>&#x3b2;</mml:mtext></mml:math></inline-formula> likewise receive small gradient gains. This suppresses harmful gradients from low-quality training samples, makes the bounding box regression loss focus on anchor boxes of average quality, and ultimately improves the detection robustness of the network in pakchoi pest and disease scenarios.</p>
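<p>The following sketch illustrates how <xref ref-type="disp-formula" rid="eq6">Equations 6</xref>-<xref ref-type="disp-formula" rid="eq8">8</xref> fit together for center-format boxes. The hyperparameters &#x3b1; and &#x3b4; and the handling of the running mean of the IoU loss are assumptions based on the Wise-IoU formulation, not the authors&#x2019; code.</p>
<code language="python">
# A compact sketch of the Wise-IoU computation in Equations 6-8, assuming boxes
# given as (cx, cy, w, h). The alpha/delta values and the externally maintained
# running mean of the IoU loss are illustrative assumptions.
import torch


def wiou_loss(pred, target, iou_loss_mean, alpha=1.9, delta=3.0, eps=1e-7):
    px, py, pw, ph = pred.unbind(-1)
    gx, gy, gw, gh = target.unbind(-1)
    p_x1, p_y1, p_x2, p_y2 = px - pw / 2, py - ph / 2, px + pw / 2, py + ph / 2
    g_x1, g_y1, g_x2, g_y2 = gx - gw / 2, gy - gh / 2, gx + gw / 2, gy + gh / 2
    inter = ((torch.min(p_x2, g_x2) - torch.max(p_x1, g_x1)).clamp(0)
             * (torch.min(p_y2, g_y2) - torch.max(p_y1, g_y1)).clamp(0))
    union = pw * ph + gw * gh - inter + eps
    l_iou = 1 - inter / union                                  # IoU loss
    # Equation 8: distance penalty; the enclosing-box term is detached ("*")
    cw = torch.max(p_x2, g_x2) - torch.min(p_x1, g_x1)
    ch = torch.max(p_y2, g_y2) - torch.min(p_y1, g_y1)
    r_wiou = torch.exp(((px - gx) ** 2 + (py - gy) ** 2)
                       / (cw ** 2 + ch ** 2).detach())
    # Equation 7: outlier degree beta, detached so it does not backpropagate
    beta = l_iou.detach() / (iou_loss_mean + eps)
    # Equation 6: non-monotonic gain r, small for both very low and very high beta
    r = beta / (delta * alpha ** (beta - delta))
    return r * r_wiou * l_iou


if __name__ == "__main__":
    pred = torch.tensor([[50.0, 50.0, 20.0, 30.0]])
    gt = torch.tensor([[55.0, 47.0, 22.0, 26.0]])
    # iou_loss_mean would normally be a running average maintained during training
    print(wiou_loss(pred, gt, iou_loss_mean=torch.tensor(0.6)))
</code>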
</sec>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>Model training and evaluation metrics</title>
<sec id="s2_6_1">
<label>2.6.1</label>
<title>Experimental environment and training strategies</title>
<p>The experiments were conducted on a Windows 11 system, with the deep learning model implemented in PyTorch. Experimental environment parameters are summarized in <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Training environment and hardware platform parameters table.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Parameters</th>
<th valign="middle" align="center">Configuration</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">CPU</td>
<td valign="middle" align="center">AMD Ryzen7-7735H</td>
</tr>
<tr>
<td valign="middle" align="center">GPU</td>
<td valign="middle" align="center">NVIDIA GeForce RTX 4090</td>
</tr>
<tr>
<td valign="middle" align="center">GPU memory size</td>
<td valign="middle" align="center">24 GB</td>
</tr>
<tr>
<td valign="middle" align="center">Operating systems</td>
<td valign="middle" align="center">Windows 11</td>
</tr>
<tr>
<td valign="middle" align="center">Deep learning architecture</td>
<td valign="middle" align="center">PyTorch 2.3.1 + Python 3.9.19 + CUDA 12.1</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The hyperparameters were configured to optimize model training and validation efficiency while maintaining performance and accuracy. Detailed settings are listed in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Some key parameters set during model training.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Parameters</th>
<th valign="middle" align="center">Setup</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Epochs</td>
<td valign="middle" align="center">200</td>
</tr>
<tr>
<td valign="middle" align="center">Batch size</td>
<td valign="middle" align="center">32</td>
</tr>
<tr>
<td valign="middle" align="center">Input image size</td>
<td valign="middle" align="center">640&#xd7;640</td>
</tr>
<tr>
<td valign="middle" align="center">Workers</td>
<td valign="middle" align="center">8</td>
</tr>
<tr>
<td valign="middle" align="center">Initial learning rate</td>
<td valign="middle" align="center">0.01</td>
</tr>
<tr>
<td valign="middle" align="center">Optimizer</td>
<td valign="middle" align="center">SGD</td>
</tr>
<tr>
<td valign="middle" align="center">Weight decay</td>
<td valign="middle" align="center">0.0005</td>
</tr>
<tr>
<td valign="middle" align="center">Momentum</td>
<td valign="middle" align="center">0.937</td>
</tr>
<tr>
<td valign="middle" align="center">Close mosaic</td>
<td valign="middle" align="center">10</td>
</tr>
<tr>
<td valign="middle" align="center">Patience</td>
<td valign="middle" align="center">50</td>
</tr>
</tbody>
</table>
</table-wrap>
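<p>As a reference for reproducibility, the settings in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref> map onto the standard Ultralytics YOLOv8 training interface roughly as sketched below; the dataset configuration file and model weights shown are placeholders rather than the files used in this study.</p>
<code language="python">
# A sketch of how the hyperparameters in Table 4 map onto the Ultralytics YOLOv8
# training interface; the dataset YAML and weights file are placeholders, not the
# authors' actual files, and the improved model would load a custom architecture.
from ultralytics import YOLO

model = YOLO("yolov8n.pt")            # baseline weights (placeholder)
model.train(
    data="pakchoi_pest_disease.yaml", # placeholder dataset configuration
    epochs=200,
    batch=32,
    imgsz=640,
    workers=8,
    lr0=0.01,                         # initial learning rate
    optimizer="SGD",
    weight_decay=0.0005,
    momentum=0.937,
    close_mosaic=10,                  # disable mosaic augmentation for the last 10 epochs
    patience=50,                      # early-stopping patience
)
</code>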
</sec>
<sec id="s2_6_2">
<label>2.6.2</label>
<title>Model evaluation indicators</title>
<p>To comprehensively evaluate the performance of the multi-scenario small target detection model for pakchoi pests and diseases, this study adopts Precision (P), Recall (R), mean average precision (mAP), Floating-point Operations (FLOPs), Parameters, and model size (MB) as evaluation metrics. Based on the matching relationship between ground truth annotations and prediction results in object detection tasks, samples are classified into four categories: True Positives (TP, predicted as positive and actually positive), False Positives (FP, predicted as positive but actually negative), True Negatives (TN, predicted as negative and actually negative), and False Negatives (FN, predicted as negative but actually positive). The calculations of relevant metrics are shown in <xref ref-type="disp-formula" rid="eq9">Equations 9</xref>-<xref ref-type="disp-formula" rid="eq12">12</xref>.</p>
<disp-formula id="eq9"><label>(9)</label>
<mml:math display="block" id="M9"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mtext>P</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mtext>T</mml:mtext><mml:mtext>P</mml:mtext></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mtext>T</mml:mtext><mml:mtext>P</mml:mtext></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mtext>F</mml:mtext><mml:mtext>P</mml:mtext></mml:msub></mml:mrow></mml:mfrac><mml:mo>&#xd7;</mml:mo><mml:mn>100</mml:mn><mml:mo>%</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq10"><label>(10)</label>
<mml:math display="block" id="M10"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mtext>R</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mtext>T</mml:mtext><mml:mtext>P</mml:mtext></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mtext>T</mml:mtext><mml:mtext>P</mml:mtext></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mtext>F</mml:mtext><mml:mtext>N</mml:mtext></mml:msub></mml:mrow></mml:mfrac><mml:mo>&#xd7;</mml:mo><mml:mn>100</mml:mn><mml:mo>%</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq11"><label>(11)</label>
<mml:math display="block" id="M11"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mtext>A</mml:mtext><mml:mrow><mml:mtext>P</mml:mtext></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:mrow><mml:msubsup><mml:mo>&#x222b;</mml:mo><mml:mn>0</mml:mn><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mrow><mml:mtext>P</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mtext>R</mml:mtext><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mtext>d</mml:mtext><mml:mrow><mml:mtext>R</mml:mtext></mml:mrow></mml:mrow></mml:mrow></mml:mstyle></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq12"><label>(12)</label>
<mml:math display="block" id="M12"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mi>mAP</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>n</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>n</mml:mi></mml:munderover><mml:mi>A</mml:mi><mml:msub><mml:mi>P</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Herein, Precision (P) reflects the reliability of predicted positive samples; Recall (R) reflects the model&#x2019;s ability to identify true positive samples. Average Precision (AP) denotes the average precision of a specific category, while mean Average Precision (mAP) represents the average of the average precisions across all categories. The larger the mAP value, the higher the average precision of the model and the better the detection performance.</p>
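<p>A small numerical illustration of <xref ref-type="disp-formula" rid="eq9">Equations 9</xref>-<xref ref-type="disp-formula" rid="eq12">12</xref> is sketched below; the counts and precision-recall points are made-up values used only to show the calculation.</p>
<code language="python">
# A small numerical illustration of Equations 9-12: precision and recall from
# TP/FP/FN counts, AP as the area under the precision-recall curve (trapezoidal
# approximation), and mAP as the mean of per-class APs. All values are made up.
import numpy as np


def precision(tp, fp):
    return tp / (tp + fp)


def recall(tp, fn):
    return tp / (tp + fn)


def average_precision(recalls, precisions):
    # Equation 11: AP is the integral of P(R) dR, approximated here by trapezoids
    r = np.asarray(recalls, dtype=float)
    p = np.asarray(precisions, dtype=float)
    order = np.argsort(r)
    r, p = r[order], p[order]
    return float(np.sum((r[1:] - r[:-1]) * (p[1:] + p[:-1]) / 2.0))


def mean_average_precision(per_class_ap):
    # Equation 12: mAP is the arithmetic mean of the per-class AP values
    return float(np.mean(per_class_ap))


if __name__ == "__main__":
    print(precision(tp=80, fp=20), recall(tp=80, fn=25))  # 0.8 and about 0.762
    ap_a = average_precision([0.0, 0.5, 1.0], [1.0, 0.9, 0.6])
    ap_b = average_precision([0.0, 0.5, 1.0], [1.0, 0.8, 0.5])
    print(mean_average_precision([ap_a, ap_b]))
</code>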
</sec>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Analysis of convergence experiment</title>
<p>Visualization of loss curves can intuitively reflect the convergence process of the model, thereby facilitating better adjustment of training strategies. The loss values include bounding box loss and distribution focal loss (used to evaluate the regression performance of object detection bounding boxes) as well as classification loss (used to evaluate classification performance) (<xref ref-type="bibr" rid="B45">Yang et&#xa0;al., 2023</xref>).</p>
<p>To systematically evaluate the stability of model performance and mitigate interference from random factors during training, this study conducted independent and repeated training and evaluation experiments for each model. The specific procedures were as follows: All experiments were performed under identical hardware (e.g., GPU model, memory configuration) and software (e.g., deep learning framework version, operating system) environments. Each model was trained independently for five repeated runs. Before each run, the model parameters and weights were reinitialized, and the input order of the training dataset was randomly shuffled to eliminate the influence of initial weights and data sequence on the results. The final reported performance metrics (e.g., accuracy, mAP) were calculated as the arithmetic mean of the results from the five runs, serving to quantify the stability of model performance. All model comparisons were based on these averaged metrics to ensure fairness and reliability in the evaluation.</p>
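<p>Schematically, this repeated-run protocol can be expressed as in the sketch below; the training and evaluation calls are placeholders and the returned metrics are dummy values, not results from this study.</p>
<code language="python">
# A schematic of the repeated-run protocol: five independent runs, each reseeded
# so that weight initialization and data order differ, with the reported metrics
# taken as the arithmetic mean. The run body is a placeholder returning dummy
# values; it does not reproduce the actual training pipeline.
import random
import statistics


def run_experiment(seed: int) -> dict:
    random.seed(seed)  # fresh seed per run: new initialization and shuffling
    # placeholder for: reinitialize model, shuffle dataset, train, evaluate
    return {"precision": random.random(), "mAP": random.random()}


def average_over_runs(n_runs: int = 5) -> dict:
    runs = [run_experiment(seed) for seed in range(n_runs)]
    return {k: statistics.mean(r[k] for r in runs) for k in runs[0]}


if __name__ == "__main__":
    print(average_over_runs())
</code>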
<p>As shown in <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10</bold></xref>, throughout the training process, the model exhibited no signs of overfitting or underfitting, indicating that it possesses good generalization ability and the capability to capture data patterns. As the number of training epochs increased, all three types of loss values decreased continuously. After the 130th epoch, the loss curves tended to converge and stabilize, suggesting that the model had reached the optimal state and could proceed to the stage of model performance evaluation.</p>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>Loss value curves of YOLOv8-DBW.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1730683-g010.tif">
<alt-text content-type="machine-generated">Two line graphs compare training and validation loss metrics over 200 epochs. The left graph shows training loss, and the right shows validation loss. Each graph includes blue, orange, and green lines representing bounding box loss, classification loss, and distribution focal loss, respectively. All loss values decrease over time, with distribution focal loss having the lowest values in both graphs.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Comparison of different attention mechanisms</title>
<p>To verify the rationality of introducing the EMA attention mechanism, this study independently embedded it into the backbone network of the original YOLOv8n model. Meanwhile, four other attention mechanisms (SE, CA, ECA, and CBAM) were introduced at the same position for comparative experiments, and the experimental results are shown in <xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref>. The data indicate that the model with the EMA attention mechanism achieves a precision of 83.2% and a mean average precision (mAP) of 80.1%, both higher than those of the models incorporating the other four attention mechanisms. In addition, its recall is 71.6%, only 0.4 percentage points lower than that of the model with the SE attention mechanism. Overall, the EMA mechanism shows a clear advantage on the core detection accuracy metrics.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Comparison of the effects between different attention mechanisms.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Attention mechanisms</th>
<th valign="middle" align="center">P/%</th>
<th valign="middle" align="center">R/%</th>
<th valign="middle" align="center">mAP/%</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">EMA</td>
<td valign="middle" align="center">83.2</td>
<td valign="middle" align="center">71.6</td>
<td valign="middle" align="center">80.1</td>
</tr>
<tr>
<td valign="middle" align="center">SE</td>
<td valign="middle" align="center">82.0</td>
<td valign="middle" align="center">72</td>
<td valign="middle" align="center">79.5</td>
</tr>
<tr>
<td valign="middle" align="center">CA</td>
<td valign="middle" align="center">83.0</td>
<td valign="middle" align="center">70.6</td>
<td valign="middle" align="center">79.8</td>
</tr>
<tr>
<td valign="middle" align="center">ECA</td>
<td valign="middle" align="center">83.1</td>
<td valign="middle" align="center">70.4</td>
<td valign="middle" align="center">79.9</td>
</tr>
<tr>
<td valign="middle" align="center">CBAM</td>
<td valign="middle" align="center">82.9</td>
<td valign="middle" align="center">68.8</td>
<td valign="middle" align="center">79.7</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Ablation experiment</title>
<p>To verify the effectiveness of each improvement to YOLOv8n, this study set up eight ablation experiment schemes, with the results shown in <xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref>.</p>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Results of ablation experiment.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Test</th>
<th valign="middle" align="center">C2f-PE</th>
<th valign="middle" align="center">BiFPN</th>
<th valign="middle" align="center">Wise-IoU</th>
<th valign="middle" align="center">P/%</th>
<th valign="middle" align="center">R/%</th>
<th valign="middle" align="center">mAP/%</th>
<th valign="middle" align="center">FLOPs/G</th>
<th valign="middle" align="center">Parameters/M</th>
<th valign="middle" align="center">Model size/MB</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">81.4</td>
<td valign="middle" align="center">69.8</td>
<td valign="middle" align="center">77.8</td>
<td valign="middle" align="center">8.1</td>
<td valign="middle" align="center">3.0</td>
<td valign="middle" align="center">6.3</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">83.3</td>
<td valign="middle" align="center">70.8</td>
<td valign="middle" align="center">80.5</td>
<td valign="middle" align="center">7.3</td>
<td valign="middle" align="center">2.7</td>
<td valign="middle" align="center">5.8</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">85.7</td>
<td valign="middle" align="center">74.1</td>
<td valign="middle" align="center">84.3</td>
<td valign="middle" align="center">7.0</td>
<td valign="middle" align="center">2.0</td>
<td valign="middle" align="center">4.3</td>
</tr>
<tr>
<td valign="middle" align="center">4</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">86.4</td>
<td valign="middle" align="center">75.3</td>
<td valign="middle" align="center">85.3</td>
<td valign="middle" align="center">7.0</td>
<td valign="middle" align="center">2.0</td>
<td valign="middle" align="center">4.3</td>
</tr>
<tr>
<td valign="middle" align="center">5</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">82.4</td>
<td valign="middle" align="center">71.5</td>
<td valign="middle" align="center">78.8</td>
<td valign="middle" align="center">7.1</td>
<td valign="middle" align="center">1.9</td>
<td valign="middle" align="center">4.2</td>
</tr>
<tr>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">83.2</td>
<td valign="middle" align="center">73.5</td>
<td valign="middle" align="center">81.5</td>
<td valign="middle" align="center">7.1</td>
<td valign="middle" align="center">1.9</td>
<td valign="middle" align="center">4.2</td>
</tr>
<tr>
<td valign="middle" align="center">7</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">82.9</td>
<td valign="middle" align="center">71.0</td>
<td valign="middle" align="center">79.3</td>
<td valign="middle" align="center">8.1</td>
<td valign="middle" align="center">3.0</td>
<td valign="middle" align="center">6.3</td>
</tr>
<tr>
<td valign="middle" align="center">8</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">84.3</td>
<td valign="middle" align="center">71.6</td>
<td valign="middle" align="center">82.5</td>
<td valign="middle" align="center">7.3</td>
<td valign="middle" align="center">2.7</td>
<td valign="middle" align="center">5.8</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>P denotes precision, R denotes recall, and mAP represents the mean average precision. &#x201c;&#x2713;&#x201d; indicates that the improvement was applied, while &#x201c;&#x2013;&#x201d; signifies that it was not.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref>, after introducing the C2f-PE module, the feature extraction capability of the model was notably enhanced compared with the original baseline. Specifically, Precision (P), Recall (R), and mean Average Precision (mAP) increased by 1.9, 1.0, and 2.7 percentage points, respectively. This improvement can be attributed to the EMA attention mechanism, which effectively suppresses interference caused by occlusion and enhances the model&#x2019;s focus on small-target features. Meanwhile, due to the lightweight convolutional design of the PConv module, the number of floating-point operations (FLOPs) was reduced by 0.8 G, while the model size and parameter count decreased by 0.5 MB and 0.3 M, respectively. After further integrating the BiFPN module, the model achieved additional performance gains and improved computational efficiency. Compared with the original model, P, R, and mAP increased by 4.3, 4.3, and 6.5 percentage points, respectively. At the same time, the number of parameters and model size were reduced by 33.3% and 31.8%, while FLOPs decreased by 13.6%, indicating a significant improvement in lightweight performance. These results demonstrate that BiFPN effectively enhances multi-scale feature fusion while reducing redundant computations. Finally, after replacing the original loss function with the Wise-IoU loss, the model&#x2019;s detection performance was further improved, with P, R, and mAP increasing by 5.0, 5.5, and 7.5 percentage points, respectively. This result suggests that Wise-IoU improves the accuracy and stability of bounding box regression, thereby enhancing overall detection robustness. Based on the ablation experiment results, each individual module contributes positively to performance improvement. When all proposed modules are combined, the model achieves optimal performance across all evaluation metrics, confirming the effectiveness of the proposed improvements.</p>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Analysis of comparative experiments on different IoU losses</title>
<p>To verify the effectiveness of the proposed Wise-IoU loss function in the pakchoi pest and disease detection task, training was conducted using YOLOv8&#x2019;s default CIoU as well as existing mainstream regression loss functions including DIoU, GIoU, SIoU, and EIoU. The evaluation metrics adopted mAP at IoU thresholds of 0.5 and 0.5&#x2013;0.95. The experimental results are shown in <xref ref-type="table" rid="T7"><bold>Table&#xa0;7</bold></xref>. As indicated in <xref ref-type="table" rid="T7"><bold>Table&#xa0;7</bold></xref>, compared with the default CIoU of YOLOv8n, the proposed Wise-IoU increased mAP@0.5 and mAP@0.5:0.95 by 1.5 and 1.3 percentage points, respectively. Among all comparative methods, Wise-IoU achieved the optimal precision, verifying its superiority in agricultural disease detection scenarios.</p>
<table-wrap id="T7" position="float">
<label>Table&#xa0;7</label>
<caption>
<p>Performance comparison of different IoU loss.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Model</th>
<th valign="middle" align="center">mAP@0.5</th>
<th valign="middle" align="center">mAP@0.5:0.95</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">YOLOv8n + CIoU</td>
<td valign="middle" align="center">77.8</td>
<td valign="middle" align="center">59.5</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv8n + DIoU</td>
<td valign="middle" align="center">76.2</td>
<td valign="middle" align="center">58.2</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv8n + GIoU</td>
<td valign="middle" align="center">77.6</td>
<td valign="middle" align="center">59.1</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv8n + SIoU</td>
<td valign="middle" align="center">77.5</td>
<td valign="middle" align="center">59.2</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv8n + EIoU</td>
<td valign="middle" align="center">77.4</td>
<td valign="middle" align="center">59.7</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv8n + Wise-IoU</td>
<td valign="middle" align="center">79.3</td>
<td valign="middle" align="center">60.8</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>mAP@0.5 and mAP@0.5:0.95 represent the mean average precision at IoU thresholds of 0.5 and 0.5 to 0.95.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Comparison of mainstream object detection models</title>
<p>To evaluate the performance of the proposed YOLOv8-DBW model, comparative experiments were conducted against several mainstream object detection methods, including Faster R-CNN, SSD, YOLOv5s, YOLOv5n, YOLOv7-tiny, YOLOv10n, YOLOv11n, and YOLOv12n. To ensure fairness and scientific rigor, all benchmark models were retrained on the same pakchoi pest and disease dataset using identical hardware environments and hyperparameter configurations, as specified in <xref ref-type="table" rid="T3"><bold>Tables&#xa0;3</bold></xref> and <xref ref-type="table" rid="T4"><bold>4</bold></xref>. To minimize the effects of experimental randomness, each model was independently trained five times, and the reported performance metrics represent the arithmetic mean of the five runs. The comparison results are summarized in <xref ref-type="table" rid="T8"><bold>Table&#xa0;8</bold></xref>. As shown in <xref ref-type="table" rid="T8"><bold>Table&#xa0;8</bold></xref>, the proposed YOLOv8-DBW model achieved superior detection performance compared with all benchmark models. Specifically, its mean average precision (mAP) exceeded that of Faster R-CNN, SSD, YOLOv5s, YOLOv5n, YOLOv7-tiny, YOLOv10n, YOLOv11n, and YOLOv12n by 23.4, 19.1, 10.0, 11.5, 14.0, 6.9, 7.8, and 12.1 percentage points, respectively. Meanwhile, the number of model parameters was reduced by 96.7%, 94.4%, 71.5%, 20.0%, 66.7%, 25.9%, 20.0%, and 20.0% compared with the corresponding models. Although the FLOPs of YOLOv8-DBW are slightly higher than those of YOLOv5n and approximately 0.6 G and 1.0 G higher than those of YOLOv11n and YOLOv12n, respectively, they remain substantially lower than those of the other compared models. In addition, the model size of YOLOv8-DBW is reduced by 96.7%, 91.3%, 73.7%, 5.3%, 69.5%, 33.3%, 30.8%, and 29.5%, respectively, meeting the requirements for lightweight deployment. Although YOLOv5n, YOLOv11n, and YOLOv12n achieve higher inference speeds in terms of frames per second (FPS), their parameter counts and model sizes are larger than those of YOLOv8-DBW, and their detection accuracy remains lower. Overall, the proposed YOLOv8-DBW model demonstrates a more favorable balance among detection accuracy, computational efficiency, and model compactness.</p>
<table-wrap id="T8" position="float">
<label>Table&#xa0;8</label>
<caption>
<p>Performance comparison of mainstream models.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Models</th>
<th valign="middle" align="center">Precision/%</th>
<th valign="middle" align="center">Recall/%</th>
<th valign="middle" align="center">mAP@0.5/%</th>
<th valign="middle" align="center">FLOPs/G</th>
<th valign="middle" align="center">Parameters/M</th>
<th valign="middle" align="center">FPS</th>
<th valign="middle" align="center">Model size/MB</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Faster-RCNN</td>
<td valign="middle" align="center">63.8</td>
<td valign="middle" align="center">62.1</td>
<td valign="middle" align="center">61.9</td>
<td valign="middle" align="center">121.4</td>
<td valign="middle" align="center">60.1</td>
<td valign="middle" align="center">38.5</td>
<td valign="middle" align="center">108.6</td>
</tr>
<tr>
<td valign="middle" align="center">SSD</td>
<td valign="middle" align="center">65.3</td>
<td valign="middle" align="center">60.2</td>
<td valign="middle" align="center">66.2</td>
<td valign="middle" align="center">61.2</td>
<td valign="middle" align="center">35.2</td>
<td valign="middle" align="center">41.7</td>
<td valign="middle" align="center">41.3</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv5s</td>
<td valign="middle" align="center">79.1</td>
<td valign="middle" align="center">70.7</td>
<td valign="middle" align="center">75.3</td>
<td valign="middle" align="center">15.7</td>
<td valign="middle" align="center">7.0</td>
<td valign="middle" align="center">69.8</td>
<td valign="middle" align="center">13.7</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv5n</td>
<td valign="middle" align="center">75.3</td>
<td valign="middle" align="center">71.3</td>
<td valign="middle" align="center">73.8</td>
<td valign="middle" align="center">4.2</td>
<td valign="middle" align="center">2.5</td>
<td valign="middle" align="center">99.3</td>
<td valign="middle" align="center">3.8</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv7-tiny</td>
<td valign="middle" align="center">71.2</td>
<td valign="middle" align="center">69.5</td>
<td valign="middle" align="center">71.3</td>
<td valign="middle" align="center">13.2</td>
<td valign="middle" align="center">6.0</td>
<td valign="middle" align="center">78.8</td>
<td valign="middle" align="center">11.8</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv10n</td>
<td valign="middle" align="center">84.4</td>
<td valign="middle" align="center">68.8</td>
<td valign="middle" align="center">78.4</td>
<td valign="middle" align="center">8.4</td>
<td valign="middle" align="center">2.7</td>
<td valign="middle" align="center">88.6</td>
<td valign="middle" align="center">5.4</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv11n</td>
<td valign="middle" align="center">75.9</td>
<td valign="middle" align="center">74.7</td>
<td valign="middle" align="center">77.5</td>
<td valign="middle" align="center">6.4</td>
<td valign="middle" align="center">2.5</td>
<td valign="middle" align="center">97.4</td>
<td valign="middle" align="center">5.2</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv12n</td>
<td valign="middle" align="center">78.2</td>
<td valign="middle" align="center">70.7</td>
<td valign="middle" align="center">73.2</td>
<td valign="middle" align="center">6.0</td>
<td valign="middle" align="center">2.5</td>
<td valign="middle" align="center">111.2</td>
<td valign="middle" align="center">5.1</td>
</tr>
<tr>
<td valign="middle" align="center">YOLOv8-DBW</td>
<td valign="middle" align="center">86.4</td>
<td valign="middle" align="center">75.3</td>
<td valign="middle" align="center">85.3</td>
<td valign="middle" align="center">7.0</td>
<td valign="middle" align="center">2.0</td>
<td valign="middle" align="center">95.1</td>
<td valign="middle" align="center">4.3</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The radar chart results characterizing the comprehensive performance of the models (<xref ref-type="fig" rid="f11"><bold>Figure&#xa0;11</bold></xref>) show that the improved YOLOv8-DBW model has the most full and complete area, indicating that its performance in all aspects is closer to the ideal state compared with other models. In summary, the YOLOv8-DBW algorithm proposed in this study has demonstrated its superiority in multiple metrics.</p>
<fig id="f11" position="float">
<label>Figure&#xa0;11</label>
<caption>
<p>Radar chart of the comprehensive performance comparison of the mainstream models.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1730683-g011.tif">
<alt-text content-type="machine-generated">Radar chart comparing various object detection models, including Faster-RCNN, SSD, and several YOLO versions. Metrics measured are precision, recall, mean average precision at 0.5 percent, model size in megabytes, and parameters in millions. Each model is represented by different colored markers and lines.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_6">
<label>3.6</label>
<title>Performance across different categories</title>
<p>To further evaluate the robustness of the proposed model under class imbalance, detailed performance metrics for each of the seven pest and disease categories are summarized in <xref ref-type="table" rid="T9"><bold>Table&#xa0;9</bold></xref>. Despite the variation in sample counts among the categories, the YOLOv8-DBW model achieved consistently strong performance across all classes. Specifically, the minority class Black Rot attained an mAP of 83.9%, only 3.5 percentage points lower than that of the majority class Diamondback Moth (87.4%). This relatively balanced performance across categories suggests that the proposed model is less sensitive to sample imbalance. Such robustness can be attributed to the synergistic effect of the enhanced feature extraction capability provided by the C2f-PE module and the dynamic sample weighting mechanism introduced by the Wise-IoU loss function, which together help mitigate potential bias toward majority classes.</p>
<table-wrap id="T9" position="float">
<label>Table&#xa0;9</label>
<caption>
<p>Detailed recognition performance for the seven categories.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Category</th>
<th valign="middle" align="center">Sample count</th>
<th valign="middle" align="center">Precision (%)</th>
<th valign="middle" align="center">Recall (%)</th>
<th valign="middle" align="center">mAP@0.5 (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Diamondback Moth</td>
<td valign="middle" align="center">1085</td>
<td valign="middle" align="center">88.5</td>
<td valign="middle" align="center">78.2</td>
<td valign="middle" align="center">87.4</td>
</tr>
<tr>
<td valign="middle" align="center">Leaf Miner</td>
<td valign="middle" align="center">842</td>
<td valign="middle" align="center">87.1</td>
<td valign="middle" align="center">77.0</td>
<td valign="middle" align="center">86.2</td>
</tr>
<tr>
<td valign="middle" align="center">Downy Mildew</td>
<td valign="middle" align="center">992</td>
<td valign="middle" align="center">85.9</td>
<td valign="middle" align="center">74.5</td>
<td valign="middle" align="center">84.8</td>
</tr>
<tr>
<td valign="middle" align="center">Alternaria Leaf Spot</td>
<td valign="middle" align="center">753</td>
<td valign="middle" align="center">84.8</td>
<td valign="middle" align="center">73.6</td>
<td valign="middle" align="center">84.1</td>
</tr>
<tr>
<td valign="middle" align="center">Black Rot</td>
<td valign="middle" align="center">745</td>
<td valign="middle" align="center">85.2</td>
<td valign="middle" align="center">72.8</td>
<td valign="middle" align="center">83.9</td>
</tr>
<tr>
<td valign="middle" align="center">White Rust</td>
<td valign="middle" align="center">855</td>
<td valign="middle" align="center">86.5</td>
<td valign="middle" align="center">75.4</td>
<td valign="middle" align="center">85.5</td>
</tr>
<tr>
<td valign="middle" align="center">White Spot</td>
<td valign="middle" align="center">838</td>
<td valign="middle" align="center">86.8</td>
<td valign="middle" align="center">75.6</td>
<td valign="middle" align="center">85.2</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_7">
<label>3.7</label>
<title>Model visualization analysis</title>
<p>Based on the experimental results of the mainstream models, the better-performing algorithms (YOLOv5s, YOLOv10n, YOLOv11n, and YOLOv12n) and the improved YOLOv8-DBW algorithm proposed in this study were selected for a visual comparison of pakchoi pest and disease detection results, which are shown in <xref ref-type="fig" rid="f12"><bold>Figure&#xa0;12</bold></xref>.</p>
<fig id="f12" position="float">
<label>Figure&#xa0;12</label>
<caption>
<p>Recognition performance of different models for pakchoi diseases and pests. In the figure, red circles indicate false detections.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1730683-g012.tif">
<alt-text content-type="machine-generated">Comparison of plant disease detection across seven columns labeled as Alternaria Leaf Spot, Diamondback Moth, Black Rot, Leaf Miner, Downy Mildew, White Rust, and White Spot. Rows represent original images and detection results from models named YOLOv5s, YOLOv8n, YOLOv10n, YOLOv11n, YOLOv12n, and DBW-YOLOv8. Each model&#x2019;s detection is marked with colored boxes or circles highlighting affected areas on leaves.</alt-text>
</graphic></fig>
<p>As shown in <xref ref-type="fig" rid="f12"><bold>Figure&#xa0;12</bold></xref>, the comparison of detection confidence on the test set reveals that some models exhibit noticeable cross-misclassification and background false detection issues in the identification of pakchoi pests and diseases. Specifically, YOLOv12n misidentifies Alternaria Leaf Spot as White Rust, and YOLOv11n makes a similar error in detecting White Spot. Moreover, in the detection of Black Rot, YOLOv5s, YOLOv10n, and YOLOv11n all misclassify background areas as Diamondback Moth. In contrast, the YOLOv8-DBW model shows no misclassification across all cases and achieves significantly higher detection accuracy than the other models. These results confirm that the improvements made to YOLOv8n effectively enhance the detection performance for pakchoi pests and diseases, addressing the insufficient accuracy of existing models.</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>The experimental results indicate that the proposed YOLOv8-DBW model achieves superior performance compared with traditional object detection frameworks. Rather than relying on isolated improvements, the proposed architecture forms a synergistic design in which the C2f-PE module stabilizes feature representation, the BiFPN network enhances multi-scale feature fusion efficiency, and the Wise-IoU loss refines bounding box regression accuracy. This coordinated design effectively addresses the limitations of the original YOLOv8n model, particularly its insufficient detection accuracy for small and occluded pest and disease targets in complex field environments. With the rapid development of deep learning technologies (<xref ref-type="bibr" rid="B25">Mu and Zeng, 2019</xref>; <xref ref-type="bibr" rid="B15">Liu, 2022</xref>), their applications have become widespread, leading to significant breakthroughs in crop pest and disease recognition in recent years (<xref ref-type="bibr" rid="B1">Ai et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B44">Xin and Wang, 2021</xref>). Traditional recognition methods heavily rely on manual detection, which is not only time-consuming and labor-intensive but also prone to reducing efficiency and accuracy due to human errors. While existing approaches based on models such as CNN and YOLO have partially alleviated these issues (<xref ref-type="bibr" rid="B49">Zhai et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B21">Ma and Pang, 2023</xref>; <xref ref-type="bibr" rid="B51">Zhao and Liu, 2024</xref>), they still face obvious bottlenecks in computational efficiency and deployment on edge devices. In resource-constrained environments, high computational and storage demands often hinder practical application. For instance, an improved convolutional neural network (CNN) was used to construct a lightweight model for identifying common pests and diseases in winter wheat, achieving a recognition accuracy of 96.02% (<xref ref-type="bibr" rid="B46">Yao et&#xa0;al., 2023</xref>). A deep learning model trained for cassava disease detection achieved an accuracy of up to 98% (<xref ref-type="bibr" rid="B29">Ramcharan et&#xa0;al., 2017</xref>). Similarly, for jute plant diseases, a deep learning network named YOLO-JD was proposed, which achieved the best detection performance with a mean average precision (mAP) of 96.63% (<xref ref-type="bibr" rid="B13">Li et&#xa0;al., 2022</xref>). Therefore, optimizing the model structure to improve inference efficiency is key to enhancing its adaptability.</p>
<p>Compared to other vegetables, leafy vegetables such as pakchoi are more susceptible to pests and diseases due to their edible parts being close to the soil, weak stress resistance, and high environmental sensitivity. In recent years, factors such as abnormal climate, continuous cropping obstacles, and soil degradation have further increased the pressure on pest and disease control. The YOLO model offers prominent advantages for pakchoi pest and disease detection, including high precision, real-time performance, and quantifiability, forming the foundation for precision agriculture. <xref ref-type="bibr" rid="B8">He et&#xa0;al. (2025)</xref> proposed the FV-YOLOv5s model, which broke through the bottleneck of detecting weak features of two types of pests and diseases (diamondback moth and downy mildew) in leafy vegetables. <xref ref-type="bibr" rid="B28">Qiang and Shi (2022)</xref> addressed the problems of scattered small targets and missed detection of clusters in pakchoi pest detection under wide scenarios, constructing a technical chain of &#x201c;block detection-hybrid model-edge-cloud collaboration&#x201d; to realize accurate identification and efficient deployment of pests in wide scenarios. <xref ref-type="bibr" rid="B52">Zheng et&#xa0;al. (2024)</xref> focused on the issue of missed detection of small targets for two types of pakchoi pests, proposing the YOLOPC model based on the YOLOv5s model. By optimizing the network with the CBAM attention mechanism and dilated convolution, synergistic optimization of accuracy and lightweight performance were achieved. Recent studies have explored YOLO-based improvements for specific pest or disease categories; however, most existing approaches focus on single or limited target types. In contrast, the present study targets multi-category pakchoi pest and disease detection by integrating data augmentation strategies and a lightweight yet robust detection framework, enabling stable performance across diverse categories and field conditions.</p>
<p>The proposed YOLOv8-DBW model not only improves detection accuracy but also significantly reduces computational cost and model size, making it suitable for deployment on embedded and mobile devices. This balance between accuracy and efficiency provides practical technical support for real-time field monitoring and precision agriculture applications. Despite its strong performance, this study has several limitations. First, although the current dataset encompasses diverse environmental conditions across three provinces, further expansion to include a wider array of crop cultivars and distinct climatic zones would further bolster the model&#x2019;s cross-region generalization ability. Second, the current model focuses on qualitative detection and does not provide quantitative assessment of disease severity. Third, the inherent black-box nature of deep learning models limits interpretability in agricultural decision-making scenarios. Future research should expand dataset diversity, integrate severity estimation methods, and incorporate interpretability techniques such as Grad-CAM to enhance model transparency and decision support capability. Overall, this study clarifies the direction for subsequent optimization and supports the transition from pest and disease detection toward precision decision support in agricultural production.</p>
</sec>
<sec id="s5" sec-type="conclusion">
<label>5</label>
<title>Conclusion</title>
<p>To achieve rapid and accurate intelligent detection of pakchoi pests and diseases, the present study proposes an online detection method named YOLOv8-DBW, based on an improved YOLOv8n architecture. The model incorporates three key enhancements. First, in the backbone network, the original C2f module is replaced with the proposed C2f-PE module, which integrates Partial Convolution (PConv) and an Efficient Multi-scale Attention (EMA) mechanism; this strengthens feature extraction, raising precision, recall, and mean average precision (mAP) by 1.9%, 1.0%, and 2.7%, respectively, while also providing a first step toward a lightweight design by reducing floating-point operations (FLOPs) by 0.8 G, model size by 0.3 MB, and the number of parameters by 0.5 M. Second, the BiFPN module replaces the original neck structure, improving the detection of overlapping or dense pest and disease instances against complex backgrounds; this modification increases precision, recall, and mAP by 4.3%, 4.3%, and 6.5%, respectively, while reducing parameters by 33.3%, model size by 31.8%, and FLOPs by 13.6%, markedly improving computational efficiency. Third, Wise-IoU is adopted as the bounding-box regression loss function, improving the localization of pest and disease features and yielding gains in precision, recall, and mAP of 5.0%, 5.5%, and 7.5%, respectively. For pakchoi pest and disease detection, the YOLOv8-DBW model offers clear advantages in parameter count, detection speed, and accuracy over classical object detection algorithms such as Faster R-CNN and SSD, as well as over mainstream lightweight models including YOLOv5s, YOLOv5n, YOLOv7-tiny, YOLOv10n, YOLOv11n, and YOLOv12n. Therefore, in field cultivation the model can be deployed on in-field devices to identify pakchoi pests and diseases and provide early warning, and it can also support precision variable-rate pesticide spraying, enabling precise and efficient pest and disease prevention and control.</p>
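<p>As a concrete reference for the regression loss adopted above, the following sketch implements the Wise-IoU v1 formulation for axis-aligned boxes: the plain IoU loss is rescaled by a distance-aware factor computed from the box centers and the smallest enclosing box, whose term is detached from the gradient. This is a simplified illustration assuming (x1, y1, x2, y2) box coordinates; the dynamic non-monotonic focusing mechanism of later Wise-IoU variants is omitted, and the exact version used in this study may differ.</p>
<preformat>
import torch

def wise_iou_v1(pred, target, eps=1e-7):
    """Sketch of the Wise-IoU v1 loss for boxes given as (x1, y1, x2, y2) tensors."""
    # Standard IoU loss
    ix1 = torch.max(pred[..., 0], target[..., 0])
    iy1 = torch.max(pred[..., 1], target[..., 1])
    ix2 = torch.min(pred[..., 2], target[..., 2])
    iy2 = torch.min(pred[..., 3], target[..., 3])
    inter = (ix2 - ix1).clamp(min=0) * (iy2 - iy1).clamp(min=0)
    area_p = (pred[..., 2] - pred[..., 0]) * (pred[..., 3] - pred[..., 1])
    area_t = (target[..., 2] - target[..., 0]) * (target[..., 3] - target[..., 1])
    iou_loss = 1.0 - inter / (area_p + area_t - inter + eps)
    # Smallest enclosing box and squared distance between the box centres
    cw = torch.max(pred[..., 2], target[..., 2]) - torch.min(pred[..., 0], target[..., 0])
    ch = torch.max(pred[..., 3], target[..., 3]) - torch.min(pred[..., 1], target[..., 1])
    dx = (pred[..., 0] + pred[..., 2] - target[..., 0] - target[..., 2]) * 0.5
    dy = (pred[..., 1] + pred[..., 3] - target[..., 1] - target[..., 3]) * 0.5
    # Distance-aware scaling; the detached denominator only rescales the loss
    r_wiou = torch.exp((dx ** 2 + dy ** 2) / (cw ** 2 + ch ** 2 + eps).detach())
    return r_wiou * iou_loss
</preformat>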
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material. Further inquiries can be directed to the corresponding author.</p></sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>YZ: Investigation, Writing &#x2013; original draft, Visualization, Software. YH: Writing &#x2013; review &amp; editing, Investigation, Data curation, Formal analysis. YY: Supervision, Writing &#x2013; review &amp; editing. SZ: Writing &#x2013; review &amp; editing, Supervision, Funding acquisition. YL: Writing &#x2013; review &amp; editing, Conceptualization, Methodology, Supervision. DH: Methodology, Validation, Writing &#x2013; review &amp; editing, Software.</p></sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s10" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ai</surname> <given-names>Y.</given-names></name>
<name><surname>Sun</surname> <given-names>C.</given-names></name>
<name><surname>Tie</surname> <given-names>J.</given-names></name>
<name><surname>Hou</surname> <given-names>M.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Research on recognition model of crop diseases and insect pests based on deep learning in harsh environments</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>171686</fpage>&#x2013;<lpage>171693</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2020.3025325</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Awika</surname> <given-names>H. O.</given-names></name>
<name><surname>Marconi</surname> <given-names>T. G.</given-names></name>
<name><surname>Bedre</surname> <given-names>R.</given-names></name>
<name><surname>Mandadi</surname> <given-names>K. K.</given-names></name>
<name><surname>Avila</surname> <given-names>C. A.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Minor alleles are associated with white rust (Albugo occidentalis) susceptibility in spinach (Spinacia oleracea)</article-title>. <source>Horticulture</source> <volume>6</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41438-019-0214-7</pub-id>, PMID: <pub-id pub-id-type="pmid">31814982</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Caldeira</surname> <given-names>R. F.</given-names></name>
<name><surname>Santiago</surname> <given-names>W. E.</given-names></name>
<name><surname>Teruel</surname> <given-names>B. J.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Identification of cotton leaf lesions using deep learning techniques</article-title>. <source>Sensors</source> <volume>21</volume>, <fpage>3169</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s21093169</pub-id>, PMID: <pub-id pub-id-type="pmid">34063578</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chen</surname> <given-names>X.</given-names></name>
<name><surname>Yang</surname> <given-names>X. T.</given-names></name>
<name><surname>Zhou</surname> <given-names>J. J.</given-names></name>
<name><surname>Zhu</surname> <given-names>K. K.</given-names></name>
<name><surname>Wang</surname> <given-names>H. Z.</given-names></name>
<name><surname>Zhang</surname> <given-names>C. Q.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>DAMI-YOLOv8l: A multi-scale detection framework for light-trapping insect pest monitoring</article-title>. <source>Ecol. Inf.</source> <volume>86</volume>, <fpage>102789</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ecoinf.2025.103067</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Cheng</surname> <given-names>D. G.</given-names></name>
<name><surname>Zhao</surname> <given-names>Z. Q.</given-names></name>
<name><surname>Wang</surname> <given-names>M. Y.</given-names></name>
<name><surname>An</surname> <given-names>Q. S.</given-names></name>
<name><surname>Ma</surname> <given-names>Z. Y.</given-names></name>
<name><surname>Li</surname> <given-names>L.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Rice diseases identification method based on improved YOLOv7-tiny</article-title>. <source>Agriculture</source> <volume>14</volume>, <fpage>718</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture14050709</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chenrui</surname> <given-names>K.</given-names></name>
<name><surname>Lin</surname> <given-names>J.</given-names></name>
<name><surname>Yu</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Attention-based multiscale feature pyramid network for corn pest detection under wild environment</article-title>. <source>Insects</source> <volume>13</volume>, <fpage>978</fpage>&#x2013;<lpage>978</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/insects13110978</pub-id>, PMID: <pub-id pub-id-type="pmid">36354802</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Genaev</surname> <given-names>M. A.</given-names></name>
<name><surname>Skolotneva</surname> <given-names>E. S.</given-names></name>
<name><surname>Gultyaeva</surname> <given-names>E. I.</given-names></name>
<name><surname>Orlova</surname> <given-names>E. A.</given-names></name>
<name><surname>Bechtold</surname> <given-names>N. P.</given-names></name>
<name><surname>Morozov</surname> <given-names>S. V.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>Image-based wheat fungi diseases identification by deep learning</article-title>. <source>Plants</source> <volume>10</volume>, <fpage>500</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/plants10081500</pub-id>, PMID: <pub-id pub-id-type="pmid">34451545</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>He</surname> <given-names>H. J.</given-names></name>
<name><surname>Liu</surname> <given-names>Y. X.</given-names></name>
<name><surname>Wang</surname> <given-names>S. Y.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Research on the detection algorithm for pests and diseases of leafy vegetables based on improved YOLO v5s</article-title>. <source>JiangSu Agric. Sci.</source> <volume>53</volume>, <fpage>244</fpage>&#x2013;<lpage>250</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.15889/j.issn.1002-1302.2025.05.035</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hou</surname> <given-names>S. N.</given-names></name>
<name><surname>Zheng</surname> <given-names>N.</given-names></name>
<name><surname>Tang</surname> <given-names>L.</given-names></name>
<name><surname>Ji</surname> <given-names>X. F.</given-names></name>
<name><surname>Li</surname> <given-names>Y. N.</given-names></name>
</person-group> (<year>2018</year>). 
<article-title>Effects of cadmium and copper mixtures to carrot and pakchoi under greenhouse cultivation condition</article-title>. <source>Ecotoxicol. Environ. Saf.</source> <volume>159</volume>, <fpage>172</fpage>&#x2013;<lpage>181</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ecoenv.2018.04.060</pub-id>, PMID: <pub-id pub-id-type="pmid">29751225</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Huang</surname> <given-names>M.</given-names></name>
<name><surname>Mi</surname> <given-names>W. K.</given-names></name>
<name><surname>Wu</surname> <given-names>Y. X.</given-names></name>
<name><surname>Feng</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>EDGS-YOLOv8: an improved YOLOv8 lightweight UAV detection model</article-title>. <source>Drones</source> <volume>8</volume>, <fpage>342</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/drones8070337</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kellner</surname> <given-names>N.</given-names></name>
<name><surname>Antal</surname> <given-names>E.</given-names></name>
<name><surname>Nagy</surname> <given-names>A.</given-names></name>
<name><surname>Bujdos&#xf3;</surname> <given-names>G.</given-names></name>
<name><surname>Kov&#xe1;cs</surname> <given-names>S.</given-names></name>
<name><surname>Sipos</surname> <given-names>L.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>The effect of black rot on grape berry composition</article-title>. <source>Acta Alimentaria.</source> <volume>51</volume>, <fpage>126</fpage>&#x2013;<lpage>133</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1556/066.2021.00195</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Levere</surname> <given-names>K. M.</given-names></name>
<name><surname>Bresnahan</surname> <given-names>A.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Bacillus thuringiensis resistance of diamondback moth in a broccoli crop</article-title>. <source>Ecol. Model.</source> <volume>495</volume>, <fpage>110797</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ecolmodel.2024.110787</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>D. W.</given-names></name>
<name><surname>Ahmed</surname> <given-names>F.</given-names></name>
<name><surname>Wu</surname> <given-names>J.</given-names></name>
<name><surname>Wu</surname> <given-names>H.</given-names></name>
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
<name><surname>Wang</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>YOLO-JD: A deep learning network for jute diseases and pests detection from images</article-title>. <source>Plants</source> <volume>11</volume>, <fpage>937</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/plants11070937</pub-id>, PMID: <pub-id pub-id-type="pmid">35406915</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>Y.</given-names></name>
<name><surname>Guo</surname> <given-names>Z. H.</given-names></name>
<name><surname>Yang</surname> <given-names>L.</given-names></name>
<name><surname>Wang</surname> <given-names>H. J.</given-names></name>
<name><surname>Zhang</surname> <given-names>Z.</given-names></name>
<name><surname>Chen</surname> <given-names>C.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Weed detection algorithms in rice fields based on improved YOLOv10n</article-title>. <source>Agriculture</source> <volume>14</volume>, <fpage>1931</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture14112066</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>Y. X.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Field weed recognition algorithm based on machine learning</article-title>. <source>J. Electronic Imaging</source> <volume>31</volume>, <fpage>051509</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1117/1.JEI.31.5.051413</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>B.</given-names></name>
<name><surname>Fernandez</surname> <given-names>M. A.</given-names></name>
<name><surname>Kirk</surname> <given-names>W. W.</given-names></name>
<name><surname>Du Toit</surname> <given-names>L. J.</given-names></name>
<name><surname>Hausbeck</surname> <given-names>M. K.</given-names></name>
<name><surname>Quesada-Ocampo</surname> <given-names>L. M.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Investigation of using hyperspectral vegetation indices to assess brassica downy mildew</article-title>. <source>Sensors</source> <volume>24</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s24061916</pub-id>, PMID: <pub-id pub-id-type="pmid">38544179</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>Q.</given-names></name>
<name><surname>Huang</surname> <given-names>W.</given-names></name>
<name><surname>Wang</surname> <given-names>Z.</given-names></name>
<name><surname>Yin</surname> <given-names>X.</given-names></name>
<name><surname>He</surname> <given-names>J.</given-names></name>
<name><surname>Liu</surname> <given-names>Y.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>DSW-YOLOv8n: A new underwater target detection algorithm based on improved YOLOv8n</article-title>. <source>Electronics</source> <volume>12</volume>, <fpage>4001</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/electronics12183892</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>Q.</given-names></name>
<name><surname>Lv</surname> <given-names>J.</given-names></name>
<name><surname>Ma</surname> <given-names>J.</given-names></name>
<name><surname>Sun</surname> <given-names>H.</given-names></name>
<name><surname>Li</surname> <given-names>J.</given-names></name>
<name><surname>Hu</surname> <given-names>B.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>MAE-YOLOv8-based small object detection of green crisp plum in real complex orchard environments</article-title>. <source>Comput. Electron. Agric.</source> <volume>226</volume>, <fpage>109231</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2024.109458</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>S.</given-names></name>
<name><surname>Qi</surname> <given-names>L.</given-names></name>
<name><surname>Qin</surname> <given-names>H.</given-names></name>
<name><surname>Shi</surname> <given-names>J.</given-names></name>
<name><surname>Jia</surname> <given-names>J.</given-names></name>
</person-group> (<year>2018</year>). 
<article-title>Path aggregation network for instance segmentation</article-title>. <source>Proc. IEEE Conf. Comput. Vis. Pattern Recognit. (CVPR)</source>, <volume>2018</volume>, <fpage>8759</fpage>&#x2013;<lpage>8768</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR.2018.00913</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Long</surname> <given-names>Y.</given-names></name>
<name><surname>Lin</surname> <given-names>W. S.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Surface defect detection of ultrathin fiberboard based on improved YOLOv8x</article-title>. <source>J. Nondestruct. Eval.</source> <volume>44</volume>, <fpage>34</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10921-025-01196-8</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ma</surname> <given-names>M. Y.</given-names></name>
<name><surname>Pang</surname> <given-names>H. L.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>SP-YOLOv8s: an improved YOLOv8s model for remote sensing image tiny object detection</article-title>. <source>Appl. Sci.</source> <volume>13</volume>, <fpage>8424</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/app13148161</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ma</surname> <given-names>N.</given-names></name>
<name><surname>Su</surname> <given-names>Y. X.</given-names></name>
<name><surname>Wang</surname> <given-names>C.</given-names></name>
<name><surname>Li</surname> <given-names>H. T.</given-names></name>
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Wheat seed detection and counting method based on improved YOLOv8 model</article-title>. <source>Sensors</source> <volume>24</volume>, <fpage>1396</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s24051654</pub-id>, PMID: <pub-id pub-id-type="pmid">38475189</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ma</surname> <given-names>L.</given-names></name>
<name><surname>Yu</surname> <given-names>Q. W.</given-names></name>
<name><surname>Zhang</surname> <given-names>J. R.</given-names></name>
<name><surname>Xing</surname> <given-names>C.</given-names></name>
<name><surname>Chen</surname> <given-names>C.</given-names></name>
<name><surname>Wang</surname> <given-names>W.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Maize leaf disease identification based on YOLOv5n algorithm incorporating attention mechanism</article-title>. <source>Agronomy</source> <volume>13</volume>, <fpage>516</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy13020521</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mamede</surname> <given-names>M. C.</given-names></name>
<name><surname>Mota</surname> <given-names>R. P.</given-names></name>
<name><surname>Silva</surname> <given-names>A. C. A.</given-names></name>
<name><surname>Tebaldi</surname> <given-names>N. D.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Nanoparticles in inhibiting Pantoea ananatis and to control maize white spot</article-title>. <source>Cienc. Rural</source> <volume>52</volume>, <fpage>e20210147</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1590/0103-8478cr20210481</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mu</surname> <given-names>R. H.</given-names></name>
<name><surname>Zeng</surname> <given-names>X. Q.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>A review of deep learning research</article-title>. <source>KSII Trans. Internet Inf. Syst.</source> <volume>13</volume>, <fpage>1738</fpage>&#x2013;<lpage>1764</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3837/tiis.2019.04.001</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Olmez</surname> <given-names>S.</given-names></name>
<name><surname>Mutlu</surname> <given-names>N.</given-names></name>
<name><surname>Demir</surname> <given-names>S.</given-names></name>
<name><surname>Ba&#x15f;ba&#x11f;ci</surname> <given-names>G.</given-names></name>
<name><surname>Aydo&#x11f;du</surname> <given-names>M.</given-names></name>
<name><surname>Bayraktar</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>First report of Alternaria alternata causing leaf spot diseases of cotton in Turkiye</article-title>. <source>Plant Dis.</source> <volume>107</volume>, <fpage>3273</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1094/PDIS-04-23-0724-PDN</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Omer</surname> <given-names>S. M.</given-names></name>
<name><surname>Ghafoor</surname> <given-names>K. Z.</given-names></name>
<name><surname>Al-Talabani</surname> <given-names>A. K.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Lightweight improved yolov5 model for cucumber leaf disease and pest detection based on deep learning</article-title>. <source>Signal Image Video Process.</source> <volume>18</volume>, <fpage>1329</fpage>&#x2013;<lpage>1342</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11760-023-02865-9</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Qiang</surname> <given-names>Z.</given-names></name>
<name><surname>Shi</surname> <given-names>F. H.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Pest disease detection of Brassica chinensis in wide scenes via machine vision: method and deployment</article-title>. <source>J. Plant Dis. Prot.</source> <volume>129</volume>, <fpage>533</fpage>&#x2013;<lpage>544</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s41348-021-00562-8</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ramcharan</surname> <given-names>A.</given-names></name>
<name><surname>Baranowski</surname> <given-names>K.</given-names></name>
<name><surname>McCloskey</surname> <given-names>P.</given-names></name>
<name><surname>Ahmed</surname> <given-names>B.</given-names></name>
<name><surname>Legg</surname> <given-names>J.</given-names></name>
<name><surname>Hughes</surname> <given-names>D. P.</given-names></name>
<etal/>
</person-group>. (<year>2017</year>). 
<article-title>Deep learning for image-based cassava disease detection</article-title>. <source>Front. Plant Sci.</source> <volume>8</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2017.01852</pub-id>, PMID: <pub-id pub-id-type="pmid">29163582</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Roy</surname> <given-names>A. M.</given-names></name>
<name><surname>Bose</surname> <given-names>R.</given-names></name>
<name><surname>Bhaduri</surname> <given-names>J.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>A fast accurate fine-grain object detection model based on YOLOv4 deep neural network</article-title>. <source>Neural Comput. Appl.</source> <volume>34</volume>, <fpage>3895</fpage>&#x2013;<lpage>3921</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00521-021-06651-x</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Saeed</surname> <given-names>A.</given-names></name>
<name><surname>Abdel-Aziz</surname> <given-names>A. A.</given-names></name>
<name><surname>Mossad</surname> <given-names>A.</given-names></name>
<name><surname>El-Roby</surname> <given-names>A.</given-names></name>
<name><surname>Ali</surname> <given-names>M. A.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Smart detection of tomato leaf diseases using transfer learning-based convolutional neural networks</article-title>. <source>Agriculture</source> <volume>13</volume>, <fpage>132</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture13010139</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Song</surname> <given-names>Y. F.</given-names></name>
<name><surname>Li</surname> <given-names>S. W.</given-names></name>
<name><surname>Qiao</surname> <given-names>J. J.</given-names></name>
<name><surname>Sun</surname> <given-names>H. Y.</given-names></name>
<name><surname>Li</surname> <given-names>D. W.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Analysis on chlorophyll diagnosis of wheat leaves based on digital image processing and feature selection</article-title>. <source>Trait. Signal</source> <volume>39</volume>, <fpage>381</fpage>&#x2013;<lpage>387</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18280/ts.390140</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sun</surname> <given-names>H.</given-names></name>
<name><surname>Nicholaus</surname> <given-names>I. T.</given-names></name>
<name><surname>Yang</surname> <given-names>S.</given-names></name>
<name><surname>Kang</surname> <given-names>D.</given-names></name>
<name><surname>Kim</surname> <given-names>H.</given-names></name>
<name><surname>Kim</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>YOLO-FMDI: A lightweight YOLOv8 focusing on a multi-scale feature diffusion interaction neck for tomato pest and disease detection</article-title>. <source>Electronics</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/electronics13152974</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Tan</surname> <given-names>M.</given-names></name>
<name><surname>Pang</surname> <given-names>R.</given-names></name>
<name><surname>Le</surname> <given-names>Q. V.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>EfficientDet: Scalable and efficient object detection</article-title>. <source>Proc. IEEE/CVF Conf. Comput. Vis. Pattern Recognit. (CVPR)</source>, <volume>2020</volume>, <fpage>10781</fpage>&#x2013;<lpage>10790</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR42600.2020</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Vilela</surname> <given-names>E. F.</given-names></name>
<name><surname>Ferreira</surname> <given-names>W.</given-names></name>
<name><surname>Rocha</surname> <given-names>J.</given-names></name>
<name><surname>Santana</surname> <given-names>D.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>New spectral index and machine learning models for detecting coffee leaf miner infestation using Sentinel-2 multispectral imagery</article-title>. <source>Agriculture</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture13020388</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>B.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Identification of crop diseases and insect pests based on deep learning</article-title>. <source>Sci. Program</source> <volume>2022</volume>, <fpage>6638521</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2022/1752685</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>G.</given-names></name>
<name><surname>Chen</surname> <given-names>Y. F.</given-names></name>
<name><surname>An</surname> <given-names>P.</given-names></name>
<name><surname>Hong</surname> <given-names>H.</given-names></name>
<name><surname>Hu</surname> <given-names>J.</given-names></name>
<name><surname>Huang</surname> <given-names>T.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>UAV-YOLOv8: A small-object-detection model based on improved YOLOv8 for UAV aerial photography scenarios</article-title>. <source>Sensors</source> <volume>23</volume>, <fpage>6391</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s23167190</pub-id>, PMID: <pub-id pub-id-type="pmid">37631727</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>X. W.</given-names></name>
<name><surname>Liu</surname> <given-names>J.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Vegetable disease detection using an improved YOLOv8 algorithm in the greenhouse plant environment</article-title>. <source>Sci. Rep.</source> <volume>14</volume>, <fpage>4898</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-024-55594-5</pub-id>, PMID: <pub-id pub-id-type="pmid">38383751</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>Z. Y.</given-names></name>
<name><surname>Yuan</surname> <given-names>G. W.</given-names></name>
<name><surname>Li</surname> <given-names>C.</given-names></name>
<name><surname>Zhao</surname> <given-names>J.</given-names></name>
<name><surname>Su</surname> <given-names>Y.</given-names></name>
<name><surname>Chen</surname> <given-names>C.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Foreign-object detection in high-voltage transmission line based on improved YOLOv8m</article-title>. <source>Appl. Sci.</source> <volume>13</volume>, <fpage>12793</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/app132312775</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wu</surname> <given-names>F.</given-names></name>
<name><surname>Liu</surname> <given-names>Z. K.</given-names></name>
<name><surname>Liu</surname> <given-names>Y. L.</given-names></name>
<name><surname>Li</surname> <given-names>L.</given-names></name>
<name><surname>Gao</surname> <given-names>L.</given-names></name>
<name><surname>Yu</surname> <given-names>C. Y.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Green pak choi is better in suitable environment but the purple ones more resist to drought and shading</article-title>. <source>BMC Plant Biol.</source> <volume>25</volume>, <fpage>35</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s12870-025-06354-8</pub-id>, PMID: <pub-id pub-id-type="pmid">40098092</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Xiao</surname> <given-names>B. J.</given-names></name>
<name><surname>Nguyen</surname> <given-names>M.</given-names></name>
<name><surname>Yan</surname> <given-names>W. Q.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Fruit ripeness identification using YOLOv8 model</article-title>. <source>Multimed. Tools Appl.</source> <volume>83</volume>, <fpage>28039</fpage>&#x2013;<lpage>28056</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11042-023-16570-9</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Xie</surname> <given-names>J.</given-names></name>
<name><surname>Pang</surname> <given-names>Y. W.</given-names></name>
<name><surname>Khan</surname> <given-names>M. H.</given-names></name>
<name><surname>Khan</surname> <given-names>R.</given-names></name>
<name><surname>Li</surname> <given-names>W.</given-names></name>
<name><surname>Han</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Latent feature pyramid network for object detection</article-title>. <source>IEEE Trans. Multimed.</source> <volume>25</volume>, <fpage>2153</fpage>&#x2013;<lpage>2163</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TMM.2022.3143707</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Xie</surname> <given-names>Z. J.</given-names></name>
<name><surname>Zhang</surname> <given-names>Y. Y.</given-names></name>
<name><surname>Cao</surname> <given-names>Z.</given-names></name>
<name><surname>Wei</surname> <given-names>Q.</given-names></name>
<name><surname>Hu</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Hydroponic Chinese flowering cabbage detection and localization algorithm based on improved YOLOv5s</article-title>. <source>PloS One</source> <volume>19</volume>, <fpage>e0316661</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0315465</pub-id>, PMID: <pub-id pub-id-type="pmid">39680539</pub-id>
</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Xin</surname> <given-names>M. Y.</given-names></name>
<name><surname>Wang</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Image recognition of crop diseases and insect pests based on deep learning</article-title>. <source>Wirel. Commun. Mob. Comput.</source> <volume>2021</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2021/5511676</pub-id>
</mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yang</surname> <given-names>G. L.</given-names></name>
<name><surname>Wang</surname> <given-names>J. X.</given-names></name>
<name><surname>Nie</surname> <given-names>Z.</given-names></name>
<name><surname>Yang</surname> <given-names>H.</given-names></name>
<name><surname>Zhang</surname> <given-names>S. H.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>A lightweight YOLOv8 tomato detection algorithm combining feature enhancement and attention</article-title>. <source>Agronomy</source> <volume>13</volume>, <fpage>1707</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy13071824</pub-id>
</mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yao</surname> <given-names>J. B.</given-names></name>
<name><surname>Liu</surname> <given-names>J. H.</given-names></name>
<name><surname>Liu</surname> <given-names>Z.</given-names></name>
<name><surname>Li</surname> <given-names>Y.</given-names></name>
<name><surname>Wang</surname> <given-names>X.</given-names></name>
<name><surname>Du</surname> <given-names>Y.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Identification of winter wheat pests and diseases based on improved convolutional neural network</article-title>. <source>Open Life Sci.</source> <volume>18</volume>, <fpage>20220165</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1515/biol-2022-0632</pub-id>, PMID: <pub-id pub-id-type="pmid">37426620</pub-id>
</mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yin</surname> <given-names>X. P.</given-names></name>
<name><surname>Zhao</surname> <given-names>Z. K.</given-names></name>
<name><surname>Hu</surname> <given-names>J.</given-names></name>
<name><surname>Ding</surname> <given-names>C.</given-names></name>
<name><surname>Fu</surname> <given-names>W.</given-names></name>
<name><surname>Li</surname> <given-names>Z.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>MAS-YOLO: A lightweight detection algorithm for PCB defect detection based on improved YOLOv12</article-title>. <source>Appl. Sci.</source> <volume>15</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/app15116238</pub-id>
</mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yue</surname> <given-names>G. B.</given-names></name>
<name><surname>Liu</surname> <given-names>Y. Q.</given-names></name>
<name><surname>Liu</surname> <given-names>D. D.</given-names></name>
<name><surname>Li</surname> <given-names>X. D.</given-names></name>
<name><surname>Cui</surname> <given-names>Y. C.</given-names></name>
<name><surname>Lu</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>GLU-YOLOv8: An improved pest and disease target detection algorithm based on YOLOv8</article-title>. <source>Forests</source> <volume>15</volume>, <fpage>1528</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/f15091486</pub-id>
</mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhai</surname> <given-names>S. P.</given-names></name>
<name><surname>Shang</surname> <given-names>D. R.</given-names></name>
<name><surname>Wei</surname> <given-names>Y. M.</given-names></name>
<name><surname>Duan</surname> <given-names>L. Z.</given-names></name>
<name><surname>Wang</surname> <given-names>J.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>DF-SSD: An improved SSD object detection algorithm based on DenseNet and feature fusion</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>24344</fpage>&#x2013;<lpage>24357</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2020.2971026</pub-id>
</mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>J.</given-names></name>
<name><surname>Zhang</surname> <given-names>D. F.</given-names></name>
<name><surname>Li</surname> <given-names>W.</given-names></name>
<name><surname>Li</surname> <given-names>H. J.</given-names></name>
<name><surname>Yang</surname> <given-names>Y. H.</given-names></name>
<name><surname>Li</surname> <given-names>H. Y.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>DSCONV-GAN: A UAV-based model for Verticillium wilt disease detection in Chinese cabbage in complex growing environments</article-title>. <source>Plant Methods</source> <volume>20</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13007-024-01303-2</pub-id>, PMID: <pub-id pub-id-type="pmid">39696451</pub-id>
</mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhao</surname> <given-names>Q. H.</given-names></name>
<name><surname>Liu</surname> <given-names>Y. Q.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Design of apple recognition model based on improved deep learning object detection framework Faster-RCNN</article-title>. <source>Adv. Contin. Discret. Models</source> <volume>2024</volume>, <fpage>7</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13662-024-03835-2</pub-id>
</mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zheng</surname> <given-names>J. J.</given-names></name>
<name><surname>Lan</surname> <given-names>B.</given-names></name>
<name><surname>Yu</surname> <given-names>L.</given-names></name>
<name><surname>Zhang</surname> <given-names>G.</given-names></name>
<name><surname>Cao</surname> <given-names>H.</given-names></name>
<name><surname>Zhang</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Method for pest identification of pakchoi based on the improved YOLOv5s model</article-title>. <source>Int. J. Agric. Biol. Eng.</source> <volume>40</volume>, <fpage>124</fpage>&#x2013;<lpage>133</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.25165/j.ijabe.20241702.8317</pub-id>
</mixed-citation>
</ref>
<ref id="B53">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zheng</surname> <given-names>Z.</given-names></name>
<name><surname>Wang</surname> <given-names>P.</given-names></name>
<name><surname>Liu</surname> <given-names>D.</given-names></name>
<name><surname>Ren</surname> <given-names>W.</given-names></name>
<name><surname>Ye</surname> <given-names>Q.</given-names></name>
<name><surname>Hu</surname> <given-names>Q.</given-names></name>
<etal/>
</person-group>. (<year>2020</year>). 
<article-title>Distance-IoU loss: Faster and better learning for bounding box regression</article-title>. <source>Proc. AAAI Conf. Artif. Intell.</source> <volume>34</volume>, <fpage>12993</fpage>&#x2013;<lpage>13000</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1609/aaai.v34i07.6999</pub-id>
</mixed-citation>
</ref>
<ref id="B54">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhou</surname> <given-names>K. Q.</given-names></name>
<name><surname>Jiang</surname> <given-names>S. H.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Forest fire detection algorithm based on improved YOLOv11n</article-title>. <source>Sensors</source> <volume>25</volume>, <fpage>275</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s25102989</pub-id>, PMID: <pub-id pub-id-type="pmid">40431784</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1568126">Xing Yang</ext-link>, Anhui Science and Technology University, China</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2840407">Shunhao Qing</ext-link>, Northwest A&amp;F University, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3276122">Wu Yao</ext-link>, Anhui Science and Technology University, China</p></fn>
</fn-group>
</back>
</article>