<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2024.1398277</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>LCGSC-YOLO: a lightweight apple leaf diseases detection method based on LCNet and GSConv module under YOLO framework</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Jianlong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2073583"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Qin</surname>
<given-names>Congcong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hou</surname>
<given-names>Beibei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yuan</surname>
<given-names>Yuan</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Yake</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Feng</surname>
<given-names>Wenfeng</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2315654"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>School of Computer Science and Technology, Henan Polytechnic University</institution>, <addr-line>Jiaozuo</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>School of Education, Henan Normal University</institution>, <addr-line>Xinxiang</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>School of Computer and Information Engineering, Henan Normal University</institution>, <addr-line>Xinxiang</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Lei Shu, Nanjing Agricultural University, China</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Jinling Zhao, Anhui University, China</p>
<p>Bin Liu, Northwest A&amp;F University, China</p>
<p>Wei Lu, Nanjing Agricultural University, China</p>
<p>Jale Bekta&#x15f;, Mersin University, T&#xfc;rkiye</p>
<p>Fauzan Masykur, Muhammadiyah University of Ponorogo, Indonesia</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Jianlong Wang, <email xlink:href="mailto:wangjianlong24@hpu.edu.cn">wangjianlong24@hpu.edu.cn</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>31</day>
<month>10</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1398277</elocation-id>
<history>
<date date-type="received">
<day>09</day>
<month>03</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>10</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Wang, Qin, Hou, Yuan, Zhang and Feng</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Wang, Qin, Hou, Yuan, Zhang and Feng</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>In response to the current mainstream deep learning detection methods with a large number of learned parameters and the complexity of apple leaf disease scenarios, the paper proposes a lightweight method and names it LCGSC-YOLO. This method is based on the YOLO (You Only Look Once) framework modified with the LCNet (A Lightweight CPU Convolutional Neural Network) and GSConv (Group Shuffle Convolution) modules.</p>
</sec>
<sec>
<title>Methods</title>
<p>Firstly, the lightweight LCNet is utilized to reconstruct the backbone network, with the purpose of reducing the number of parameters and computations of the model. Secondly, the GSConv module and the VOVGSCSP (Slim-neck by GSConv) module are introduced in the neck network, which makes it possible to minimize the number of model parameters and computations while guaranteeing the fusion capability among the different feature layers. Finally, coordinate attention is embedded in the tail of the backbone and after each VOVGSCSP module to alleviate the detection accuracy degradation caused by model lightweighting.</p>
</sec>
<sec>
<title>Results</title>
<p>The experimental results show the LCGSC-YOLO can achieve an excellent detection performance with mean average precision of 95.5% and detection speed of 53 frames per second (FPS) on the mixed datasets of Plant Pathology 2021 (FGVC8) and AppleLeaf9.</p>
</sec>
<sec>
<title>Discussion</title>
<p>The number of parameters and Floating Point Operations (FLOPs) of the LCGSC-YOLO are much lower than those of the other related algorithms in the comparative experiments.</p>
</sec>
</abstract>
<kwd-group>
<kwd>apple leaf disease detection</kwd>
<kwd>coordinate attention</kwd>
<kwd>lightweight network</kwd>
<kwd>depth-wise separable convolution</kwd>
<kwd>YOLO</kwd>
</kwd-group>
<counts>
<fig-count count="9"/>
<table-count count="6"/>
<equation-count count="13"/>
<ref-count count="46"/>
<page-count count="17"/>
<word-count count="8644"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Sustainable and Intelligent Phytoprotection</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>As one of the favorite fruits, apples are highly nutritious and widely cultivated around the world (<xref ref-type="bibr" rid="B17">Hyson, 2011</xref>). China has been the global leader in apple production (<xref ref-type="bibr" rid="B14">Hu et&#xa0;al., 2022</xref>). The apple cultivation industry has performed a vital task in promoting the agricultural economy of China. However, various diseases during the growth period of apples make the containment and management of apple leaf diseases extremely challenging (<xref ref-type="bibr" rid="B9">Dhaka et&#xa0;al., 2021</xref>). The prevention of apple leaf diseases is crucial for apple growth. Farmers need to minimize the incidence of leaf diseases through effective measures to ensure quality of apple production (<xref ref-type="bibr" rid="B38">Roy and Bhaduri, 2021</xref>). Therefore, timely detection of apple leaf diseases is essential for disease prevention and control. It not only ensures the quality of the fruits but also contributes to the improvement of agricultural yield.</p>
<p>Traditional apple leaf disease detection methods primarily depended on eye observation to identify disease categories. But, the approach has the problem of high labor intensity. The method of manual visual inspection no longer meets the needs of modern agriculture for efficiency and precision. Thus, it is essential to introduce more advanced technologies and methods to achieve greater efficiency in disease detection (<xref ref-type="bibr" rid="B2">Arsenovic et&#xa0;al., 2019</xref>). With the advent of the machine learning-based technology, it has been employed in various aspects of agriculture (<xref ref-type="bibr" rid="B41">Tian et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B3">Attri et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B10">Elbasi et&#xa0;al., 2023</xref>). For example, Rastogi et&#xa0;al. classified leaves based on artificial neural networks and then graded them according to the number of diseases on the leaves (<xref ref-type="bibr" rid="B34">Rastogi et&#xa0;al., 2015</xref>). Ahmed et&#xa0;al. used a decision tree approach to detect the three most common rice diseases, which are black sigatoka, bacterial leaf blight and brown spot (<xref ref-type="bibr" rid="B1">Ahmed et&#xa0;al., 2019</xref>). Harakannanavar et&#xa0;al. combined K-Nearest Neighbor and image processing techniques for detecting leaf diseases in tomato plants (<xref ref-type="bibr" rid="B11">Harakannanavar et&#xa0;al., 2022</xref>). However, these machine learning-based methods are usually made less practical in embedded devices given the large amount of computations in the data preprocessing and feature extraction phases (<xref ref-type="bibr" rid="B39">Sujatha et&#xa0;al., 2021</xref>).</p>
<p>In recent years, deep learning techniques have made great progress in leaf disease detection (<xref ref-type="bibr" rid="B31">Ngugi et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B24">Khan et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B5">Bhuiyan et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B23">Kaur et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B26">Li et&#xa0;al., 2022b</xref>). Specifically, Jiang et&#xa0;al. utilized a convolutional neural network (CNN) to obtain features from rice leaf diseases. Then, a support vector machine (SVM) was employed to perform classification and prediction of specific diseases (<xref ref-type="bibr" rid="B20">Jiang et&#xa0;al., 2020</xref>). Zeng et&#xa0;al. addressed the challenges posed by complex environments and relatively small disease areas in crop disease images using a self-attentive convolutional neural network (SACNN) (<xref ref-type="bibr" rid="B44">Zeng and Li, 2020</xref>). Target detection models, such as Faster-RCNN (<xref ref-type="bibr" rid="B37">Ren et&#xa0;al., 2015</xref>) and the YOLO series (<xref ref-type="bibr" rid="B35">Redmon et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B36">Redmon and Farhadi, 2017</xref>), can accurately detect the category and location of the target, which attracts more and more researchers to employ them in agriculture for accurate classification and localization of disease spots on crop leaves. However, the majority of disease detection models have a large number of parameters and are not well suited for deployment on mobile devices, which makes it difficult for them to meet the practical requirements of agricultural applications (<xref ref-type="bibr" rid="B30">Maddikunta et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B21">Johannes et&#xa0;al., 2017</xref>). In addition, Jiang et&#xa0;al. 
designed the INAR-SSD module for detecting apple leaf diseases, and the detection capability of the SSD network on various leaf diseases was enhanced by designing the inception module (<xref ref-type="bibr" rid="B19">Jiang et&#xa0;al., 2019</xref>). Due to the stacking of a large number of inception modules, INAR-SSD is not suitable for mobile devices. Therefore, in the last three years, researchers have largely focused on reducing the complexity of models to enhance their practicality. For instance, Bi et&#xa0;al. adopted a lightweight method for apple leaf disease detection by employing the MobileNet model (<xref ref-type="bibr" rid="B6">Bi et&#xa0;al., 2022</xref>). However, the presence of numerous convolutions and bottleneck modules still results in a substantial number of parameters. Barman et&#xa0;al. introduced a smartphone-based model for classifying citrus leaf diseases (<xref ref-type="bibr" rid="B4">Barman et&#xa0;al., 2020</xref>). Although the model was deployed on mobile devices, its application was limited to indoor experimental data, restricting its use in the practical detection of leaf diseases in complex outdoor environments. Hu et&#xa0;al. employed a lightweight method based on knowledge distillation to detect maize leaf diseases, which decreased the complexity of the model. However, it is difficult to guarantee the applicability of this method in real-world environments, which contain changes in weather and light (<xref ref-type="bibr" rid="B15">Hu et&#xa0;al., 2023</xref>). Xu et&#xa0;al. were devoted to reducing the number of parameters and computations through effective model design in order to improve the efficiency of apple leaf disease detection. The study used three different categories of diseases and conducted experiments in dense scenarios as well as leaf shade scenarios. These research efforts provide an important foundation for disease detection. 
But only relying on these three disease categories and limited scenario setups may not be sufficient to deal with more complex real-world application. Therefore, expanding the disease categories and adding more complex scenario types can help to improve the generalization ability of the model so that it can better adapt to the diverse challenges of practical applications (<xref ref-type="bibr" rid="B43">Xu and Wang, 2023</xref>).</p>
<p>In modern agricultural production, the use of mobile devices to detect apple leaf diseases has become an important trend. The application of lightweight models has significantly improved the efficiency and feasibility of this process. By running these optimized models on mobile devices, farmers and agricultural experts are able to quickly and accurately identify diseases on apple leaves and take timely interventions accordingly. This real-time detection and rapid response capability is critical for crop health management, helping to not only increase yields but also improve the overall quality of produce. In addition, the application of lightweight models significantly reduces the reliance on expensive hardware equipment, further lowering the cost of detection. The popularity of mobile devices, coupled with the efficiency of lightweight models, has made disease detection more accessible and economical, providing a convenient solution for agricultural production. Through these improvements, modern agriculture is better able to achieve precise management and intelligent operations, increasing overall production efficiency and product quality.</p>
<p>To sum up, scholars have introduced numerous effective methods in the field of object detection, leading to significant advancements in the detection of plant leaf diseases (<xref ref-type="bibr" rid="B18">Jackulin and Murugavalli, 2022</xref>; <xref ref-type="bibr" rid="B33">Orchi et&#xa0;al., 2021</xref>). In order to solve the problems of the current apple leaf disease detection, such as large number of parameters and calculations, lengthy inference time, and difficulty in real-time monitoring, the paper proposes a lightweight network model LCGSC-YOLO that takes both detection speed and accuracy into account. The main contributions are as follows:</p>
<list list-type="bullet">
<list-item>
<p>The LCNet is utilized to reconstruct the backbone network, which mainly consists of lightweight depth-wise separable convolutions. These convolutions effectively reduce the number of model parameters and computations.</p>
</list-item>
<list-item>
<p>The GSConv and VOVGSCSP modules are used to replace the original Conv and C3 modules in the neck network, which reduces the number of model parameters and computations while guaranteeing the fusion capability among different feature layers.</p>
</list-item>
<list-item>
<p>The combination of coordinate attention and LCNet embedded in the tail of the backbone enables the network to achieve better feature extraction performance. Moreover, the coordinate attention is embedded behind each VOVGSCSP module to enhance the feature fusion capability of the network, which eventually ameliorates the problem of accuracy degradation caused by model lightweighting.</p>
</list-item>
</list>
<p>The later sections are organized as follows: the second part describes and shows the detailed contents of the dataset and elaborates on the methodology proposed in the paper. The third part analyzes and discusses the experimental procedures and results of this paper. The fourth section draws the conclusion of this paper.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Datasets</title>
<p>The apple leaf disease data utilized in the paper have been selected from the Plant Pathology 2021 (FGVC8) dataset and the AppleLeaf9 dataset. The images in the datasets are all derived from outdoor scenes. Seven common diseases have been chosen for the study. Frog_eye_leaf_spot, Powdery_mildew, Rust, and Scab were selected from FGVC8. Alternaria leaf spot, Grey spot, and Mosaic were selected from AppleLeaf9. Moreover, under natural conditions Frog_eye_leaf_spots are mixed with Rust and Scab to form two categories of disease occurrence scenarios, respectively. In total, there are seven disease names and nine disease categories. The specific number of images in each category is shown in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>. The representative images of different disease categories are shown in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>The number of images of different disease categories in the dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="center">Categories of leaf disease</th>
<th valign="top" colspan="3" align="center">Number</th>
</tr>
<tr>
<th valign="top" align="center">Original Images</th>
<th valign="top" align="center">Enhanced Images</th>
<th valign="top" align="center">Total</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">Frog_eye_leaf_spot</td>
<td valign="top" align="center">367</td>
<td valign="top" align="center">2569</td>
<td valign="top" align="center">2936</td>
</tr>
<tr>
<td valign="top" align="center">Powdery_mildew</td>
<td valign="top" align="center">400</td>
<td valign="top" align="center">2800</td>
<td valign="top" align="center">3200</td>
</tr>
<tr>
<td valign="top" align="center">Rust</td>
<td valign="top" align="center">313</td>
<td valign="top" align="center">2191</td>
<td valign="top" align="center">2504</td>
</tr>
<tr>
<td valign="top" align="center">Scab</td>
<td valign="top" align="center">460</td>
<td valign="top" align="center">3220</td>
<td valign="top" align="center">3680</td>
</tr>
<tr>
<td valign="top" align="center">Alternaria_leaf_spot</td>
<td valign="top" align="center">253</td>
<td valign="top" align="center">1771</td>
<td valign="top" align="center">2024</td>
</tr>
<tr>
<td valign="top" align="center">Grey_spot</td>
<td valign="top" align="center">163</td>
<td valign="top" align="center">1141</td>
<td valign="top" align="center">1304</td>
</tr>
<tr>
<td valign="top" align="center">Mosaic</td>
<td valign="top" align="center">145</td>
<td valign="top" align="center">1015</td>
<td valign="top" align="center">1160</td>
</tr>
<tr>
<td valign="top" align="center">Rust+Frog_eye_leaf_spot</td>
<td valign="top" align="center">107</td>
<td valign="top" align="center">749</td>
<td valign="top" align="center">856</td>
</tr>
<tr>
<td valign="top" align="center">Scab+Frog_eye_leaf_spot</td>
<td valign="top" align="center">82</td>
<td valign="top" align="center">574</td>
<td valign="top" align="center">656</td>
</tr>
<tr>
<td valign="top" align="center">Total</td>
<td valign="top" align="center">2290</td>
<td valign="top" align="center">16030</td>
<td valign="top" align="center">18320</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>The representative images of different disease categories. <bold>(A)</bold> Scab. <bold>(B)</bold> Rust. <bold>(C)</bold> Powdery_mildew. <bold>(D)</bold> Frog_eye_leaf_spot. <bold>(E)</bold> Alternaria leaf spot. <bold>(F)</bold> Grey spot. <bold>(G)</bold> Mosaic. <bold>(H)</bold> Rust+Frog_eye_leaf_spot. <bold>(I)</bold> Scab+Frog_eye_leaf_spot.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1398277-g001.tif"/>
</fig>
<p>All collected images were labeled using the LabelImg tool and saved in an XML file. In addition, the images in the datasets are enhanced with changes such as rotation, brightness, contrast, and addition of noise. Finally, there are 18320 images in the datasets, and the specific number of images in each category is shown in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>. During the datasets labeling process, this study has assumed all the browning areas on the leaves to be spots caused by pests and diseases. However, this study recognizes that browning may also be caused by other reasons such as environmental factors or human intervention, and this simplifying assumption may limit the comprehensiveness of the labeling and the generalization ability of the model. In order to improve the accuracy and rigor of the study, this study plans to introduce a detailed comparative analysis of types of browning not caused by pests and diseases in future studies. This will help understand the multiple causes of browning and make appropriate adjustments in datasets labeling to ensure that the model can accurately identify and distinguish among various browning types and improve overall detection. The links to the datasets used in this study are provided below: <ext-link ext-link-type="uri" xlink:href="https://drive.google.com/drive/folders/1MRfK5eOm5-6KZTngPzpzjp9gx1NyEvZY?usp=sharing">https://drive.google.com/drive/folders/1MRfK5eOm5-6KZTngPzpzjp9gx1NyEvZY?usp=sharing</ext-link>.</p>
<p>As illustrated in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>, apple leaf disease detection faces several challenges. Firstly, different categories of leaf diseases have different shapes and sizes, which makes feature extraction more difficult. Secondly, most leaf diseases are small and densely distributed, which increases the difficulty of the localization process. Finally, under outdoor conditions, natural light and raindrops may interfere with leaf disease recognition.</p>
<p>A series of different scenes were selected as part of the experimental datasets for this study. The dark scene simulated the detection of leaf diseases in a low light or night environment. The rainy scene symbolized the effects of rain on the leaf surface, such as raindrop shading and water droplet retention. The lighting scene emphasizes the situation of leaves under direct sunlight or bright light. The dense scene contains a large number of diseases, which is closer to the leaf disease situation in real farms and provides a more rigorous testing environment for evaluating the performance of the detection algorithms. The multiple leaves scene and the two-spots scene further increase the complexity of the scenarios by taking into account multiple leaves and the interactions between two spots on the leaves. By conducting experiments in these different scenarios, the applicability of the proposed leaf disease detection method in practical applications can be comprehensively evaluated and more reliable technical support can be provided for agricultural production.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Design for LCGSC-YOLO</title>
<p>With the aim of achieving a lightweight model on apple leaf disease detection to make it more convenient to be applied to embedded devices, the paper proposes a lightweight method and names it LCGSC-YOLO. <xref ref-type="fig" rid="f2">
<bold>Figures&#xa0;2A, B</bold>
</xref> show the detailed framework of YOLOv5 and the proposed LCGSC-YOLO in the paper, respectively.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>The framework of the two models. <bold>(A)</bold> The YOLOv5 model. <bold>(B)</bold> The proposed LCGSC-YOLO model.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1398277-g002.tif"/>
</fig>
<p>YOLO is chosen as the main framework in this study mainly because of its efficiency and real-time performance. YOLO is able to predict both bounding boxes and class probabilities of targets in a single network, which allows it to perform well when dealing with complex backgrounds and dense targets while maintaining low computational requirements and fast inference speed. Other target detection frameworks are advantageous in different aspects but have their own limitations. For example, SSD performs better in terms of speed but may not be as good as YOLO when dealing with small targets and complex backgrounds. Although Faster R-CNN performs well in terms of detection accuracy, its two-phase structure results in slower inference and higher computational requirements, which limits its application in real-time detection. Taking these factors into consideration, YOLO is chosen in this study to meet our requirements for real-time performance and computational efficiency, while effectively handling complex backgrounds and high-density targets. The YOLOv5 version is chosen because it has demonstrated excellent stability and maturity in the field of target detection, providing a solid foundation for innovation in this study.</p>
<p>YOLOv5 is made up of four parts: the input layer, the backbone network, the neck network and the prediction head. As presented in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2A</bold>
</xref>, it can be seen that the backbone network of YOLOv5 stacks numerous Conv and C3 modules. The Spatial Pyramid Pooling-Fast (SPPF) module is utilized to capture multi-scale target information and then connects to the neck network (<xref ref-type="bibr" rid="B45">Zhang et&#xa0;al., 2023</xref>). In the neck network, besides the Conv and C3 modules, the Concat module is employed to aggregate the feature maps of different layers, thus reducing feature map information loss. The detection head module mainly performs multi-scale target detection of feature maps (<xref ref-type="bibr" rid="B40">Sun et&#xa0;al., 2022</xref>).</p>
<p>Due to the large number of Conv and C3 modules in the original YOLOv5 framework, it is difficult to embed it for utilizing in mobile devices (<xref ref-type="bibr" rid="B43">Xu and Wang, 2023</xref>). Therefore, an efficient LCGSC-YOLO for apple leaf disease detection is proposed in the work. The framework of LCGSC-YOLO is shown as <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2B</bold>
</xref>. Compared to YOLOv5, the main innovations of LCGSC-YOLO are described below: 1) LCNet is used to reconstruct the backbone network and is categorized into LCNet-3 and LCNet-5 according to the convolutional kernel size. LCNet greatly reduces the number of parameters and computations of the model. 2) In the neck network, the GSConv module and the VOVGSCSP module are utilized to replace the Conv and C3 modules from the YOLO framework. 3) The coordinate attention (CA) is inserted at the tail of the backbone and after each VOVGSCSP module, which alleviates the problem of detection accuracy reduction caused by the lightweighting of the model.</p>
<p>In the next section, the effectiveness of each module will be analyzed step by step. Firstly, this study demonstrates through theory and formulas that the lightweight design of LCNet effectively reduces the computational complexity. Similarly, this study shows that GSConv optimizes the convolution operation by mixing convolution kernels, which further reduces the computation. Secondly, the introduction of the CA mechanism improves the accuracy of the model. Finally, the experimental results show that the combination of LCNet and GSConv significantly reduces the number of parameters and the number of Floating Point Operations (FLOPs) of the model, while the CA mechanism further enhances the performance and accuracy of the model.</p>
<sec id="s2_2_1">
<label>2.2.1</label>
<title>Design of the LCNet module</title>
<p>To be more conveniently applied to embedded devices, the paper compresses the model parameters as much as possible with the guarantee of relatively high detection accuracy. The backbone of LCGSC-YOLO is structured by utilizing LCNet (<xref ref-type="bibr" rid="B8">Cui et&#xa0;al., 2021</xref>). The LCNet is utilized to decrease the number of parameters and computations in the feature extraction process. As shown in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>, depending on the size of the convolutional kernel, the LCNet is divided into two types of modules: LCNet-3 and LCNet-5. The LCNet-3 has only 3 &#xd7; 3 depth-wise (DW) convolution module and point-wise (PW) convolution module to extract features, while the LCNet-5 utilizes 5 &#xd7; 5 convolution and introduces a squeeze-and-excitation (SE) attention module.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>The structure of the LCNet module. <bold>(A)</bold> Depth-wise Convolution. <bold>(B)</bold> Pointwise Convolution. <bold>(C)</bold> SE Block.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1398277-g003.tif"/>
</fig>
<p>The 5 &#xd7; 5 convolution used in the LCNet-5 module captures a larger range of features which helps to recognize more complex patterns in an image, especially when local features are not sufficient to describe the overall structure. Although a single 5 &#xd7; 5 convolutional kernel has a large number of parameters, the total number of parameters may be relatively small compared to the use of multiple 3 &#xd7; 3 convolutional kernels in the same target region. The introduction of the SE module enables the fusion of information between different channels and improves the accuracy of model detection.</p>
<p>Within each LCNet-3 and LCNet-5 module, the first layer of the network performs a down-sampling operation on the feature maps to reduce the size of the feature maps to one-half of their original size. In addition, the number of input feature map channels is expanded to twice the original number. The subsequent layers extract only the holdout features without modifying the width and height of the feature map as well as the number of channels. The working principle of LCNet-3 and LCNet-5 is shown in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>.</p>
<p>Depth-wise separable convolution is divided into two components: the first is the depth-wise convolution and the other is the point-wise convolution (<xref ref-type="bibr" rid="B7">Chollet, 2017</xref>). Depth-wise convolution is a 2D convolution of each channel from the input image to reduce the number of parameters. Point-wise convolution uses 1 &#xd7; 1 convolution for all channels based on depth-wise convolution, which greatly reduces the amount of computation. The schematic diagrams of depth-wise convolution and point-wise convolution are shown in <xref ref-type="fig" rid="f3">
<bold>Figures&#xa0;3A, B</bold>
</xref>, respectively.</p>
<p>In the following, computing the number of floating point operations (FLOPs) helps to illustrate and compare the complexity of standard convolution and depth-wise separable convolution.</p>
<p>Assume that the convolution kernel size is <inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the size of the input feature map, and the size of the output feature map is <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>The number of standard convolution calculations is indicated as <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>:</p>
<disp-formula id="eq1">
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>S</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The number of depth-wise convolution calculations can be written as <xref ref-type="disp-formula" rid="eq2">Equation 2</xref>:</p>
<disp-formula id="eq2">
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>D</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>
<xref ref-type="disp-formula" rid="eq3">Equation 3</xref> demonstrates the number of pointwise convolution calculations:</p>
<disp-formula id="eq3">
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>P</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The number of depth-wise separable convolution calculations is displayed by <xref ref-type="disp-formula" rid="eq4">Equation 4</xref>:</p>
<disp-formula id="eq4">
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The ratio of the number of depth-wise separable convolution calculations to the number of standard convolution calculations is given by <xref ref-type="disp-formula" rid="eq5">Equation 5</xref>:</p>
<disp-formula id="eq5">
<label>(5)</label>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo>&#xa0;</mml:mo>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo>&#xa0;</mml:mo>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>S</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>+</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>According to <xref ref-type="disp-formula" rid="eq5">Equation 5</xref>, depth-wise separable convolution reduces the computations by a factor that depends on the number of output channels and the size of the convolution kernel. Thus, depth-wise separable convolutions can greatly decrease the computations when the number of network layers is continuously increasing.</p>
<p>SE attention improves the performance of neural networks (<xref ref-type="bibr" rid="B16">Hu et&#xa0;al., 2018</xref>). As shown in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3C</bold>
</xref>, the SE attention involves two primary processes: squeeze and excitation. In the squeeze phase, global average pooling is first performed on the feature maps of each channel. It generates a single weight for each channel and the purpose of this step is to integrate the global information for each channel. In the excitation step, two fully connected layers are introduced. The output of these layers is passed through an activation function that produces a weight. Then, the weight is applied to the original feature map, which effectively assigns different importance to each channel.</p>
<p>Therefore, the LCNet-3 and LCNet-5 modules reconstruct the proposed lightweight backbone network of LCGSC-YOLO for fast feature extraction. Compared with the original YOLO framework, LCNet as the backbone network can dramatically decrease the number of parameters and the computations.</p>
</sec>
<sec id="s2_2_2">
<label>2.2.2</label>
<title>Design of GSConv and VOVGSCSP modules</title>
<p>For a further reduction of the parameters and computations of the model, the GSConv module and the VOVGSCSP module are used to replace the original Conv and C3 modules, which are embedded in the neck network.</p>
<p>GSConv is a convolution strategy in depth-wise separable convolution (<xref ref-type="bibr" rid="B27">Li et&#xa0;al., 2022a</xref>). It has fewer parameters and a lower computational cost than standard convolution (convolution + BN + activation function). The implementation flow of GSConv is shown in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4A</bold>
</xref>. In the basic module of GSConv, the number of input channels is <inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the number of output channels is <inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Firstly, the input is processed by standard convolution to change the number of channels to <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo stretchy="false">/</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, which generates a hidden feature map with fewer channels and reduces the number of parameters. Then, the hidden layer is processed using DW convolution and the number of channels remains <inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo stretchy="false">/</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. Next, the result after the first standard convolution is connected with the result after DW convolution by Concat operation. Finally, the shuffle operation is introduced to achieve fast fusion of information among different channels, which enhances the extracted semantic information. The shuffle operation is shown in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4B</bold>
</xref>.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>The structure of GSConv and VOVGSCSP modules. <bold>(A)</bold> GSConv. <bold>(B)</bold> The channel shuffle operation. <bold>(C)</bold> GS bottleneck. <bold>(D)</bold> VOVGSCSP. <bold>(E)</bold> Modules relationships.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1398277-g004.tif"/>
</fig>
<p>Here, we also provide a brief analysis of FLOPs. Suppose that the output feature map width and height are denoted as <italic>W</italic> and <italic>H</italic>, respectively. <inline-formula>
<mml:math display="inline" id="im7">
<mml:mrow>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> means standard convolutional kernel size and <inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents DW convolutional kernel size. <inline-formula>
<mml:math display="inline" id="im9">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im10">
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> indicate the numbers of channels of the input and output feature maps, respectively.</p>
<p>
<xref ref-type="disp-formula" rid="eq6">Equation 6</xref> represents the number of standard convolution calculations:</p>
<disp-formula id="eq6">
<label>(6)</label>
<mml:math display="block" id="M6">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mi>W</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The number of GSConv calculations is demonstrated in <xref ref-type="disp-formula" rid="eq7">Equation 7</xref>:</p>
<disp-formula id="eq7">
<label>(7)</label>
<mml:math display="block" id="M7">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mi>W</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo stretchy="false">/</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The ratio of GSConv calculations to standard convolution calculations is given by <xref ref-type="disp-formula" rid="eq8">Equation 8</xref>:</p>
<disp-formula id="eq8">
<label>(8)</label>
<mml:math display="block" id="M8">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>S</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo stretchy="false">/</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>From <xref ref-type="disp-formula" rid="eq8">Equation 8</xref>, it is possible to draw the following conclusions. As the number of channels continues to increase, the FLOPs of GSConv are nearly half that of standard convolution. Due to the increase in the number of input image channels in the LCGSC-YOLO model after backbone feature extraction, the number of feature map channels is raised from 3 to 512, as shown in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>. When the number of channels is 512, the computation amount of GSConv is close to half that of the standard convolution. Therefore, the application of the GSConv module significantly reduces the computations compared with the standard convolution.</p>
<p>Next, GSConv is utilized to form the GS bottleneck, which is shown in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4C</bold>
</xref>. It consists of two GSConv layers. The first GSConv layer halves the number of channels. Further, the output is residually concatenated with the former GSConv. Finally, the VOVGSCSP module consists of multiple GS bottleneck modules. In the VOVGSCSP module, the Conv module compresses the channel number to one-half of the original number. Then, the result after the GS bottleneck modules is concatenated with the result after the Conv module. The network diagram of the VOVGSCSP is displayed in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4D</bold>
</xref>.</p>
<p>As a result, we choose to merge GSConv into the Neck network with a large number of channels. Specifically, the GSConv module and the VOVGSCSP module are utilized to substitute the original Conv and C3 modules, which can significantly reduce the computations. In addition, <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4E</bold>
</xref> shows the relationship of the connections among the modules.</p>
</sec>
<sec id="s2_2_3">
<label>2.2.3</label>
<title>The introduction of coordinate attention module</title>
<p>By improving the lightweighting of each module of the YOLO framework, the number of parameters and computations of the model can be dramatically decreased and the inference speed of the model can be improved. However, this inevitably brings about a degradation in model detection accuracy caused by the lightweighting of the model.</p>
<p>As a consequence, introducing a coordinate attention (CA) module (<xref ref-type="bibr" rid="B32">Niu et&#xa0;al., 2021</xref>) at key positions of the network is an effective strategy to increase the accuracy of the model in detecting leaf diseases. With the above operational improvements, the ability of the network to recognize and localize leaf diseases can be improved without adding too much computation.</p>
<p>As shown in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5</bold>
</xref>, the CA attention mechanism performs feature extraction in both directions of the input feature map, which not only obtains the relationship among the channels, but also takes into account the positional information about the directions. It helps the model to better localize and identify the target. The feature information in both directions can be fused by Concat, and then non-linear activation is performed using the h_swish function to obtain intermediate features of the coded information. The intermediate information feature map is divided in both height and width directions to get two different dimension vectors. Finally, non-linear activation is performed with a sigmoid function to generate the corresponding attention weights.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>The structure of coordinate attention.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1398277-g005.tif"/>
</fig>
<p>In consequence, the coordinate attention (CA) is embedded in the tail of the backbone and the end of each VOVGSCSP module, which enhances the recognition capability of the model for various apple leaf diseases. CA attention is not only excellent in performance, but also has lightweight characteristics, which can be flexibly adopted in the corresponding network framework. As a result, it alleviates the accuracy loss from model compression without incurring substantial computational costs.</p>
</sec>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Experiments analysis and discussion</title>
<sec id="s3_1">
<label>3.1</label>
<title>Implementations and settings</title>
<p>The experiments are performed on an Ubuntu system with an Intel Xeon(R) Silver 4214R CPU@2.40 GHz x48 processor, 128 GB of RAM, an NVIDIA Corporation TU102GL [Quadro RTX 8000] graphics card, CUDA 12.2, PyTorch 2.1.0, and Python 3.9.18. The hyperparameters of the experiments are set as follows: the number of epochs is set to 300, the initial learning rate is 0.01, the optimizer is SGD, and the batch size is fixed to 32. All apple leaf disease data are split in a ratio of 7:2:1 into the training set, validation set, and test set.</p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Evaluation indicators</title>
<p>To objectively evaluate the validity of the experimental results, the paper chooses mean average precision (mAP), precision (P), and recall (R) as the objective evaluation metrics of the experiment (<xref ref-type="bibr" rid="B12">Hossin and Sulaiman, 2015</xref>). The mAP denotes the sum of the average precision (AP) values of all categories divided by the number of categories. The P means the ratio of the number of correctly predicted positive samples to the number of all samples predicted as positive. The R represents the ratio of the number of correctly predicted positive samples to the number of all actual positive samples. The assessment metrics were calculated according to the following <xref ref-type="disp-formula" rid="eq9">Equations 9</xref>&#x2013;<xref ref-type="disp-formula" rid="eq12">12</xref>.</p>
<disp-formula id="eq9">
<label>(9)</label>
<mml:math display="block" id="M9">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq10">
<label>(10)</label>
<mml:math display="block" id="M10">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq11">
<label>(11)</label>
<mml:math display="block" id="M11">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>=</mml:mo>
<mml:munderover>
<mml:mo>&#x222b;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mn>1</mml:mn>
</mml:munderover>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq12">
<label>(12)</label>
<mml:math display="block" id="M12">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>TP (True Positive) indicates the number of positive samples that the model correctly predicts as positive. FP (False Positive) expresses the number of negative samples that the model incorrectly predicts as positive. FN (False Negative) signifies the number of positive samples that the model incorrectly predicts as negative (<xref ref-type="bibr" rid="B46">Zhu et&#xa0;al., 2023</xref>).</p>
<p>Moreover, the number of parameters (Para) and FLOPs are employed to assess the complexity of the model (<xref ref-type="bibr" rid="B22">Justus et&#xa0;al., 2018</xref>). Assume that the convolution kernel size is <inline-formula>
<mml:math display="inline" id="im11">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the size of the input feature map, and the size of the output feature map is <inline-formula>
<mml:math display="inline" id="im12">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. The smaller the number of parameters and FLOPs, the lower the complexity of the model, which means it is more suitable for deployment on resource-constrained embedded devices.</p>
<p>The number of depth-wise separable convolution parameters is indicated as <xref ref-type="disp-formula" rid="eq13">Equation 13</xref>:</p>
<disp-formula id="eq13">
<label>(13)</label>
<mml:math display="block" id="M13">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The calculation formula of FLOPs for depth-wise separable convolution is shown in <xref ref-type="disp-formula" rid="eq4">Equation 4</xref>.</p>
<p>Finally, frames per second (FPS) is utilized to evaluate the inference speed of the model (<xref ref-type="bibr" rid="B25">Kiani Galoogahi et&#xa0;al., 2017</xref>). Larger values of FPS indicate that more data is processed within the same time.</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Ablation experiments</title>
<p>A series of testing experiments validate the effectiveness of the different lightweighting modules proposed in the paper. In order to ensure the generalization ability of the model, this study has adopted the method of dividing the validation set. The datasets have been divided according to 70% for training, 20% for validation, and 10% for testing. With this validation strategy, this study evaluated the performance of the model on unseen data. Test 1 indicates YOLOv5s, which is treated as a baseline model. Test 2 means reconstructing the backbone of the YOLO by LCNet. Test 3 demonstrates the adoption of the GSConv module and the VOVGSCSP module to replace the Conv and C3 modules in the YOLO framework, respectively. Test 4 indicates a combination of improvements from Test 2 and Test 3. The results of the objective assessment of the model detection performance with different improvement modules are listed in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>The results of ablation experiments with different lightweighting improvement methods.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left"/>
<th valign="top" align="left">Model</th>
<th valign="top" align="center">P/%</th>
<th valign="top" align="center">R/%</th>
<th valign="top" align="center">mAP/%</th>
<th valign="top" align="center">Para/M</th>
<th valign="top" align="center">FLOPs/G</th>
<th valign="top" align="center">FPS</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">Test 1</td>
<td valign="top" align="left">YOLOv5s</td>
<td valign="top" align="center">
<bold>93.8</bold>
</td>
<td valign="top" align="center">
<bold>93.4</bold>
</td>
<td valign="top" align="center">
<bold>96.6</bold>
</td>
<td valign="top" align="center">7.03</td>
<td valign="top" align="center">16.0</td>
<td valign="top" align="center">35</td>
</tr>
<tr>
<td valign="top" align="center">Test 2</td>
<td valign="top" align="left">YOLO+LCNet</td>
<td valign="top" align="center">92.9</td>
<td valign="top" align="center">92.2</td>
<td valign="top" align="center">95.3</td>
<td valign="top" align="center">3.64</td>
<td valign="top" align="center">6.3</td>
<td valign="top" align="center">56</td>
</tr>
<tr>
<td valign="top" align="center">Test 3</td>
<td valign="top" align="left">YOLO+GS_VOV</td>
<td valign="top" align="center">93.6</td>
<td valign="top" align="center">92.5</td>
<td valign="top" align="center">96.1</td>
<td valign="top" align="center">6.04</td>
<td valign="top" align="center">14.2</td>
<td valign="top" align="center">39</td>
</tr>
<tr>
<td valign="top" align="center">Test 4</td>
<td valign="top" align="left">YOLO+LCNet+GS_VOV</td>
<td valign="top" align="center">92.1</td>
<td valign="top" align="center">91.6</td>
<td valign="top" align="center">94.2</td>
<td valign="top" align="center">
<bold>2.61</bold>
</td>
<td valign="top" align="center">
<bold>5.6</bold>
</td>
<td valign="top" align="center">
<bold>60</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values represent the best performance.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>The conclusion can be obtained by analyzing the experimental results of Test 1 and Test 2. Compared with YOLOv5s, the number of parameters and FLOPs of YOLO-LCNet are decreased by 48.22% and 60.63%, respectively, and the FPS is increased by 60%. It demonstrates that utilizing the LCNet module to reconstruct the backbone network of the YOLO framework can dramatically decrease the complexity of the model and can quite obviously increase the inference speed of the model. From Test 1 and Test 3, the modified neck network of YOLO with GSConv_VOVGSCSP module (YOLO+GS_VOV) reduces the number of parameters and FLOPs by 0.99M and 1.8G, respectively, while mAP is reduced by only 0.5%. The results confirm that the introduction of the proposed GSConv_VOVGSCSP (GS_VOV) module into the neck network not only makes the model more lightweight but also has little effect on the feature fusion capability among the different network layers in the neck network. Based on the experimental results of Test 4, it can be observed that the number of model parameters and FLOPs in the YOLO framework introduced by LCNet together with the GS_VOV module are only 37.12% and 35% of those in YOLOv5s, respectively. The model inference speed attained 60 FPS, which is 1.71 times faster than that of YOLOv5s. The above discussion and analysis demonstrate the effectiveness of the proposed method.</p>
<p>It is clear from <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref> that the lightweighting of the model inevitably brings about the problem of detection accuracy degradation. Therefore, it is very necessary to improve the detection accuracy as much as possible without bringing in higher computations.</p>
</sec>
<sec id="s3_4" sec-type="discussion">
<label>3.4</label>
<title>Discussion of different attention mechanisms</title>
<p>We take YOLO+LCNet+GS_VOV as the baseline network and verify the influence on model performance after introducing different attention modules. Test 4 is YOLO+LCNet+GS_VOV, which combines the LCNet, GSConv and VOVGSCSP modules. Test 5 represents the SE module that is added to Test 4, Test 6 indicates the CBAM module that is introduced to Test 4, and Test 7 is the proposed LCGSC-YOLO, which integrates the CA module into Test 4. The objective evaluation results of introducing different attention modules on the performance of YOLO+LCNet+GS_VOV are reflected in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Results of introducing different improvements in YOLO+LCNet+GS_VOV.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left"/>
<th valign="top" align="left">Model</th>
<th valign="top" align="center">P/%</th>
<th valign="top" align="center">R/%</th>
<th valign="top" align="center">mAP/%</th>
<th valign="top" align="center">Para/M</th>
<th valign="top" align="center">FLOPs/G</th>
<th valign="top" align="center">FPS</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">Test 4</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="center">92.1</td>
<td valign="top" align="center">91.6</td>
<td valign="top" align="center">94.2</td>
<td valign="top" align="center">
<bold>2.61</bold>
</td>
<td valign="top" align="center">
<bold>5.6</bold>
</td>
<td valign="top" align="center">
<bold>60</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">Test 5</td>
<td valign="top" align="left">+SE</td>
<td valign="top" align="center">92.8</td>
<td valign="top" align="center">91.7</td>
<td valign="top" align="center">94.7</td>
<td valign="top" align="center">2.81</td>
<td valign="top" align="center">5.9</td>
<td valign="top" align="center">58</td>
</tr>
<tr>
<td valign="top" align="center">Test 6</td>
<td valign="top" align="left">+CBAM</td>
<td valign="top" align="center">93.2</td>
<td valign="top" align="center">91.9</td>
<td valign="top" align="center">95.0</td>
<td valign="top" align="center">2.84</td>
<td valign="top" align="center">6.9</td>
<td valign="top" align="center">50</td>
</tr>
<tr>
<td valign="top" align="center">Test 7</td>
<td valign="top" align="left">+CA</td>
<td valign="top" align="center">
<bold>93.4</bold>
</td>
<td valign="top" align="center">
<bold>92.0</bold>
</td>
<td valign="top" align="center">
<bold>95.5</bold>
</td>
<td valign="top" align="center">2.96</td>
<td valign="top" align="center">6.7</td>
<td valign="top" align="center">53</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values represent the best performance.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Compared to Test 4, the results of Test 5, Test 6, and Test 7 demonstrate that adding the attention mechanism to YOLO+LCNet+GS_VOV can enhance the detection performance of the model. Although all the listed attention mechanisms improve detection accuracy, they simultaneously increase the number of parameters and decrease the model inference speed. However, the task at this stage is to maximize accuracy within a limited range of parameter variation. The key is to reach a balance between lightweighting and accuracy. Therefore, in this paper, CA is chosen at this stage because of the desire to increase the detection accuracy as much as possible. As can be observed in Test 4 and Test 7, the mAP of LCGSC-YOLO is 1.3% higher than that of YOLO+LCNet+GS_VOV, while the number of parameters and computations only increase by 0.35 M and 1.1 G, respectively. Even though there is some computational cost associated with the approach, it has very little effect on the inference speed of the model. The inference speed of LCGSC-YOLO is reduced by only 7 FPS. With the above results, the CA attention mechanism can effectively alleviate the problem of detection accuracy degradation caused by the model lightweighting. Therefore, the addition of the CA attention mechanism to the proposed lightweight model can further improve the model performance.</p>
<p>After introducing the CA attention module in the lightweight model, the research further conducts ablation experiments by combining CA with above modules of lightweight to verify the independence of the CA module and to prove that there is no dependency among the modules. The results in <xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref> show that Test 8 represents YOLO+GS_VOV+CA, Test 9 denotes YOLO+LCNet+CA, and Test 10 indicates YOLOv5s+CA. Compared with Test 3 in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>, Test 8 shows an increase in accuracy and recall after the introduction of the CA module, proving that the CA module plays an active role in model performance optimization. Similarly, the comparison between Test 9 and Test 2 shows that the introduction of the CA module improves the detection performance, with mAP improving to 95.9%. Test 10 further demonstrates the effectiveness of the CA module in the YOLOv5s model, with mAP reaching 97.1%. The above experimental results validate that the CA module is able to produce improvements on multiple lightweight modules. However, although the detection performance is improved, this improvement is accompanied by an increase in both the number of parameters and the computational complexity. Therefore, a balance must be struck between accuracy and model complexity.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Results of ablation experiments after introduction of CA attention.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left"/>
<th valign="top" align="left">Model</th>
<th valign="top" align="center">P/%</th>
<th valign="top" align="center">R/%</th>
<th valign="top" align="center">mAP/%</th>
<th valign="top" align="center">Para/M</th>
<th valign="top" align="center">FLOPs/G</th>
<th valign="top" align="center">FPS</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">Test 7</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="center">93.4</td>
<td valign="top" align="center">92.0</td>
<td valign="top" align="center">95.5</td>
<td valign="top" align="center">
<bold>2.96</bold>
</td>
<td valign="top" align="center">
<bold>6.7</bold>
</td>
<td valign="top" align="center">
<bold>53</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">Test 8</td>
<td valign="top" align="left">YOLO+GS_VOV+CA</td>
<td valign="top" align="center">93.9</td>
<td valign="top" align="center">92.9</td>
<td valign="top" align="center">96.5</td>
<td valign="top" align="center">6.38</td>
<td valign="top" align="center">15.1</td>
<td valign="top" align="center">37</td>
</tr>
<tr>
<td valign="top" align="center">Test 9</td>
<td valign="top" align="left">YOLO+LCNet+CA</td>
<td valign="top" align="center">93.2</td>
<td valign="top" align="center">92.8</td>
<td valign="top" align="center">95.9</td>
<td valign="top" align="center">4.18</td>
<td valign="top" align="center">6.9</td>
<td valign="top" align="center">52</td>
</tr>
<tr>
<td valign="top" align="center">Test 10</td>
<td valign="top" align="left">YOLOv5s+CA</td>
<td valign="top" align="center">
<bold>94.5</bold>
</td>
<td valign="top" align="center">
<bold>93.8</bold>
</td>
<td valign="top" align="center">
<bold>97.1</bold>
</td>
<td valign="top" align="center">7.26</td>
<td valign="top" align="center">16.8</td>
<td valign="top" align="center">33</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values represent the best performance.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>In conclusion, LCGSC-YOLO has several advantages. Firstly, the model, with its small number of parameters, does not require a large amount of storage space. Secondly, the model has a low computational cost and can be run with limited hardware resources. Finally, the model training and inference speed is fast and can process the data quickly. Therefore, LCGSC-YOLO is more suitable to be deployed in embedded devices for detecting apple leaf diseases.</p>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>The selection of lightweight backbone networks</title>
<p>In this subsection, aiming to validate the performance of different backbone networks, the backbone of the YOLO framework is reconstructed by the current mainstream lightweight modules. YOLO-MN3 means the backbone of YOLO is constructed by employing the MobileNetv3 module (<xref ref-type="bibr" rid="B13">Howard et&#xa0;al., 2019</xref>), and YOLO-SN2 illustrates that it is reconstructed by applying the basic modules of ShuffleNetv2 (<xref ref-type="bibr" rid="B29">Ma et&#xa0;al., 2018</xref>), YOLO-GN and YOLO-EN2 denote that the YOLO backbone is composed of modules applying GhostNet (<xref ref-type="bibr" rid="B24">Khan et&#xa0;al., 2022</xref>) and EfficientNetv2 (<xref ref-type="bibr" rid="B41">Tian et&#xa0;al., 2020</xref>), respectively, and the backbone with the LCNet is known as YOLO-LCNet. <xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref> lists the results of the test experiments for different lightweight backbone networks.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Comparison of experimental results of different lightweight improved backbone.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="center">P/%</th>
<th valign="top" align="center">R/%</th>
<th valign="top" align="center">mAP/%</th>
<th valign="top" align="center">Para/M</th>
<th valign="top" align="center">FLOPs/G</th>
<th valign="top" align="center">FPS</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">YOLOv5s</td>
<td valign="top" align="center">
<bold>93.8</bold>
</td>
<td valign="top" align="center">
<bold>93.4</bold>
</td>
<td valign="top" align="center">
<bold>96.6</bold>
</td>
<td valign="top" align="center">7.03</td>
<td valign="top" align="center">16.0</td>
<td valign="top" align="center">35</td>
</tr>
<tr>
<td valign="top" align="left">YOLO-MN3</td>
<td valign="top" align="center">92.2</td>
<td valign="top" align="center">91.5</td>
<td valign="top" align="center">95.3</td>
<td valign="top" align="center">3.91</td>
<td valign="top" align="center">7.2</td>
<td valign="top" align="center">48</td>
</tr>
<tr>
<td valign="top" align="left">YOLO-SN2</td>
<td valign="top" align="center">93.2</td>
<td valign="top" align="center">91.6</td>
<td valign="top" align="center">95.5</td>
<td valign="top" align="center">3.85</td>
<td valign="top" align="center">6.9</td>
<td valign="top" align="center">48</td>
</tr>
<tr>
<td valign="top" align="left">YOLO-GN</td>
<td valign="top" align="center">93.5</td>
<td valign="top" align="center">93.3</td>
<td valign="top" align="center">96.2</td>
<td valign="top" align="center">5.39</td>
<td valign="top" align="center">8.6</td>
<td valign="top" align="center">43</td>
</tr>
<tr>
<td valign="top" align="left">YOLO-EN2</td>
<td valign="top" align="center">93.3</td>
<td valign="top" align="center">92.7</td>
<td valign="top" align="center">95.7</td>
<td valign="top" align="center">3.78</td>
<td valign="top" align="center">6.5</td>
<td valign="top" align="center">54</td>
</tr>
<tr>
<td valign="top" align="left">YOLO-LCNet</td>
<td valign="top" align="center">92.9</td>
<td valign="top" align="center">92.2</td>
<td valign="top" align="center">95.3</td>
<td valign="top" align="center">
<bold>3.64</bold>
</td>
<td valign="top" align="center">
<bold>6.3</bold>
</td>
<td valign="top" align="center">
<bold>56</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values represent the best performance.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>As can be noticed from <xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref>, all of the listed lightweight modules can to some degree decrease the number of parameters and computations of the model, but they inevitably cause a loss of accuracy. Compared with YOLO-MN3, YOLO-SN2, YOLO-GN and YOLO-EN2, YOLO-LCNet has lower model complexity and quicker inference speed. Specifically, the mAP of YOLOv5s is only 1.3% higher than that of YOLO-LCNet, but the number of parameters and FLOPs of YOLO-LCNet are 48.22% and 60.63% lower than those of YOLOv5s, respectively. In addition, the model inference speed of YOLO-LCNet is 60% faster than that of YOLOv5s. In a word, using LCNet to reconstruct the backbone network of YOLO is a better choice to reduce model complexity and enhance model inference speed.</p>
</sec>
<sec id="s3_6">
<label>3.6</label>
<title>Comparative experiments</title>
<p>In this section, the experimental results of the proposed method are compared with other methods related to leaf disease detection. Specifically, INAR-SSD (<xref ref-type="bibr" rid="B19">Jiang et&#xa0;al., 2019</xref>) serves as a detection model for apple leaf disease detection with the ALDD dataset. BTC-YOLOv5s (<xref ref-type="bibr" rid="B28">Li et&#xa0;al., 2023</xref>) and MGA-YOLO (<xref ref-type="bibr" rid="B42">Wang et&#xa0;al., 2022</xref>) are improved lightweight apple leaf disease detection models based on the FGVC8 datasets. Khan et&#xa0;al. apply YOLOv4 to apple leaf disease detection (<xref ref-type="bibr" rid="B24">Khan et&#xa0;al., 2022</xref>). The experimental results of different models are presented in <xref ref-type="table" rid="T6">
<bold>Table&#xa0;6</bold>
</xref>. The reason for selecting the above comparison methods is that they have some similarities with the method proposed in this study in terms of theoretical and technical characteristics. For example, both INAR-SSD and YOLOv4 belong to the classical methods in the field of target detection. BTC-YOLOv5s and MGA-YOLO are improved versions based on a lightweight target detection model, and their design ideas and technical features have some similarities with the method proposed in this study. In addition, these comparative methods use similar datasets when dealing with leaf disease detection tasks, and all attempt to address common challenges in leaf disease detection, such as light variations and leaf disease morphological diversity. Therefore, by comparing with these methods, the innovations and usefulness of the method proposed in this study can be better assessed in the context of current research hot spots and technological trends, as well as its strengths and limitations in solving the leaf disease detection problem.</p>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Comparison of experimental results of different lightweighting methods.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Method</th>
<th valign="top" align="center">INAR-SSD</th>
<th valign="top" align="center">BTC-YOLOv5s</th>
<th valign="top" align="center">YOLOv4</th>
<th valign="top" align="center">MGA-YOLO</th>
<th valign="top" align="center">LCGSC-YOLO</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">Scab</td>
<td valign="top" align="center">85.9</td>
<td valign="top" align="center">91.2</td>
<td valign="top" align="center">91.7</td>
<td valign="top" align="center">94.5</td>
<td valign="top" align="center">
<bold>96.3</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">Rust</td>
<td valign="top" align="center">82.3</td>
<td valign="top" align="center">89.1</td>
<td valign="top" align="center">90.2</td>
<td valign="top" align="center">92.8</td>
<td valign="top" align="center">
<bold>94.2</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">Mosaic</td>
<td valign="top" align="center">83.5</td>
<td valign="top" align="center">88.7</td>
<td valign="top" align="center">89.8</td>
<td valign="top" align="center">93.1</td>
<td valign="top" align="center">
<bold>94.9</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">Grey_spot</td>
<td valign="top" align="center">80.4</td>
<td valign="top" align="center">87.3</td>
<td valign="top" align="center">88.7</td>
<td valign="top" align="center">91.8</td>
<td valign="top" align="center">
<bold>92.9</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">Powdery_mildew</td>
<td valign="top" align="center">92.1</td>
<td valign="top" align="center">93.4</td>
<td valign="top" align="center">94.5</td>
<td valign="top" align="center">96.8</td>
<td valign="top" align="center">
<bold>99.2</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">Frog_eye_leaf_spot</td>
<td valign="top" align="center">90.4</td>
<td valign="top" align="center">92.6</td>
<td valign="top" align="center">93.1</td>
<td valign="top" align="center">95.9</td>
<td valign="top" align="center">
<bold>98.6</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">Alternaria_leaf_spot</td>
<td valign="top" align="center">81.2</td>
<td valign="top" align="center">88.4</td>
<td valign="top" align="center">89.6</td>
<td valign="top" align="center">92.1</td>
<td valign="top" align="center">
<bold>92.7</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">P/%</td>
<td valign="top" align="center">84.7</td>
<td valign="top" align="center">89.6</td>
<td valign="top" align="center">90.7</td>
<td valign="top" align="center">91.8</td>
<td valign="top" align="center">
<bold>93.4</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">R/%</td>
<td valign="top" align="center">83.5</td>
<td valign="top" align="center">88.7</td>
<td valign="top" align="center">89.8</td>
<td valign="top" align="center">90.9</td>
<td valign="top" align="center">
<bold>92.0</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">mAP/%</td>
<td valign="top" align="center">85.1</td>
<td valign="top" align="center">90.1</td>
<td valign="top" align="center">91.0</td>
<td valign="top" align="center">93.8</td>
<td valign="top" align="center">
<bold>95.5</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">Para/M</td>
<td valign="top" align="center">23.62</td>
<td valign="top" align="center">15.8</td>
<td valign="top" align="center">60.81</td>
<td valign="top" align="center">11.26</td>
<td valign="top" align="center">
<bold>2.96</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">FLOPs/G</td>
<td valign="top" align="center">89.62</td>
<td valign="top" align="center">53.16</td>
<td valign="top" align="center">44.8</td>
<td valign="top" align="center">28.4</td>
<td valign="top" align="center">
<bold>6.7</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">FPS</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">14</td>
<td valign="top" align="center">21</td>
<td valign="top" align="center">
<bold>53</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold values represent the best performance.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="T6">
<bold>Table&#xa0;6</bold>
</xref>, the proposed LCGSC-YOLO model has fewer parameters and computations compared with other experimental models. Moreover, it also shows excellent performance in terms of detection accuracy and inference speed. In comparison with INAR-SSD, LCGSC-YOLO achieves a 10.4% increase in detection accuracy and a 46 FPS improvement in inference speed. In addition, the proposed method has only 12.53% of the number of parameters and 7.47% of the FLOPs of INAR-SSD, respectively. Meanwhile, the comparison results with BTC-YOLOv5s and MGA-YOLO show that the detection accuracies of BTC-YOLOv5s and MGA-YOLO are 5.4% and 1.7% less than that of LCGSC-YOLO, respectively. Besides, the model inference speed of LCGSC-YOLO is 4.41 and 2.52 times faster than that of BTC-YOLOv5s and MGA-YOLO, respectively. Moreover, the parameter amount and FLOPs of LCGSC-YOLO are only 18.73% and 12.60% of those of BTC-YOLOv5s, and about a quarter of those of MGA-YOLO. In comparison with YOLOv4, LCGSC-YOLO has 57.85M and 38.1G fewer parameters and FLOPs, respectively, while the model inference speed of LCGSC-YOLO is 39 FPS more than YOLOv4. The above analysis results illustrate that the proposed method is superior to the comparative experimental methods in terms of comprehensive performance.</p>
<p>
<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref> shows the radar charts of the different model experimental results. It can be concluded that INAR-SSD has the largest FLOPs, YOLOv4 has the largest number of parameters, and the mean average precision, precisions, and recalls of the different modelling methods are almost overlapping. In addition, the FPS of LCGSC-YOLO is obviously superior to the other methods, while the number of parameters and FLOPs are lower than those of the comparative experimental methods. In terms of the area surrounded by the test results of different models, LCGSC-YOLO mainly occupies the left area of the figure, which implies that LCGSC-YOLO has better detection performance while having fewer parameters and FLOPs.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Radar plots showing the results of the five model tests.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1398277-g006.tif"/>
</fig>
<p>As illustrated in <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref>, a 3D bar chart is drawn based on the test results of different models, which can visualize the comparison results of different models. It can be concluded that the test results of different models do not show much difference in terms of mean average precision, precision, and recall. However, it can be observed that, from INAR-SSD to LCGSC-YOLO, the FPS gradually increases while the FLOPs gradually decrease. This indicates that, across the compared models, the computational amount gradually decreases and the inference speed continuously increases, with LCGSC-YOLO at the favorable end of both trends. Overall, compared with other models, LCGSC-YOLO has the lowest number of parameters and computational amount as well as the fastest inference speed.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>3D bar graphs of test results for five different models.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1398277-g007.tif"/>
</fig>
<p>To visualize the detection performance of the LCGSC-YOLO, <xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8</bold>
</xref> shows the detection results of INAR-SSD, BTC-YOLOv5s, YOLOv4, MGA-YOLO, and LCGSC-YOLO on different disease images. To improve readability, the detection information in <xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8</bold>
</xref> substitutes specific letters for disease names. A represents Scab, B represents Rust, C denotes Powdery_mildew, D denotes Frog_eye_leaf_spot, E indicates Alternaria leaf spot, F indicates Grey spot, and G means Mosaic. As shown in <xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8</bold>
</xref>, from the overall detection results, the proposed LCGSC-YOLO outperforms other algorithms in detecting different kinds of diseases. In particular, LCGSC-YOLO has better detection performance in detecting small diseases. To be specific, as indicated by the red circle from the Scab detection result images, it can be seen that LCGSC-YOLO can separately detect adjacent disease regions, while other algorithms recognize them as a single disease. Moreover, it is apparent from the result images of Rust diseases that LCGSC-YOLO can detect small disease areas at the edge positions. In addition, for the three diseases Frog_eye_leaf_spot, Alternaria leaf spot, and Grey spot, all methods have varying degrees of missed detection. In these three disease categories, it was difficult for all methods to identify all diseases due to the simultaneous presence of diseases in multiple leaves in the same scene. For Powdery_mildew and Mosaic, the detection results of all methods were almost the same.</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Comparison of different models for detecting apple leaf disease images. <bold>(A-G)</bold> indicate the names of the different diseases. The red circles show the contrasting positions.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1398277-g008.tif"/>
</fig>
<p>To further illustrate the superiority of LCGSC-YOLO, <xref ref-type="fig" rid="f9">
<bold>Figure&#xa0;9</bold>
</xref> compares the model detection capabilities of INAR-SSD, BTC-YOLOv5s, YOLOv4, MGA-YOLO, and LCGSC-YOLO on specific scenarios. The scenes from top to bottom are dark, rainy, strong lighting, multiple leaves, two spots and dense scenes. As displayed in <xref ref-type="fig" rid="f9">
<bold>Figure&#xa0;9</bold>
</xref>, for disease images in dark scenes, the other four algorithms did not recognize small diseases, while LCGSC-YOLO was able to recognize them. For images of rainy scenes, LCGSC-YOLO accurately detected Grey spot lesions that were difficult to recognize due to rainfall reflection, but the other four algorithms identified the disease spot as the same as adjacent disease spots. In the detection results of two spots scenes and multiple leaves scenes, there were varying degrees of missed detections. Due to the small size and dispersion of all diseases, it is difficult for all methods to identify all diseases. In strong lighting scenes, INAR-SSD incorrectly identifies a disease in the light. In addition, in dense scenes, LCGSC-YOLO can identify adjacent diseases separately. From the aforementioned analysis, it can be concluded that LCGSC-YOLO also has equally excellent detection performance in special scenarios.</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Comparison of the detection effects of different models on apple leaf disease images in special scenes. The different scenarios are represented from top to bottom.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1398277-g009.tif"/>
</fig>
<p>In general, by combining the detection capabilities of different models in different scenarios in <xref ref-type="fig" rid="f8">
<bold>Figures&#xa0;8</bold>
</xref>, <xref ref-type="fig" rid="f9">
<bold>9</bold>
</xref>, it is obvious that LCGSC-YOLO shows excellent performance in detecting apple leaf diseases. The model utilizes fewer parameters and lower computational effort to achieve efficient leaf disease detection, and especially excels in model lightweighting. However, despite the obvious advantages of LCGSC-YOLO in terms of lightweighting, it is still important to note that its performance may be degraded when dealing with complex scenarios such as dense diseases, multiple leaves or two spots. In addition, the model showed missed detection in terms of detecting tiny diseases, which requires further optimization of the method. Therefore, in the future, the study will continue to explore the optimization of the LCGSC-YOLO model to improve its robustness and adaptability, and to further optimize its detection accuracy for more comprehensive and stable apple leaf disease detection.</p>
</sec>
</sec>
<sec id="s4" sec-type="conclusions">
<label>4</label>
<title>Conclusions</title>
<p>To address the issues of complex background and high model complexity of apple leaf disease detection in natural scenes, the paper improves a lightweight model based on the YOLO framework and names it LCGSC-YOLO. The LCNet is employed to reconstruct the backbone network, which significantly decreases the complexity of the model. The GSConv module and the VOVGSCSP module are adopted into the neck network, which reduces the model parameters and computations while enhancing the feature fusion capability. The CA attention mechanism embedded in the network effectively alleviates the problem of degradation of detection accuracy caused by model lightweighting. Through experimental analysis and comparison, the mAP of LCGSC-YOLO is 95.5% and the inference speed is 53 FPS, which satisfies the requirements of practical applications. Therefore, this method can provide technical support for lightweight deployment of embedded devices in apple leaf disease detection.</p>
<p>However, this study may have some limitations in terms of data diversity and changes in environmental conditions. Firstly, the diversity of plant varieties, growth stages, and pests and diseases in agricultural scenarios requires broadly representative and comprehensive datasets, but the current datasets only cover several of the most common disease categories and cannot comprehensively cover all diseases and scenario types. Secondly, frequently changing light conditions can affect the accuracy of the model in predicting diseases. To overcome these limitations, this study plans to extend data collection, increase data diversity, and employ data enhancement techniques and light-invariant feature extraction methods to improve the robustness and adaptability of the model.</p>
<p>This research realizes the efficient detection of apple leaf diseases. The main influencing factors, such as environmental conditions and disease categories, were considered during the study to ensure the generalization ability and applicability of the model. However, apple health is also affected by a variety of other factors that need to be further explored in future research. Future research directions include, but are not limited to, the following. Firstly, the structure and parameters of the model will be further optimized to improve its applicability and robustness in complex situations. Secondly, this study will pay special attention to various changing situations in natural scenarios and continuously expand the datasets in order to evaluate the performance of the model more comprehensively. In addition, other advanced techniques and methods, such as transfer learning and federated learning, will be explored. With these improvements, this study expects to provide more comprehensive and reliable technical support for apple disease detection and management.</p>
</sec>
</body>
<back>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: <uri xlink:href="https://drive.google.com/drive/folders/1MRfK5eOm5-6KZTngPzpzjp9gx1NyEvZY?usp=drive_link">https://drive.google.com/drive/folders/1MRfK5eOm5-6KZTngPzpzjp9gx1NyEvZY?usp=drive_link</uri>.</p>
</sec>
<sec id="s6" sec-type="author-contributions">
<title>Author contributions</title>
<p>JW: Writing &#x2013; review &amp; editing, Validation, Funding acquisition, Conceptualization. CQ: Writing &#x2013; original draft, Validation, Software, Conceptualization. BH: Writing &#x2013; review &amp; editing, Resources, Investigation, Data curation. YY: Writing &#x2013; original draft, Resources, Investigation, Data curation. YZ: Writing &#x2013; review &amp; editing, Resources, Investigation, Data curation. WF: Writing &#x2013; review &amp; editing, Supervision.</p>
</sec>
<sec id="s7" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This research was funded partly by the Doctoral Foundation of Henan Polytechnic University under Grant B2022-15.</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>The authors would like to thank the reviewers for their constructive comments and suggestions, which greatly strengthened this paper.</p>
</ack>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s9" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ahmed</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Shahidi</surname> <given-names>T. R.</given-names>
</name>
<name>
<surname>Alam</surname> <given-names>S. M. I.</given-names>
</name>
<name>
<surname>Momen</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Rice leaf disease detection using machine learning techniques</article-title>,&#x201d; in <conf-name>2019 International Conference on Sustainable Technologies for Industry 4.0 (STI)</conf-name>, (<conf-loc>Dhaka, Bangladesh</conf-loc>: <publisher-name>IEEE</publisher-name>), <fpage>01</fpage>&#x2013;<lpage>05</lpage>.</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Arsenovic</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Karanovic</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Sladojevic</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Anderla</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Stefanovic</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Solving current limitations of deep learning based approaches for plant disease detection</article-title>. <source>Symmetry</source> <volume>11</volume>, <fpage>01</fpage>&#x2013;<lpage>18</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/sym11070939</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Attri</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Awasthi</surname> <given-names>L. K.</given-names>
</name>
<name>
<surname>Sharma</surname> <given-names>T. P.</given-names>
</name>
<name>
<surname>Rathee</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A review of deep learning techniques used in agriculture</article-title>. <source>Ecol. Inf.</source> <volume>77</volume>, <fpage>01</fpage>&#x2013;<lpage>22</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ecoinf.2023.102217</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barman</surname> <given-names>U.</given-names>
</name>
<name>
<surname>Choudhury</surname> <given-names>R. D.</given-names>
</name>
<name>
<surname>Sahu</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Barman</surname> <given-names>G. G.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Comparison of convolution neural networks for smartphone image based real time classification of citrus leaf disease</article-title>. <source>Comput. Electron. Agric.</source> <volume>177</volume>, <fpage>01</fpage>&#x2013;<lpage>09</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2020.105661</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bhuiyan</surname> <given-names>M. A. B.</given-names>
</name>
<name>
<surname>Abdullah</surname> <given-names>H. M.</given-names>
</name>
<name>
<surname>Arman</surname> <given-names>S. E.</given-names>
</name>
<name>
<surname>Rahman</surname> <given-names>S. S.</given-names>
</name>
<name>
<surname>Al Mahmud</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Bananasqueezenet: A very fast, lightweight convolutional neural network for the diagnosis of three prominent banana leaf diseases</article-title>. <source>Smart Agric. Technol.</source> <volume>4</volume>, <fpage>01</fpage>&#x2013;<lpage>13</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.atech.2023.100214</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bi</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Duan</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Kang</surname> <given-names>J.-R.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>MobileNet based apple leaf diseases identification</article-title>. <source>Mobile Networks Appl.</source> <volume>27</volume>, <fpage>1</fpage>&#x2013;<lpage>9</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11036-020-01640-1</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Chollet</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Xception: Deep learning with depthwise separable convolutions</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. (<conf-loc>Las Vegas, Nevada</conf-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1251</fpage>&#x2013;<lpage>1258</lpage>.</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cui</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Du</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>PP-LCNet: A lightweight CPU convolutional neural network</article-title>. <source>arXiv preprint arXiv:2109.15099</source> <volume>12</volume>, <fpage>1</fpage>&#x2013;<lpage>9</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2109.15099</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dhaka</surname> <given-names>V. S.</given-names>
</name>
<name>
<surname>Meena</surname> <given-names>S. V.</given-names>
</name>
<name>
<surname>Rani</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Sinwar</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Ijaz</surname> <given-names>M. F.</given-names>
</name>
<name>
<surname>Wo&#x17a;niak</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A survey of deep convolutional neural networks applied for prediction of plant leaf diseases</article-title>. <source>Sensors</source> <volume>21</volume>, <elocation-id>4749</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s21144749</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Elbasi</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Zaki</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Topcu</surname> <given-names>A. E.</given-names>
</name>
<name>
<surname>Abdelbaki</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Zreikat</surname> <given-names>A. I.</given-names>
</name>
<name>
<surname>Cina</surname> <given-names>E.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Crop prediction model using machine learning algorithms</article-title>. <source>Appl. Sci.</source> <volume>13</volume>, <elocation-id>9288</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/app13169288</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Harakannanavar</surname> <given-names>S. S.</given-names>
</name>
<name>
<surname>Rudagi</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Puranikmath</surname> <given-names>V. I.</given-names>
</name>
<name>
<surname>Siddiqua</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Pramodhini</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Plant leaf disease detection using computer vision and machine learning algorithms</article-title>. <source>Global Transit. Proc.</source> <volume>3</volume>, <fpage>305</fpage>&#x2013;<lpage>310</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.gltp.2022.03.016</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hossin</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Sulaiman</surname> <given-names>M. N.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>A review on evaluation metrics for data classification evaluations</article-title>. <source>Int. J. Data Min. Knowledge Manage. Process</source> <volume>5</volume>, <fpage>01</fpage>&#x2013;<lpage>11</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5121/ijdkp.2015.5201</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Howard</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Sandler</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Chu</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>L.-C.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Tan</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). &#x201c;<article-title>Searching for MobileNetV3</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF international conference on computer vision</conf-name>. (<conf-loc>Long Beach Convention &amp; Entertainment Center, Los Angeles, CA, United States</conf-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1314</fpage>&#x2013;<lpage>1324</lpage>.</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>L.-Y.</given-names>
</name>
<name>
<surname>Hong</surname> <given-names>Y. A.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J.-Y.</given-names>
</name>
<name>
<surname>Yang-Tian Su</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Gong</surname> <given-names>X.-Q.</given-names>
</name>
<name>
<surname>Kun</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Overexpression of mdmips1 enhances drought tolerance and water-use efficiency in apple</article-title>. <source>J. Integr. Agric.</source> <volume>21</volume>, <fpage>1968</fpage>&#x2013;<lpage>1981</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S2095-3119(21)63822-4</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Lightweight one-stage maize leaf disease detection model with knowledge distillation</article-title>. <source>Agriculture</source> <volume>13</volume>, <fpage>01</fpage>&#x2013;<lpage>22</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture13091664</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Squeeze-and-excitation networks</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. (<conf-loc>Seoul, South Korea</conf-loc>: <publisher-name>IEEE</publisher-name>), <fpage>7132</fpage>&#x2013;<lpage>7141</lpage>.</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hyson</surname> <given-names>D. A.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>A comprehensive review of apples and apple components and their relationship to human health</article-title>. <source>Adv. Nutr.</source> <volume>2</volume>, <fpage>408</fpage>&#x2013;<lpage>420</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3945/an.111.000513</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jackulin</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Murugavalli</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A comprehensive review on detection of plant disease using machine learning and deep learning approaches</article-title>. <source>Measure.: Sensors</source> <volume>24</volume>, <fpage>01</fpage>&#x2013;<lpage>10</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.measen.2022.100441</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>B.</given-names>
</name>
<name>
<surname>He</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Real-time detection of apple leaf diseases using deep learning approach based on improved convolutional neural networks</article-title>. <source>IEEE Access</source> <volume>7</volume>, <fpage>59069</fpage>&#x2013;<lpage>59080</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2019.2914929</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Cai</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Image recognition of four rice leaf diseases based on deep learning and support vector machine</article-title>. <source>Comput. Electron. Agric.</source> <volume>179</volume>, <fpage>01</fpage>&#x2013;<lpage>09</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2020.105824</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Johannes</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Picon</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Alvarez-Gila</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Echazarra</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Rodriguez-Vaamonde</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Navajas</surname> <given-names>A. D.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Automatic plant disease diagnosis using mobile capture devices, applied on a wheat use case</article-title>. <source>Comput. Electron. Agric.</source> <volume>138</volume>, <fpage>200</fpage>&#x2013;<lpage>209</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2017.04.013</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Justus</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Brennan</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Bonner</surname> <given-names>S.</given-names>
</name>
<name>
<surname>McGough</surname> <given-names>A. S.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Predicting the computational cost of deep learning models</article-title>,&#x201d; in <conf-name>2018 IEEE international conference on big data (Big Data)</conf-name>. (<conf-loc>Seattle, WA, USA</conf-loc>: <publisher-name>IEEE</publisher-name>), <fpage>3873</fpage>&#x2013;<lpage>3882</lpage>.</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kaur</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Harnal</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Gautam</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Singh</surname> <given-names>M. P.</given-names>
</name>
<name>
<surname>Singh</surname> <given-names>S. P.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>An approach for characterization of infected area in tomato leaf disease based on deep learning and object detection technique</article-title>. <source>Eng. Appl. Artif. Intell.</source> <volume>115</volume>, <fpage>01</fpage>&#x2013;<lpage>12</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.engappai.2022.105210</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khan</surname> <given-names>A. I.</given-names>
</name>
<name>
<surname>Quadri</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Banday</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Shah</surname> <given-names>J. L.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Deep diagnosis: A real-time apple leaf disease detection system based on deep learning</article-title>. <source>Comput. Electron. Agric.</source> <volume>198</volume>, <elocation-id>107093</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2022.107093</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Kiani Galoogahi</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Fagg</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Ramanan</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Lucey</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Need for speed: A benchmark for higher frame rate object tracking</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE International Conference on Computer Vision</conf-name>. (<conf-loc>Seattle, WA, USA</conf-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1125</fpage>&#x2013;<lpage>1134</lpage>.</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Qiao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2022</year>b). <article-title>A multi-scale cucumber disease detection method in natural scenes based on YOLOv5</article-title>. <source>Comput. Electron. Agric.</source> <volume>202</volume>, <fpage>01</fpage>&#x2013;<lpage>12</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2022.107363</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhan</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Ren</surname> <given-names>Q.</given-names>
</name>
</person-group> (<year>2022</year>a). <article-title>Slim-neck by GSConv: A better design paradigm of detector architectures for autonomous vehicles</article-title>. <source>arXiv preprint arXiv:2206.02424</source> <volume>120</volume>, <fpage>01</fpage>&#x2013;<lpage>17</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11554-024-01436-6</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Fang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Yin</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Real-time detection of apple leaf diseases in natural scenes based on YOLOv5</article-title>. <source>Agriculture</source> <volume>13</volume>, <elocation-id>878</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture13040878</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Ma</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>H.-T.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>ShuffleNet V2: Practical guidelines for efficient CNN architecture design</article-title>,&#x201d; in <conf-name>Proceedings of the European conference on computer vision (ECCV)</conf-name>. (<conf-loc>Anchorage, Alaska, United States</conf-loc>: <publisher-name>Springer</publisher-name>), <fpage>116</fpage>&#x2013;<lpage>131</lpage>.</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maddikunta</surname> <given-names>P. K. R.</given-names>
</name>
<name>
<surname>Hakak</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Alazab</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Bhattacharya</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Gadekallu</surname> <given-names>T. R.</given-names>
</name>
<name>
<surname>Khan</surname> <given-names>W. Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). <article-title>Unmanned aerial vehicles in smart agriculture: Applications, requirements, and challenges</article-title>. <source>IEEE Sensors J.</source> <volume>21</volume>, <fpage>17608</fpage>&#x2013;<lpage>17619</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JSEN.2021.3049471</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ngugi</surname> <given-names>L. C.</given-names>
</name>
<name>
<surname>Abelwahab</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Abo-Zahhad</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Recent advances in image processing techniques for automated leaf pest and disease recognition&#x2013;a review</article-title>. <source>Inf. Process. Agric.</source> <volume>8</volume>, <fpage>27</fpage>&#x2013;<lpage>51</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.inpa.2020.04.004</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Niu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhong</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A review on the attention mechanism of deep learning</article-title>. <source>Neurocomputing</source> <volume>452</volume>, <fpage>48</fpage>&#x2013;<lpage>62</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.neucom.2021.03.091</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Orchi</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Sadik</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Khaldoun</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>On using artificial intelligence and the internet of things for crop disease detection: A contemporary survey</article-title>. <source>Agriculture</source> <volume>12</volume>, <elocation-id>9</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture12010009</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Rastogi</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Arora</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Sharma</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Leaf disease detection and grading using computer vision technology &amp; fuzzy logic</article-title>,&#x201d; in <conf-name>2015 2nd international conference on signal processing and integrated networks (SPIN)</conf-name>. (<conf-loc>Noida, India</conf-loc>: <publisher-name>IEEE</publisher-name>), <fpage>500</fpage>&#x2013;<lpage>505</lpage>.</citation>
</ref>
<ref id="B35">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Redmon</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Divvala</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Farhadi</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>You only look once: Unified, real-time object detection</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. (<conf-loc>Long Beach, USA</conf-loc>: <publisher-name>IEEE</publisher-name>), <fpage>779</fpage>&#x2013;<lpage>788</lpage>.</citation>
</ref>
<ref id="B36">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Redmon</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Farhadi</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>YOLO9000: better, faster, stronger</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. (<conf-loc>Salt Lake City, USA</conf-loc>: <publisher-name>IEEE</publisher-name>), <fpage>7263</fpage>&#x2013;<lpage>7271</lpage>.</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ren</surname> <given-names>S.</given-names>
</name>
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Faster R-CNN: Towards real-time object detection with region proposal networks</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell.</source> <volume>39</volume>, <fpage>1137</fpage>&#x2013;<lpage>1149</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TPAMI.2016.2577031</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Roy</surname> <given-names>A. M.</given-names>
</name>
<name>
<surname>Bhaduri</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A deep learning enabled multi-class plant disease detection model based on computer vision</article-title>. <source>AI</source> <volume>2</volume>, <fpage>413</fpage>&#x2013;<lpage>428</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/ai2030026</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sujatha</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Chatterjee</surname> <given-names>J. M.</given-names>
</name>
<name>
<surname>Jhanjhi</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Brohi</surname> <given-names>S. N.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Performance of deep learning vs machine learning in plant leaf disease detection</article-title>. <source>Microprocess. Microsyst.</source> <volume>80</volume>, <elocation-id>103615</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.micpro.2020.103615</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Xing</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A novel detection method for hot spots of photovoltaic (PV) panels using improved anchors and prediction heads of YOLOv5 network</article-title>. <source>Energy Rep.</source> <volume>8</volume>, <fpage>1219</fpage>&#x2013;<lpage>1229</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.egyr.2022.08.130</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tian</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Qiao</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Computer vision technology in agricultural automation&#x2014;a review</article-title>. <source>Inf. Process. Agric.</source> <volume>7</volume>, <fpage>1</fpage>&#x2013;<lpage>19</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.inpa.2019.09.006</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>MGA-YOLO: A lightweight one-stage network for apple leaf disease detection</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.927424</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>ALAD-YOLO: An lightweight and accurate detector for apple leaf diseases</article-title>. <source>Front. Plant Sci.</source> <volume>14</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2023.1204569</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zeng</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Crop leaf disease recognition based on self-attention convolutional neural network</article-title>. <source>Comput. Electron. Agric.</source> <volume>172</volume>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2020.105341</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Meng</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Bi</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>MBAB-YOLO: A modified lightweight architecture for real-time small target detection</article-title>. <source>IEEE Access</source> <volume>11</volume>, <fpage>1</fpage>&#x2013;<lpage>9</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2023.3286031</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>EADD-YOLO: An efficient and accurate disease detector for apple leaf using improved lightweight YOLOv5</article-title>. <source>Front. Plant Sci.</source> <volume>14</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2023.1120724</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>