<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2024.1373104</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>BRA-YOLOv7: improvements on large leaf disease object detection using FasterNet and dual-level routing attention in YOLOv7</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Ye</surname>
<given-names>Rong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2634598"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Gao</surname>
<given-names>Quan</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2726926"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Li</surname>
<given-names>Tong</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>College of Food Science and Technology, Yunnan Agricultural University</institution>, <addr-line>Kunming, Yunnan</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>The Key Laboratory for Crop Production and Smart Agriculture of Yunnan Province, Yunnan Agricultural University</institution>, <addr-line>Kunming, Yunnan</addr-line>, <country>China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>College of Big Data, Yunnan Agricultural University</institution>, <addr-line>Kunming, Yunnan</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Jeffrey Too Chuan Tan, Genovasi University College, Malaysia</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Huahong Huang, Zhejiang Agriculture and Forestry University, China</p>
<p>Naveen Kumar Mahanti, Dr. Y.S.R. Horticultural University, India</p>
<p>Muhammad Aqib, PMAS-Arid Agriculture University Rawalpindi, Pakistan</p>
<p>Yunchao Tang, Dongguan University of Technology, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Quan Gao, <email xlink:href="mailto:gaoq@ynau.edu.cn">gaoq@ynau.edu.cn</email>; Tong Li, <email xlink:href="mailto:tli@ynu.edu.cn">tli@ynu.edu.cn</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>09</day>
<month>12</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1373104</elocation-id>
<history>
<date date-type="received">
<day>19</day>
<month>01</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>29</day>
<month>10</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Ye, Gao and Li</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Ye, Gao and Li</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Tea leaf diseases are significant causes of reduced quality and yield in tea production. In the Yunnan region, where the climate is suitable for tea cultivation, tea leaf diseases are small, scattered, and vary in scale, making their detection challenging due to complex backgrounds and issues such as occlusion, overlap, and lighting variations. Existing object detection models often struggle to achieve high accuracy in detecting tea leaf diseases. To address these challenges, this paper proposes a tea leaf disease detection model, BRA-YOLOv7, which combines a dual-level routing dynamic sparse attention mechanism for fast identification of tea leaf diseases in complex scenarios. BRA-YOLOv7 incorporates PConv and FasterNet as replacements for the original network structure of YOLOv7, reducing the number of floating-point operations and improving efficiency. In the Neck layer, a dual-level routing dynamic sparse attention mechanism is introduced to enable flexible computation allocation and content awareness, enhancing the model&#x2019;s ability to capture global information about tea leaf diseases. Finally, the loss function is replaced with MPDIoU to enhance target localization accuracy and reduce false detection cases. Experiments and analysis were conducted on a collected dataset using the Faster R-CNN, YOLOv6, and YOLOv7 models, with Mean Average Precision (mAP), Floating-point Operations (FLOPs), and Frames Per Second (FPS) as evaluation metrics for accuracy and efficiency. The experimental results show that the improved algorithm achieved a 4.8% improvement in recognition accuracy, a 5.3% improvement in recall rate, a 5% improvement in balance score, and a 2.6% improvement in mAP compared to the traditional YOLOv7 algorithm. Furthermore, in external validation, the floating-point operation count decreased by 1.4G, FPS improved by 5.52%, and mAP increased by 2.4%. 
In conclusion, the improved YOLOv7 model demonstrates remarkable results in terms of parameter quantity, floating-point operation count, model size, and convergence time. It provides efficient lossless identification while balancing recognition accuracy, real-time performance, and model robustness. This has significant implications for adopting targeted preventive measures against tea leaf diseases in the future.</p>
</abstract>
<kwd-group>
<kwd>tea leaf diseases</kwd>
<kwd>dual-level routing dynamic sparse attention mechanism</kwd>
<kwd>FasterNet</kwd>
<kwd>YOLOv7 algorithm</kwd>
<kwd>lightweight model</kwd>
</kwd-group>
<counts>
<fig-count count="14"/>
<table-count count="3"/>
<equation-count count="18"/>
<ref-count count="38"/>
<page-count count="15"/>
<word-count count="5626"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Technical Advances in Plant Science</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Yunnan is internationally recognized as the birthplace of tea trees, and the tea industry is a characteristic advantage industry in Yunnan. Yunnan&#x2019;s tea plantation area and the comprehensive associated output value of the industry have consistently ranked among the top in the country for many years. Yunnan has recently listed the tea industry as the province&#x2019;s top priority among its eight key agricultural industries. The tea industry plays a crucial role in consolidating the achievements of poverty alleviation efforts and promoting the implementation of the rural revitalization strategy, which holds great political, social, and economic significance (<xref ref-type="bibr" rid="B15">Li et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B26">Sun et&#xa0;al., 2023</xref>). Most of Yunnan&#x2019;s tea gardens are located in mountainous areas, where production conditions are poor and mechanization levels are relatively low. The most serious issue is the insufficient investment in tea leaf scientific research, which leads to a low rate of transformation of research achievements.</p>
<p>Traditional agricultural producers often rely on manual experience to determine tea diseases, which is inefficient and prone to misjudging the disease cycle, resulting in the inability to take targeted protective measures in advance. This greatly reduces the accuracy and scientific nature of tea disease identification (<xref ref-type="bibr" rid="B35">Zhang et&#xa0;al., 2023</xref>). During the growth period, diseases can further intensify their spread, and new diseases are likely to occur, leading to missing the optimal treatment period (<xref ref-type="bibr" rid="B21">Rajathi and Parameswari, 2022</xref>).</p>
<p>In recent years, deep learning and image processing have been widely applied in crop disease diagnosis (<xref ref-type="bibr" rid="B29">Waheed et&#xa0;al., 2020</xref>) and gene identification (<xref ref-type="bibr" rid="B8">Hong et&#xa0;al., 2020</xref>). Applying artificial intelligence methods to crop disease diagnosis can provide a new solution for sustainable crop development and is of great significance for ensuring healthy crop growth. Disease identification generally involves four steps: image preprocessing, image segmentation, disease image feature extraction, and disease identification. Hossain et&#xa0;al. (<xref ref-type="bibr" rid="B9">Hossain et&#xa0;al., 2018</xref>) developed an image processing method that can analyze 11 features of tea diseases and used a support vector machine classifier to identify and classify the two most common tea diseases: tea brown blight and tea leaf spot. Sun et&#xa0;al. (<xref ref-type="bibr" rid="B25">Sun et&#xa0;al., 2018</xref>) improved the method of extracting significant disease maps of tea diseases from complex environments by combining simple linear iterative clustering (SLIC) and support vector machines (SVM). Hu et&#xa0;al. (<xref ref-type="bibr" rid="B10">Hu et&#xa0;al., 2021</xref>) developed a model for analyzing the severity of tea withering disease in natural scene photos. They used an SVM classifier to segment the disease spot location from tea withering disease leaf images to calculate the initial disease severity (IDS) index. Xu et&#xa0;al. (<xref ref-type="bibr" rid="B32">Xu et&#xa0;al., 2020</xref>) used an improved Faster R-CNN algorithm to identify tea bud images, but the model had poor universality and slow segmentation speed. As mentioned earlier, deep neural network technology has been proven to be effective in detecting and identifying tea diseases, but most of them are limited to diagnosing or classifying simple crop disease images. 
With the complexity of background images in current natural scenes, the upgrading of tea varieties, and the growth changes of multiple diseases, some traditional deep learning models have a large number of parameters and slow operation speed, making it difficult to achieve an effective balance between recognition efficiency and accuracy, which does not match the actual scenario.</p>
<p>With the development of deep learning, target detection algorithms are mainly divided into two categories: one-stage and two-stage detection algorithms. One-stage algorithms, such as the YOLO (<xref ref-type="bibr" rid="B22">Redmon et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B36">Zhang et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B17">Lin et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B18">Lv et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B23">Soeb et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B37">Zhao et&#xa0;al., 2023</xref>) series, extract features only once and are widely used in agriculture due to their evolution in the era of deep learning. Bai et&#xa0;al. (<xref ref-type="bibr" rid="B2">Bai et&#xa0;al., 2024</xref>) designed a lightweight and efficient T-YOLO model for the rapid and accurate detection of tea vegetative buds. This model incorporates the lightweight module C2fG2 and the efficient feature extraction module DBS into the backbone and neck of the YOLOv5 baseline model. Furthermore, the head network of the model is pruned, effectively reducing the number of parameters. Xue et&#xa0;al. (<xref ref-type="bibr" rid="B34">Xue et&#xa0;al., 2023</xref>) integrated self-attention and convolution (ACmix) with the Convolution Block Attention Module (CBAM) based on YOLOv5, enabling the improved YOLO-Tea model to more effectively focus on tea diseases and insect pests. Consequently, the detection results of the enhanced model are significantly superior to those of the original.</p>
<p>Tea gardens often have complex environmental conditions, with soil, pests, or diseases that have similar colors overlapping and causing difficulties in target detection due to the presence of irrelevant features. Therefore, several aspects need to be considered during the recognition process: 1) in natural environmental conditions, tea leaves are often subjected to intense lighting and moderate wind speeds, which can affect the extraction of disease features; 2) the color and texture distribution of disease spots in tea leaf images vary, and multiple disease spots may coexist and overlap, causing uncertainty in the boundary between normal pixels and diseased pixels; 3) the use of multi-scale convolution and attention mechanism modules should effectively adjust the receptive field size to enhance the ability of image feature extraction by parameter tuning.</p>
<p>Due to the real-time image processing capability and superior training efficiency compared to other models in the YOLO series, the YOLOv7 model is considered for target detection in tea leaf disease images. Considering the presence of a large number of invalid background areas and redundant information in the samples, as well as issues such as varying resolutions, leaf deficiency, and non-uniform image quality in the same tea leaf disease image, this paper adopts YOLOv7 as the base model for object detection and conducts research and algorithm optimization specifically for the real scenes of tea leaves to improve the accuracy of tea leaf disease image recognition.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Data and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Image capture</title>
<p>In the Yunnan region, large-leaf tea plantations cover more than 80% of the national plantation area. This article focuses on the Hekai Base in Menghai County, Xishuangbanna Prefecture, Yunnan Province (latitude 21.5, longitude 100.28) as the research object. The tea plantation is shown in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>. Due to the suitable temperature and high humidity in Yunnan, the occurrence of large-leaf tea diseases is highly seasonal, with the highest incidence in autumn (<xref ref-type="bibr" rid="B24">Sun et&#xa0;al., 2020</xref>). Therefore, the shooting time for this study was from July 1st to July 15th, 2022. Considering the influence of light intensity on the disease dataset, photos were taken from 9 to 11 am and from 3 to 5 pm. The image capture device used was a Canon EOS 800D, with a photo resolution of 4608&#xd7;3456, saved in .PNG format.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Tea plantation.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373104-g001.tif"/>
</fig>
<p>To meet the requirements of diverse pest detection in complex environments and to ensure the authenticity of the growth environment, the captured images have the following conditions: slight occlusion, severe occlusion, overlap, natural light angles, side light angles, back light angles, etc. Examples of tea disease samples are shown in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Example of tea disease samples.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373104-g002.tif"/>
</fig>
<sec id="s2_1_1">
<label>2.1.1</label>
<title>Image preprocessing and dataset partitioning</title>
<p>A total of 3,246 tea disease images were collected, which included different diseases, lighting conditions, degrees of occlusion, and overlapping diseases. After screening, 2,789 qualified images were selected. Among them, 10% of the images were randomly chosen as the validation set to evaluate the generalization of the detection model, while the remaining 2,510 images were randomly divided into a training set (2,259 images) and a test set (251 images) in a 9:1 ratio. Care was taken to ensure that there were no duplicate images among the training, validation, and test sets to prevent overfitting of the model (<xref ref-type="bibr" rid="B6">Halstead et&#xa0;al., 2018</xref>). The distribution of the sample dataset is shown in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>. The annotation software, LabelImg, was used for manual annotation of tea disease targets in the training set. The annotations were made based on the minimum bounding rectangle around the disease to minimize the inclusion of background areas. The annotated files were saved in XML format (<xref ref-type="bibr" rid="B12">Jintasuttisak et&#xa0;al., 2022</xref>). The visualization analysis of the annotated tea disease files is shown in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>. From <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>, it can be observed that the sizes of the bounding boxes are uneven, but the ratios are mostly distributed between 0.04 and 0.4. Small-sized disease targets are more abundant and are not easy to detect.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Distribution of the sample dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Types of Tea Plant Diseases</th>
<th valign="top" align="center">Total Number of Datasets</th>
<th valign="top" align="center">Train Set</th>
<th valign="top" align="center">Test Set</th>
<th valign="top" align="center">Validation Set</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">
<bold>Tea Cloud Spot Blight</bold>
</td>
<td valign="top" align="center">932</td>
<td valign="top" align="center">766</td>
<td valign="top" align="center">79</td>
<td valign="top" align="center">87</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>Tea Red Spot Disease</bold>
</td>
<td valign="top" align="center">746</td>
<td valign="top" align="center">605</td>
<td valign="top" align="center">65</td>
<td valign="top" align="center">76</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>Tea White Star Disease</bold>
</td>
<td valign="top" align="center">594</td>
<td valign="top" align="center">477</td>
<td valign="top" align="center">56</td>
<td valign="top" align="center">61</td>
</tr>
<tr>
<td valign="top" align="left">
<bold>Tea Leaf Spot Disease</bold>
</td>
<td valign="top" align="center">517</td>
<td valign="top" align="center">411</td>
<td valign="top" align="center">51</td>
<td valign="top" align="center">55</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Visualization analysis of annotated tea disease files. <bold>(A)</bold> Category Number <bold>(B)</bold> Length and Width of Label Frame <bold>(C)</bold> Distribution of Central Points <bold>(D)</bold> Width and Height Distribution.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373104-g003.tif"/>
</fig>
<p>In order to enhance the model&#x2019;s generalization ability, data augmentation was performed on the images of Yunnan large-leaf sun-dried green tea diseases. Specifically, 1) image brightness adjustment was applied by increasing and decreasing the brightness by 1.4 times and 0.6 times respectively. Through these brightness transformations, the model becomes more suitable for complex tea plantation environments with changing lighting conditions; 2) image contrast adjustment was applied by increasing and decreasing the contrast by 1.4 times and 0.6 times respectively. This helps to improve the clarity, grayscale, and texture details of the tea leaf images; 3) Gaussian blur and random rotation were applied. Gaussian blur enhances the details in disease images and increases image smoothness, while random rotation enhances the adaptability of the detection model. After applying brightness and contrast enhancement, Gaussian blur, and random rotation to the selected disease images in the dataset, the total number of images reached 15,534. <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref> illustrates the results of data augmentation.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Image enhancement processing.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373104-g004.tif"/>
</fig>
</sec>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>The improved YOLOv7 network model</title>
<p>In single-stage object detection algorithms, YOLOv7 performs well and is the most optimized model in terms of inference speed and recognition performance among the YOLO series. Due to its shallow network depth and smaller feature map width, it achieves fast inference speed and is widely used in real-time detection of diseases in practical scenarios. YOLOv7 consists of four components: Input, Backbone, Neck, and Head.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Optimize loss function</title>
<p>When solving object detection problems using CNNs, regardless of whether it&#x2019;s a regression or classification problem, a loss function is essential and also a major factor affecting the accuracy of the results. In this paper, the Minimum Point Distance IoU (MPDIoU) loss function (<xref ref-type="bibr" rid="B33">Xu and Jeongyoung, 2021</xref>; <xref ref-type="bibr" rid="B19">Ma and Xu, 2023</xref>; <xref ref-type="bibr" rid="B20">Ma et&#xa0;al., 2023</xref>) is used to replace the original YOLOv7 network model&#x2019;s object regression (CIoU) loss function. MPDIoU includes regression of both overlapping and non-overlapping bounding boxes, center point distance loss, and deviations in width and height. During the training process, it accurately optimizes the bounding box regression process when the predicted box and annotated box have the same center point overlap and proportional height and width deviations. This is illustrated in <xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5</bold>
</xref>, <xref ref-type="fig" rid="f6">
<bold>6</bold>
</xref>.
</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>BRA-YOLOv7 network architecture.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373104-g005.tif"/>
</fig>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Illustration of factors in MPDIoU calculation.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373104-g006.tif"/>
</fig>
<p>In the training phase, the objective of this model optimization is to make each predicted box</p>
<p>
<inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:msub>
<mml:mi>B</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> as close as possible to the annotated box <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:msub>
<mml:mi>B</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, minimizing the loss function L as shown below:</p>
<disp-formula id="eq1">
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mi>&#x2112;</mml:mi>
<mml:mo>=</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mmultiscripts>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>B</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
<mml:mo>&#x2208;</mml:mo>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="double-struck">B</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>B</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>B</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>|</mml:mo>
<mml:mi>&#x398;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
<mml:mprescripts/>
<mml:mi>&#x398;</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mmultiscripts>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Where <inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:msub>
<mml:mi>B</mml:mi>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the set of annotated boxes, <inline-formula>
<mml:math display="inline" id="im4">
<mml:mi>&#x398;</mml:mi>
</mml:math>
</inline-formula> is the parameter of the regression deep model. Based on this, the penalty term of the bounding box regression (MPDIoU) loss function is formulated as follows:</p>
<disp-formula id="eq2">
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:msub>
<mml:mi>&#x2112;</mml:mi>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>D</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>M</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>D</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq3">
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>D</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>&#x2229;</mml:mo>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>&#x222a;</mml:mo>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mi>d</mml:mi>
<mml:mn>1</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mi>w</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>h</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mi>d</mml:mi>
<mml:mn>2</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mi>w</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>h</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq4">
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:msubsup>
<mml:mi>d</mml:mi>
<mml:mn>1</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>B</mml:mi>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>A</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>B</mml:mi>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>A</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq5">
<label>(5)</label>
<mml:math display="block" id="M5">
<mml:mrow>
<mml:msubsup>
<mml:mi>d</mml:mi>
<mml:mn>2</mml:mn>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>B</mml:mi>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>A</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>B</mml:mi>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>A</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="eq2">Equations 2</xref>&#x2013;<xref ref-type="disp-formula" rid="eq5">5</xref>, <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>D</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>U</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the regression boundary, <inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im7">
<mml:mi>B</mml:mi>
</mml:math>
</inline-formula> represent the predicted box and the ground truth box, <inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>A</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>A</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im9">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>A</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>A</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represent the coordinates of the top-left and bottom-right corners of box <inline-formula>
<mml:math display="inline" id="im10">
<mml:mi>A</mml:mi>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im11">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>B</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mn>1</mml:mn>
<mml:mi>B</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im12">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>B</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mn>2</mml:mn>
<mml:mi>B</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represent the coordinates of the top-left and bottom-right corners of box <inline-formula>
<mml:math display="inline" id="im13">
<mml:mi>B</mml:mi>
</mml:math>
</inline-formula>.</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>PConv</title>
<p>In addition to model accuracy, the calculation power (FLOPs) and parameter size required during forward propagation are also important factors in accelerating the inference speed of neural networks. By reducing the demands on GPU performance and memory usage, we can design a faster YOLOv7 neural network. In this study, we introduced PConv and FasterNet to replace the original network structure of YOLOv7. In the main network, we introduced a new type of convolution called PConv (Partial Convolution) (<xref ref-type="bibr" rid="B3">Chen et&#xa0;al., 2023</xref>), which reduces redundant calculations and memory accesses. The structure of PConv is shown in <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref>. Compared to conventional convolutions <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7A</bold>
</xref> and depth-wise convolutions <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7B</bold>
</xref>, PConv only applies filters to a few input channels, while leaving the rest of the channels unchanged. By exploiting the redundancy in feature maps, we systematically apply regular convolutions (Conv) to a subset of input channels while keeping the remaining channels intact.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Structures of different convolutional networks. <bold>(A)</bold> Standard Convolution <bold>(B)</bold> Depthwise Convolution <bold>(C)</bold> PConv.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373104-g007.tif"/>
</fig>
<p>PConv can be considered to have the same number of channels in the input and output feature maps without loss of generality. The floating point operations of PConv are shown in <xref ref-type="disp-formula" rid="eq6">Equation 6</xref>, and the memory access is relatively low, as shown in <xref ref-type="disp-formula" rid="eq7">Equation 7</xref>. Therefore, for a conventional ratio of <inline-formula>
<mml:math display="inline" id="im14">
<mml:mrow>
<mml:mtext>r</mml:mtext>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mtext>C</mml:mtext>
<mml:mtext>p</mml:mtext>
</mml:msub>
<mml:mo stretchy="false">/</mml:mo>
<mml:mtext>C</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">/</mml:mo>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, PConv has only 1/16 and 1/4 of the floating point operations and memory access compared to conventional convolution.</p>
<disp-formula id="eq6">
<label>(6)</label>
<mml:math display="block" id="M6">
<mml:mrow>
<mml:mtext>h</mml:mtext>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mi>k</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#xd7;</mml:mo>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq7">
<label>(7)</label>
<mml:math display="block" id="M7">
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>2</mml:mn>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>k</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#xd7;</mml:mo>
<mml:msubsup>
<mml:mi>c</mml:mi>
<mml:mi>p</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mo>&#x2248;</mml:mo>
<mml:mi>h</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>w</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>2</mml:mn>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>PConv has lower FLOPs and higher FLOPS compared to conventional convolutions and depthwise convolutions. FLOPS stands for Floating Point Operations per Second and serves as a measure of effective computing speed. PConv better utilizes the computational power of devices and is also effective in spatial feature extraction.</p>
<p>The ELAN module in the backbone network can effectively improve the learning ability of the network without disrupting the original gradient path. However, the ELAN module heavily relies on CBS convolutional layers, which have a large number of parameters. Additionally, during feature extraction, the ELAN module can lead to isolated feature channels, which affects the model&#x2019;s detection efficiency. To enhance the feature extraction capability of the ELAN module, this paper replaces the CBS convolutional layers with PConv, which has fewer parameters. The resulting ELAN-P structure is shown in <xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8</bold>
</xref>.</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>ELAN-P network structure diagram.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373104-g008.tif"/>
</fig>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Fusion of PConv with FasterNet module</title>
<p>FasterNet is a new family of neural networks that run faster and achieve higher accuracy on multiple processing platforms, surpassing other neural networks. FasterNet is mainly composed of four levels and its structure is shown in <xref ref-type="fig" rid="f9">
<bold>Figure&#xa0;9</bold>
</xref>. Each FasterNet Block consists of a PConv layer and two PWConv layers, presenting an inverted residual block. Stage 3 and Stage 4 layers have an expanded number of channels and higher floating-point operation efficiency per second. FasterNet performs well and is generally fast on various devices, including GPUs, CPUs, and ARM processors.</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>FasterNet architecture diagram.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373104-g009.tif"/>
</fig>
</sec>
<sec id="s2_6" sec-type="intro">
<label>2.6</label>
<title>Introduction of dual-level routing in the dynamic sparse attention mechanism</title>
<p>In the visual Transformer, the attention mechanism is a crucial part. Considering the scalability issues in terms of model computation and memory requirements, we noticed that the multi-head self-attention mechanism can enable the model to better capture discriminative features from different perspectives, thereby improving the model&#x2019;s performance (<xref ref-type="bibr" rid="B5">Gao et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B16">Li et&#xa0;al., 2023</xref>). Taking reference from YOLOv7 in Tea Tree Disease Detection training, the model performs poorly when there are occluded disease parts. Therefore, we introduce a double-layer routing-based dynamic sparse attention mechanism to achieve more flexible computation allocation and content perception.</p>
<p>Double-layer routed attention (<xref ref-type="bibr" rid="B13">Kwan-Wu et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B11">Jiang et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B38">Zhu et&#xa0;al., 2023</xref>) is a dynamic and query-aware sparse attention mechanism. The main idea is to filter out most irrelevant key-value pairs at a coarse-grained level and calculate coarse-grained routing features through average pooling. After computing and reading the relevance, scattered key-value pairs are collected to calculate fine-grained attention from token to token, leaving only a small number of fine-grained routing regions, as shown in <xref ref-type="fig" rid="f10">
<bold>Figure&#xa0;10</bold>
</xref>.</p>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>Bi-level routing attention mechanism.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373104-g010.tif"/>
</fig>
<p>First, the disease image is segmented into <italic>S&#xd7;S</italic> non-overlapping regions, where each region contains a feature vector of size <inline-formula>
<mml:math display="inline" id="im15">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</inline-formula>. Here, <inline-formula>
<mml:math display="inline" id="im16">
<mml:mi>H</mml:mi>
</mml:math>
</inline-formula> represents the height of the original image, and <inline-formula>
<mml:math display="inline" id="im17">
<mml:mi>W</mml:mi>
</mml:math>
</inline-formula> represents the width of the original image. The feature vectors are then linearly mapped to obtain <inline-formula>
<mml:math display="inline" id="im18">
<mml:mrow>
<mml:mi>Q</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>K</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, as shown in <xref ref-type="disp-formula" rid="eq8">Equation 8</xref>. In this equation, <inline-formula>
<mml:math display="inline" id="im19">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#xd7;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mi>W</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represents the sub-region of the feature map, <inline-formula>
<mml:math display="inline" id="im20">
<mml:mrow>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>q</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>k</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>v</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represent the projection weights for query, key, and value respectively. By calculating the mean values of each region, <inline-formula>
<mml:math display="inline" id="im21">
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x211d;</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are obtained. The adjacency matrix of the correlation between <inline-formula>
<mml:math display="inline" id="im22">
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im23">
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is computed, as shown in <xref ref-type="disp-formula" rid="eq9">Equation 9</xref>. By multiplying the transposed matrices of <inline-formula>
<mml:math display="inline" id="im24">
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im25">
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula>
<mml:math display="inline" id="im26">
<mml:mrow>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is obtained, which represents the level of correlation between two regions. We obtain <inline-formula>
<mml:math display="inline" id="im27">
<mml:mrow>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> as shown in <xref ref-type="disp-formula" rid="eq10">Equation 10</xref>. <inline-formula>
<mml:math display="inline" id="im28">
<mml:mrow>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represents the region-level query, <inline-formula>
<mml:math display="inline" id="im29">
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represents the region-level key, and <inline-formula>
<mml:math display="inline" id="im30">
<mml:mi>T</mml:mi>
</mml:math>
</inline-formula> represents the transpose operation. For coarse-grained region-level routing computation, a routing index matrix <inline-formula>
<mml:math display="inline" id="im31">
<mml:mrow>
<mml:msup>
<mml:mi>I</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>&#x2115;</mml:mi>
<mml:mrow>
<mml:msup>
<mml:mi>S</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is used. This matrix stores the indices of the top k connections for each region, while eliminating the weaker correlations. To efficiently process the collected key <inline-formula>
<mml:math display="inline" id="im32">
<mml:mi>K</mml:mi>
</mml:math>
</inline-formula> and value <inline-formula>
<mml:math display="inline" id="im33">
<mml:mi>V</mml:mi>
</mml:math>
</inline-formula> tensors, a public key normalization operation is applied, as shown in <xref ref-type="disp-formula" rid="eq11">Equations 11</xref>, <xref ref-type="disp-formula" rid="eq12">12</xref>. Here, <inline-formula>
<mml:math display="inline" id="im34">
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>g</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represents the aggregated tensor for keys, <inline-formula>
<mml:math display="inline" id="im35">
<mml:mi>K</mml:mi>
</mml:math>
</inline-formula> represents the original keys, <inline-formula>
<mml:math display="inline" id="im36">
<mml:mrow>
<mml:msup>
<mml:mi>I</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represents the routing index matrix, <inline-formula>
<mml:math display="inline" id="im37">
<mml:mrow>
<mml:msup>
<mml:mi>V</mml:mi>
<mml:mi>g</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represents the aggregated tensor for values, and <inline-formula>
<mml:math display="inline" id="im38">
<mml:mi>V</mml:mi>
</mml:math>
</inline-formula> represents the original values. Finally, the attention mechanism is applied to <inline-formula>
<mml:math display="inline" id="im39">
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>g</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im40">
<mml:mrow>
<mml:msup>
<mml:mi>V</mml:mi>
<mml:mi>g</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> to obtain the feature map <inline-formula>
<mml:math display="inline" id="im41">
<mml:mi>O</mml:mi>
</mml:math>
</inline-formula>, as shown in <xref ref-type="disp-formula" rid="eq13">Equation 13</xref>. <inline-formula>
<mml:math display="inline" id="im42">
<mml:mi>O</mml:mi>
</mml:math>
</inline-formula> represents the fine-grained attention from token to token, and <inline-formula>
<mml:math display="inline" id="im43">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>V</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the local context enhancement term.</p>
<disp-formula id="eq8">
<label>(8)</label>
<mml:math display="block" id="M8">
<mml:mrow>
<mml:mi>Q</mml:mi>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>q</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2009;&#x2009;&#x2009;&#x2009;</mml:mtext>
<mml:mi>K</mml:mi>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>k</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2009;&#x2009;&#x2009;&#x2009;&#x2009;</mml:mtext>
<mml:mi>V</mml:mi>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mi>v</mml:mi>
</mml:msup>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq9">
<label>(9)</label>
<mml:math display="block" id="M9">
<mml:mrow>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mi>Q</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:msup>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq10">
<label>(10)</label>
<mml:math display="block" id="M10">
<mml:mrow>
<mml:msup>
<mml:mi>I</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>k</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msup>
<mml:mi>A</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mtext>&#xa0;</mml:mtext>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq11">
<label>(11)</label>
<mml:math display="block" id="M11">
<mml:mrow>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>g</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mi>g</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>I</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq12">
<label>(12)</label>
<mml:math display="block" id="M12">
<mml:mrow>
<mml:msup>
<mml:mi>V</mml:mi>
<mml:mi>g</mml:mi>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mi>g</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>I</mml:mi>
<mml:mi>r</mml:mi>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq13">
<label>(13)</label>
<mml:math display="block" id="M13">
<mml:mrow>
<mml:mi>O</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>A</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>Q</mml:mi>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>g</mml:mi>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mi>V</mml:mi>
<mml:mi>g</mml:mi>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:mi>L</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>V</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Experiments and discussions</title>
<p>To verify the detection effectiveness of BRA-YOLOv7 on the detection of tea leaf diseases, including tea leaf blight, tea red spot disease, tea white spot disease, and tea gray blight, this study conducted three comparative experiments with BRA-YOLOv7 and three popular network models: YOLOv7, Faster-RCNN, and SSD. The experiments were performed on Ubuntu 18.04.5 LTS operating system with an Intel<sup>&#xae;</sup> Xeon<sup>&#xae;</sup> Gold 5220R CPU @ 2.20GHz and an NVIDIA Quadro RTX 5000 GPU with 32GB memory. The deep learning framework used was Pytorch 1.12.1 with CUDA 11.2. To ensure the scientific rigor of the model testing results, the hardware devices and software environment used in this study were identical.</p>
<sec id="s3_1">
<label>3.1</label>
<title>Training process and analysis</title>
<p>The loss function (<xref ref-type="bibr" rid="B31">Wen et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B1">Ali et&#xa0;al., 2023</xref>) is an important indicator that measures the difference between the predicted results and the actual results of a model. A smaller value of the loss function indicates a better performance of the model, as it means the predicted results are closer to the actual results. As shown in <xref ref-type="fig" rid="f11">
<bold>Figure&#xa0;11</bold>
</xref>, during the initial stage of training, BRA-YOLOv7 exhibits a fast descent in the loss function. However, after 50 epochs, the descent speed slows down and the oscillation of the curve becomes more pronounced. As the training continues, the curve gradually flattens, indicating the convergence of the loss function. Eventually, the total loss on the training set stabilizes below 2%, while the total loss on the validation set stabilizes below 8%. By comparing the change in the loss function curves between the original YOLOv7 and the improved YOLOv7, it is evident that the improved YOLOv7 shows significant reductions in the loss of predicted box position, predicted box confidence, and classification. The most significant reduction is observed in the predicted box position loss, which decreases by more than 20% in both the training and testing sets.</p>
<fig id="f11" position="float">
<label>Figure&#xa0;11</label>
<caption>
<p>Comparison of loss function change curves. <bold>(A)</bold> BRA-YOLOv7 (Training set); <bold>(B)</bold> BRA-YOLOv7 (Validation set); <bold>(C)</bold> YOLOv7 (Training set); <bold>(D)</bold> YOLOv7 (Validation set); Red: Val Box; Blue: Val Objectness; Purple: Val Classification; Green: Total loss value.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373104-g011.tif"/>
</fig>
<p>The main model performance evaluation metrics used in this article include precision, recall, F1 score, average precision (AP), and mean average precision (mAP), as shown in <xref ref-type="disp-formula" rid="eq14">Equations 14</xref>&#x2013;<xref ref-type="disp-formula" rid="eq18">18</xref> (<xref ref-type="bibr" rid="B14">Lee et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B7">Han et&#xa0;al., 2024</xref>).</p>
<disp-formula id="eq14">
<label>(14)</label>
<mml:math display="block" id="M14">
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq15">
<label>(15)</label>
<mml:math display="block" id="M15">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq16">
<label>(16)</label>
<mml:math display="block" id="M16">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>=</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq17">
<label>(17)</label>
<mml:math display="block" id="M17">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>=</mml:mo>
<mml:mstyle displaystyle="true">
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x222b;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mn>1</mml:mn>
</mml:msubsup>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq18">
<label>(18)</label>
<mml:math display="block" id="M18">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>C</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
<mml:mi>C</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Where <inline-formula>
<mml:math display="inline" id="im44">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the number of test images in the tea disease category that are correctly identified by the model as belonging to that category, <inline-formula>
<mml:math display="inline" id="im45">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the number of test images in other categories of tea diseases that are incorrectly identified by the model as belonging to the current category, <inline-formula>
<mml:math display="inline" id="im46">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the number of test images in the current category of tea diseases that are incorrectly identified by the model as belonging to other categories, and <inline-formula>
<mml:math display="inline" id="im47">
<mml:mi>C</mml:mi>
</mml:math>
</inline-formula> represents the number of categories of tea diseases in the test set.</p>
<p>From the perspective of prediction results, precision is a metric used for statistics. It reflects the proportion of samples that are predicted as a certain class and actually belong to that class, which is also known as the &#x2018;classification accuracy&#x2019;. Recall, on the other hand, measures the ability of the model to retrieve samples correctly among all the samples in that class. The balanced score is a comprehensive measure based on precision and recall, using their harmonic mean. As shown in <xref ref-type="fig" rid="f12">
<bold>Figure&#xa0;12</bold>
</xref>, BRA-YOLOv7 has achieved significant improvements in detection performance. Compared to the YOLOv7 model, Precision, Recall, and F1 have improved by 6.37%, 6.14%, and 6.25% respectively.</p>
<fig id="f12" position="float">
<label>Figure&#xa0;12</label>
<caption>
<p>Curves depicting changes in accuracy, recall rate, and balanced score. <bold>(A)</bold> YOLOv7 precision; <bold>(B)</bold> YOLOv7 recall; <bold>(C)</bold> YOLOv7 F1 score; <bold>(D)</bold> BRA-YOLOv7 precision; <bold>(E)</bold> BRA-YOLOv7 recall; <bold>(F)</bold> BRA-YOLOv7 F1 score. Different colored thin lines represent the values for Tea Cloud Spot Blight, Tea Red Spot Disease, Tea White Star Disease, and Tea Leaf Spot Disease, respectively. The thick blue line indicates the average value of these four diseases.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373104-g012.tif"/>
</fig>
<p>AP (Average Precision) represents the average accuracy of a specific class at different IOU thresholds. mAP (mean Average Precision) refers to the mean value of AP for various classes. As shown in <xref ref-type="fig" rid="f13">
<bold>Figure&#xa0;13</bold>
</xref>, the BRA-YOLOv7 model demonstrates improvements in tea disease recognition compared to YOLOv7, Faster-RCNN, and SSD. For Single Target Unobstructed recognition, the AP gains are 4.76%, 14.71%, 5.98% respectively. For Single Target Occlusion recognition, the AP gains are 4.72%, 14.4%, 5.63% respectively. For Multiple Target Unobstructed recognition, the AP gains are 5.69%, 15.7%, 7.93% respectively. For Multiple Target Occlusion recognition, the AP gains are 5.26%, 15.27%, 8.04% respectively. The overall mAP improvements are 4.71%, 14.69%, 6.95% respectively.</p>
<fig id="f13" position="float">
<label>Figure&#xa0;13</label>
<caption>
<p>Comparison of AP and mAP for different models.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373104-g013.tif"/>
</fig>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Model validation experiment</title>
<p>In order to further verify the advantages of the improved model in this study, different lighting intensities were used to detect and identify Tea blight disease, Tea red star disease, Tea white star disease, and Tea wheel spot disease under the conditions of single-target and multi-target with and without occlusion. To ensure the reliability of the results, BRA-YOLOv7, YOLOv7, YOLOv8 (<xref ref-type="bibr" rid="B27">Tian et&#xa0;al., 2022</xref>), Faster-RCNN (<xref ref-type="bibr" rid="B4">Cheng and Li, 2023</xref>), and SSD (<xref ref-type="bibr" rid="B30">Wang et&#xa0;al., 2023</xref>) networks were trained and tested using the same external validation set, while the platform configurations for training were also kept consistent. The final comparison results are shown in <xref ref-type="fig" rid="f14">
<bold>Figure&#xa0;14</bold>
</xref>. A represents Tea blight disease, B represents Tea red star disease, C represents Tea white star disease, and D represents Tea wheel spot disease.</p>
<fig id="f14" position="float">
<label>Figure&#xa0;14</label>
<caption>
<p>Comparison of detection results for different models.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1373104-g014.tif"/>
</fig>
<p>In the test, the four models can successfully detect single-object occlusion and multi-object occlusion in both strong and decreasing light conditions. It is observed that the confidence level decreases as the light intensity decreases, indicating that light intensity has an impact on the model&#x2019;s detection. Among the models, BRA-YOLOv7 and YOLOv7 exhibit the highest confidence in the detection results. The BRA-YOLOv7 model can address the issue of disease localization deviation and avoid repeated detection, showing an average confidence improvement of over 3% compared to the original YOLOv7 model. In the case of multi-object occlusion, the Faster-RCNN model has the lowest confidence in the detection results, leading to missed detections and incorrect recognition. Although SSD can correctly recognize tea diseases, its model accuracy is relatively low. Overall, BRA-YOLOv7 performs better than the other three models in detecting small target diseases.</p>
<p>
<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref> presents a comparison of external parameters for five models in this experiment, including mAP value, floating-point operation count (FLOPs), and frames per second (FPS) during external validation. After incorporating FasterNet, dynamic sparse attention mechanism, and MPDIoU loss function, this study reduced the floating-point operation count by 15.5G compared to the original model, increased the FPS by 5.51% compared to YOLOv7, and improved the mAP value by 4.2% compared to YOLOv7. Overall, BRA-YOLOv7 outperforms the original YOLOv7, YOLOv8, Faster-RCNN, and SSD in terms of detection accuracy and speed. It provides support for the intelligent recognition of edge devices and tea plantation drones in future deployments.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>External validation parameters for comparing models.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Model</th>
<th valign="top" align="center">mAP/%</th>
<th valign="top" align="center">FLOPs/G</th>
<th valign="top" align="center">FPS/Hz</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">
<bold>BRA-YOLOv7</bold>
</td>
<td valign="top" align="center">94.9</td>
<td valign="top" align="center">89.7</td>
<td valign="top" align="center">46.08</td>
</tr>
<tr>
<td valign="top" align="center">
<bold>YOLOv7</bold>
</td>
<td valign="top" align="center">90.7</td>
<td valign="top" align="center">105.2</td>
<td valign="top" align="center">43.67</td>
</tr>
<tr>
<td valign="top" align="center">
<bold>YOLOv8</bold>
</td>
<td valign="top" align="center">90.4</td>
<td valign="top" align="center">165.7</td>
<td valign="top" align="center">36.71</td>
</tr>
<tr>
<td valign="top" align="center">
<bold>Faster-RCNN</bold>
</td>
<td valign="top" align="center">81.3</td>
<td valign="top" align="center">346.6</td>
<td valign="top" align="center">7.03</td>
</tr>
<tr>
<td valign="top" align="center">
<bold>SSD</bold>
</td>
<td valign="top" align="center">88.6</td>
<td valign="top" align="center">285.4</td>
<td valign="top" align="center">18.97</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Ablation experiment</title>
<p>To verify the effectiveness of different improvement modules in the Neck layer of the YOLOv7 model proposed in this article, in the same platform and parameter settings, ablation experiments were conducted on the dataset to compare the detection accuracy of the BRA-YOLOv7 model with the RFE-YOLOv7 (<xref ref-type="bibr" rid="B28">Tian and Tian, 2023</xref>), FRCB-YOLOv7, and LW-YOLOv7 models. The experimental results are shown in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Comparison of ablation results.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="center">Model</th>
<th valign="top" align="center">P%</th>
<th valign="top" align="center">R%</th>
<th valign="top" align="center">mAP@0.5%</th>
<th valign="top" align="center">F1</th>
<th valign="top" align="center">FPS</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">BRA-YOLOv7</td>
<td valign="top" align="center">90.1</td>
<td valign="top" align="center">92.3</td>
<td valign="top" align="center">93.46</td>
<td valign="top" align="center">91.17</td>
<td valign="top" align="center">64</td>
</tr>
<tr>
<td valign="top" align="center">RFE-YOLOv7</td>
<td valign="top" align="center">81.9</td>
<td valign="top" align="center">80.2</td>
<td valign="top" align="center">78.0</td>
<td valign="top" align="center">81.04</td>
<td valign="top" align="center">69</td>
</tr>
<tr>
<td valign="top" align="center">FRCB-YOLOv7</td>
<td valign="top" align="center">86.7</td>
<td valign="top" align="center">83.2</td>
<td valign="top" align="center">87.3</td>
<td valign="top" align="center">84.91</td>
<td valign="top" align="center">74</td>
</tr>
<tr>
<td valign="top" align="center">LW-YOLOv7</td>
<td valign="top" align="center">89.3</td>
<td valign="top" align="center">85.5</td>
<td valign="top" align="center">93.2</td>
<td valign="top" align="center">87.36</td>
<td valign="top" align="center">90</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>From <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>, it can be seen that in terms of detection speed performance, there is not much difference between BRA-YOLOv7, RFE-YOLOv7, and FRCB-YOLOv7. However, in comparison to RFE-YOLOv7 and FRCB-YOLOv7, the BRA-YOLOv7 model has improved mAP values by 15.46% and 6.16% respectively. The higher mAP values of BRA-YOLOv7 compared to the other two methods demonstrate the effectiveness of this approach. The ablative experiments confirmed the effectiveness of the proposed improvement strategy relative to YOLOv7. Therefore, considering the detection accuracy, memory, and runtime requirements under the same experimental conditions, the BRA-YOLOv7 algorithm was selected for further research.</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<sec id="s4_1">
<label>4.1</label>
<title>Impact of MPDIoU on YOLOv7 network</title>
<p>Localization is an important part of object detection, usually achieved through bounding box regression. When training deep models for object detection and instance segmentation, we found that the same disease exhibits similar shape and size characteristics, making MPDIoU more suitable for measuring bounding box similarity. Therefore, this study combines horizontal rectangle geometry features and proposes a new MPDIoU loss function based on minimum point distance. It overcomes the limitations of common loss functions such as CIoU, DIoU, and EIoU. It can still converge when the width and height values are different, and its convergence speed is higher than the CIoU in the YOLOv7 network. This not only simplifies the computation process to a certain extent and improves the model&#x2019;s convergence speed, but also makes the regression results more accurate.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Influence of PConv and FasterNet on YOLOv7 network</title>
<p>In order to reduce the complexity of the model and achieve faster running speed for the YOLOv7 model, the FasterNet block is introduced in combination with partial convolution (PConv). This allows for maintaining high FLOPS and low FLOPs, utilizing the redundancy in feature maps, and systematically applying conventional convolution (Conv) only on a portion of input channels to extract spatial features, while keeping the rest of the channels unchanged. This helps to reduce information redundancy and facilitate information aggregation. The YOLOv7 model improves detection speed by incorporating the FasterNet Block module into the backbone network.</p>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>The impact of dual-path routing attention mechanism on the YOLOv7 network</title>
<p>Traditional attention mechanisms require computing pairwise interactions between tokens in all spatial positions, resulting in significant computational and memory costs. Therefore, they excel in capturing long-range object detection. However, in the case of disease object detection, it is often difficult to obtain complete features due to overlapping occlusions and smaller disease objects, leading to potential omissions and recognition errors. With the proposed Dual-route Attention mechanism, by leveraging BiFormer&#x2019;s ability to adaptively focus on a small subset of relevant tokens without interference from irrelevant tokens, it enables more flexible computation allocation and enhances content-awareness.</p>
</sec>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusions</title> <p>This article presents an improved BRA-YOLOv7 algorithm for tea disease target detection in complex scenes. It introduces PConv and FasterNet to replace the original backbone network structure, improving floating point operation efficiency and detection speed. Additionally, a dual-layer route attention mechanism is utilized to filter out irrelevant key-value pairs at the coarse region level, making use of sparsity to save computation and memory. Lastly, a more efficient bounding box loss function called MPDIoU is introduced to accelerate model convergence. The experimental results show that:</p>
<list list-type="order">
<list-item>
<p>BRA-YOLOv7 network has a total loss stable below 2% on the training set and below 7% on the validation set, which is a more than 2% decrease compared to the original YOLOv7 network. Additionally, in the improved network, there are significant decreases in bounding box position loss, bounding box confidence loss, and classification loss. Among them, the decrease in bounding box position loss is the most significant, with a decrease of over 20% in both the training and testing sets.</p>
</list-item>
<list-item>
<p>From the perspective of detection performance, BRA-YOLOv7 has achieved effective improvement in accuracy while reducing the number of parameters. Compared to the YOLOv7 network, the accuracy of BRA-YOLOv7 has improved by 6.37%, the recall rate has improved by 6.14%, and the balanced score has increased by 6.25%. In addition, BRA-YOLOv7 has improved the average precision (AP) of four types of diseases by 4.76%, 4.72%, 5.69%, and 5.26% respectively, resulting in an overall mAP improvement of 4.71%.</p>
</list-item>
<list-item>
<p>After external data verification, BRA-YOLOv7 network reduces floating-point operations by 15.5G compared to YOLOv7. The FPS is improved by 5.51% compared to the original model, and the mAP value in actual detection is increased by 4.2%.</p>
</list-item>
</list>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material. Further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>RY: Data curation, Software, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. QG: Data curation, Methodology, Resources, Writing &#x2013; review &amp; editing. TL: Supervision, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing.</p>
</sec>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare that no financial support was received for the research, authorship, and/or publication of this article.</p>
</sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Ali</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Anuran</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Devavrat</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2023</year>). <source>Shah federated optimization of smooth loss functions</source> (<publisher-name>IEEE Transactions on Information Theory</publisher-name>). doi:&#xa0;<pub-id pub-id-type="doi">10.1109/tit.2023.3317168</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bai</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Long</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wen</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>T-YOLO: a lightweight and efficient detection model for nutrient buds in complex tea-plantation environments</article-title>. <source>J. Sci. Food Agric.</source> <volume>104</volume>, <fpage>5698</fpage>&#x2013;<lpage>5711</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/jsfa.v104.10</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Kao</surname> <given-names>S.</given-names>
</name>
<name>
<surname>He</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zhuo</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wen</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). &#x201c;<article-title>Run, don&#x2019;t walk: chasing higher FLOPS for faster neural networks</article-title>,&#x201d; in <source>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</source>, <fpage>12021</fpage>&#x2013;<lpage>12031</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/cvpr52729.2023.01157</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Improved YOLOv7 algorithm for detecting bone marrow cells</article-title>. <source>Sensors</source>. <volume>23</volume> (<issue>17</issue>), <elocation-id>7640</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s23177640</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Interactive speech emotion recognition using fused coordinates and multi-head attention mechanism</article-title>. <source>Comput. Appl.</source>, <fpage>1</fpage>&#x2013;<lpage>7</lpage>.</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Halstead</surname> <given-names>M.</given-names>
</name>
<name>
<surname>McCool</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Denman</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Perez</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Fookes</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Fruit quantity and ripeness estimation using a robotic vision system</article-title>. <source>IEEE Robot Autom. Let</source> <volume>3</volume>, <fpage>2995</fpage>&#x2013;<lpage>3002</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/lra.2018.2849514</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Han</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Shu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>K.</given-names>
</name>
</person-group> (<year>2024</year>). <source>A method for plant disease enhance detection based on improved YOLOv8.2024 IEEE 33rd international symposium on industrial electronics (ISIE)</source> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hong</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Du</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Tao</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>DeepHiC: A generative adversarial network for enhancing Hi-C data resolution</article-title>. <source>PloS Comput. Biol.</source> <volume>16</volume>, <elocation-id>e1007287</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pcbi.1007287</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Hossain</surname> <given-names>M. S.</given-names>
</name>
<name>
<surname>Mou</surname> <given-names>R. M.</given-names>
</name>
<name>
<surname>Hasan</surname> <given-names>M. M.</given-names>
</name>
<name>
<surname>Chakraborty</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Abdur Razzak</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Recognition and detection of tea leaf &#x2018;s diseases using support vector machine</article-title>,&#x201d; in <source>Proceedings of the 2018 IEEE 14th international colloquium on signal processing and its application, CSPA</source> (<publisher-name>Institute of Electrical and Electronics Engineers Inc</publisher-name>), <fpage>150</fpage>&#x2013;<lpage>154</lpage>.</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Bao</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Liang</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Estimation of tea leaf blight severity in natural scene images</article-title>. <source>Precis. Agric.</source> <volume>22</volume>, <fpage>1239</fpage>&#x2013;<lpage>1262</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11119-020-09782-8</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Chin</surname> <given-names>K.-W.</given-names>
</name>
<name>
<surname>He</surname> <given-names>T.</given-names>
</name>
<name>
<surname>He</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Soh</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2023</year>). <source>Joint link scheduling and routing in two-tier RF-energy-harvesting ioT networks</source> (<publisher-name>IEEE Internet of Things Journal</publisher-name>). doi:&#xa0;<pub-id pub-id-type="doi">10.1109/jiot.2021.3085862</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jintasuttisak</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Edirisinghe</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Elbattay</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Deep neural network based date palm tree detection in drone imagery</article-title>. <source>Comput. Electron. Agric.</source> <volume>192</volume>, <elocation-id>106560</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2021.106560</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Kwan-Wu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Sieteng</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2016</year>). <source>Joint routing and links scheduling in two-tier multi-hop RF-energy harvesting networks</source> (<publisher-name>IEEE Communications Letters</publisher-name>). doi:&#xa0;<pub-id pub-id-type="doi">10.1109/lcomm.2016.2590463</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname> <given-names>S. H.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>S. R.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>S. F.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Identification of tea foliar diseases and pest damage under practical field conditions using a convolutional neural network</article-title>. <source>Plant Pathol.</source> <volume>69</volume>, <fpage>1731</fpage>&#x2013;<lpage>1739</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/ppa.13251</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Shi</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Du</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Mao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Symptom recognition of disease and insect damage based on Mask R-CNN, wavelet transform, and F-RNet</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.922797</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Image denoising network model with fused multi-head attention mechanism</article-title>. <source>Comput. Sci.</source> <volume>50</volume>, <fpage>326</fpage>&#x2013;<lpage>333</lpage>.</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>He</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Real-time detection method for larch tree insect damage based on improved YOLOv4</article-title>. <source>Trans. Chin. Soc. Agric. Machinery</source> <volume>54</volume>, <fpage>304</fpage>&#x2013;<lpage>312, 393</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.6041/j.issn.1000-1298.2023.04.031</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lv</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Collaborative recognition of tomato flowers and fruits in a greenhouse using an enhanced combination of YOLOX-ViT</article-title>. <source>J. Agric. Eng.</source> <volume>39</volume>, <fpage>124</fpage>&#x2013;<lpage>134</lpage>.</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ma</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>The bounding box regression loss function for minimum point distance and its application</article-title>. <source>Small Microcomputer Syst.</source>, <fpage>1</fpage>&#x2013;<lpage>8</lpage>.</citation>
</ref>
<ref id="B20">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Ma</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). <source>MPDIoU: A loss for efficient and accurate bounding box regression. arXiv - CS - computer vision and pattern recognition</source>. arxiv-2307.07662. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arxiv.2307.07662</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rajathi</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Parameswari</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Early stage prediction of plant leaf diseases using deep learning models</article-title>. <source>Comput. Vis. Mach. Learn.Agric</source>, <fpage>245</fpage>&#x2013;<lpage>260</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-981-16-9991-7-15</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Redmon</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Divvala</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Farhadi</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>You only look once: Unified, real-time object detection</article-title>,&#x201d; in <source>Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)</source>, <fpage>779</fpage>&#x2013;<lpage>788</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/cvpr.2016.91</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Soeb</surname> <given-names>M. J. A.</given-names>
</name>
<name>
<surname>Jubayer</surname> <given-names>M. F.</given-names>
</name>
<name>
<surname>Tarin</surname> <given-names>T. A.</given-names>
</name>
<name>
<surname>Mamun</surname> <given-names>M. R. A.</given-names>
</name>
<name>
<surname>Ruhad</surname> <given-names>F. M.</given-names>
</name>
<name>
<surname>Parven</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Tea leaf disease detection and identification based on YOLOv7 (YOLO-T)</article-title>. <source>Sci. Rep.</source> <volume>13</volume>, <fpage>6078</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-023-33270-4</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Shao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Research progress on six important leaf diseases of tea plants</article-title>. <source>Tea</source> <volume>46</volume>, <fpage>71</fpage>&#x2013;<lpage>76</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3969/j.issn.0577-8921.2020.02.002</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Rao</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>SLIC_SVM based leaf diseases saliency map extraction of tea plant</article-title>. <source>Comput. Electron. Agric.</source> <volume>157</volume>, <fpage>102</fpage>&#x2013;<lpage>109</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2018.12.042</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Yao</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>TeaDiseaseNet: multi-scale self-attentive tea disease detection</article-title>. <source>Front. Plant Sci</source>. <volume>14</volume>, <elocation-id>1257212</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2023.1257212</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tian</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Duan</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Yuan</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>VMF-SSD: A Novel v-space based multi-scale feature fusion SSD for apple leaf disease detection</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinf.</source> <volume>20</volume>, <fpage>2016</fpage>&#x2013;<lpage>2028</lpage>.</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tian</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Tian</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A new lunar dome detection method based on improved YOLOv7</article-title>. <source>Sensors</source>. <volume>23</volume> (<issue>19</issue>), <elocation-id>8304</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s23198304</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Waheed</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Goyal</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Gupta</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Khanna</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Hassanien</surname> <given-names>A. E.</given-names>
</name>
<name>
<surname>Pandey</surname> <given-names>H. M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>An optimized dense convolutional neural network model for disease recognition and classification in corn leaf</article-title>. <source>Comput. Electron. Agric.</source> <volume>175</volume>, <elocation-id>105456</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2020.105456</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Xiao</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Detection of famous tea buds based on improved YOLOv7 network</article-title>. <source>Agriculture</source>. <volume>13</volume> (<issue>6</issue>), <fpage>1190</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture13061190</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wen</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Improved loss function for image classification</article-title>. <source>Comput. Intell. Neurosci.</source> <volume>2021</volume>, <fpage>1</fpage>&#x2013;<lpage>8</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2021/6660961</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Lai</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Tea leaf bud image recognition method based on Faster R-CNN deep network</article-title>. <source>Optoelectronics&#xb7;Laser</source> <volume>31</volume>, <fpage>1131</fpage>&#x2013;<lpage>1139</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.16136/j.joel.2020.11.0164</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Jeongyoung</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>ICIoU: improved loss based on complete intersection over union for bounding box regression</article-title>. <source>IEEE Access</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/access.2021.3100414</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xue</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Bai</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>YOLO-tea: A tea disease detection model improved by YOLOv5</article-title>. <source>Forests</source> <volume>14</volume>, <fpage>415</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/f14020415</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>A tea bud segmentation, detection and picking point localization based on the MDY7-3PTB model</article-title>. <source>Front. Plant Sci.</source> <volume>14</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2023.1199473</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Real-time detection of navel orange fruits based on improved algorithm of PP-YOLO</article-title>. <source>J. Beijing Union Univ.</source> <volume>36</volume>, <fpage>58</fpage>&#x2013;<lpage>66</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.16255/j.cnki.ldxbz</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Gong</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Improved real-time detection algorithm for safety helmet using modified YOLOv7-tiny</article-title>. <source>Radio Eng.</source> <volume>53</volume> (<issue>08</issue>), <fpage>1741</fpage>&#x2013;<lpage>1749</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3969/j.issn.1003-3106.2023.08.001</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Ke</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Lau</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2023</year>). <source>BiFormer: vision transformer with bi-level routing attention[C]//2023 IEEE conference on computer vision and pattern recognition.</source> (<publisher-loc>Vancouver</publisher-loc>: <publisher-name>IEEE Press</publisher-name>), <fpage>10323</fpage>&#x2013;<lpage>10333</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arxiv.2303.08810</pub-id></citation>
</ref>
</ref-list>
</back>
</article>