<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2025.1664650</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Automated weed monitoring and control: enhancing detection accuracy using a YOLOv7-AlexNet fusion network</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Faizan Zeb</surname><given-names>Muhammad</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Iqbal</surname><given-names>Abid</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Husnain</surname><given-names>Ghassan</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2621092/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Zafar</surname><given-names>Wisal</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3012577/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Junaid</surname><given-names>Ahmad</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Alzahrani</surname><given-names>Ali Saeed</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Bukhari</surname><given-names>Syed Hashim Raza</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Naidu</surname><given-names>Ramasamy Srinivasaga</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Computer Science, Iqra National University Peshawar</institution>, <city>Peshawar</city>,&#xa0;<country country="pk">Pakistan</country></aff>
<aff id="aff2"><label>2</label><institution>Department of Computer Engineering, College of Computer Sciences and Information Technology, King Faisal University</institution>, <city>AlAhsa</city>,&#xa0;<country country="sa">Saudi Arabia</country></aff>
<aff id="aff3"><label>3</label><institution>Department of Computer Science, Cecos University of Information Technology (IT) and Emerging Sciences</institution>, <city>Peshawar</city>,&#xa0;<country country="pk">Pakistan</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Abid Iqbal, <email xlink:href="mailto:aaiqbal@kfu.edu.sa">aaiqbal@kfu.edu.sa</email>; Ghassan Husnain, <email xlink:href="mailto:ghassan.husnain@gmail.com">ghassan.husnain@gmail.com</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2025-11-25">
<day>25</day>
<month>11</month>
<year>2025</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1664650</elocation-id>
<history>
<date date-type="received">
<day>22</day>
<month>07</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>25</day>
<month>09</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Faizan Zeb, Iqbal, Husnain, Zafar, Junaid, Alzahrani, Bukhari and Naidu.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Faizan Zeb, Iqbal, Husnain, Zafar, Junaid, Alzahrani, Bukhari and Naidu</copyright-holder>
<license>
<ali:license_ref start_date="2025-11-25">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>The agricultural sector is crucial to global sustainability, but it still faces challenges, particularly from weed invasions that severely compromise crop yields. Although considerable efforts have been made to address the weed problem using computer vision detection methods, the technology is still limited. Weeds and their crop hosts share many perceptual features, making it difficult to detect weeds with confidence. Most weed detection methods used today suffer from several problems: the inability to distinguish crops from similar-looking weeds, inconsistent performance across weed growth stages, and sensitivity to operational constraints. Previous methods have employed models such as YOLOv5, ResNet, and Faster R-CNN, but have suffered from issues with accuracy, estimation times, and the ability to detect small weeds in dense stands. In this study, we present a hybrid deep learning system that utilizes YOLOv7 for weed detection and AlexNet for weed species classification. YOLOv7 was used due to its fast recognition capabilities and ability to discriminate with better granularity when detecting grass in dense environments. It was found that using AlexNet to classify weed species accurately increases the specificity of the system. Experimental results of the hybrid model demonstrated improvements over previous methods, achieving a precision, recall, F1 score, mAP@0.50, and mAP@0.5:0.95 of 0.80, 0.85, 0.87, 0.89, and 0.50, respectively. The field test detection capability showed that AlexNet achieved precision, recall, and F1 scores of 95%, 97%, and 94%, respectively. Thus, these results indicate that the YOLOv7-AlexNet hybrid model provides both robust and efficient real-time detection and classification of weeds in agriculture. 
The next step is to expand the dataset to include a wider variety of weed species and environmental conditions, and to validate the developed model by deploying the YOLOv7-AlexNet hybrid model on field computers, thereby expanding its practical application in production environments.</p>
</abstract>
<kwd-group>
<kwd>deep learning</kwd>
<kwd>YOLOv7</kwd>
<kwd>AlexNet architecture</kwd>
<kwd>precision agriculture</kwd>
<kwd>weed detection &amp; classification</kwd>
</kwd-group>
<funding-group>
<award-group id="gs1">
<funding-source id="sp1">
<institution-wrap>
<institution>King Faisal University</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100020912</institution-id>
</institution-wrap>
</funding-source>
</award-group>
<funding-statement>The author(s) declare financial support was received for the research and/or publication of this article. This work was supported by the Deanship of Scientific Research, Vice Presidency for Graduate Studies and Scientific Research, King Faisal University, Saudi Arabia Grant No. KFU253419.</funding-statement>
</funding-group>
<counts>
<fig-count count="15"/>
<table-count count="6"/>
<equation-count count="8"/>
<ref-count count="52"/>
<page-count count="23"/>
<word-count count="11350"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Sustainable and Intelligent Phytoprotection</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Weed management has always been a significant concern in agriculture, as weeds harm crop production and resource use efficiency. Weeds compete with crops for sunlight, moisture, and nutrients, which reduces the quantity and quality of agricultural production. According to the FAO, weeds are estimated to account for 34% of agricultural losses worldwide, making it imperative that we find more effective or scalable methods of weed control (<xref ref-type="bibr" rid="B3">Atta et&#xa0;al., 2023</xref>). As global food demand continues to grow, the need for more sophisticated weed detection technology also increases. Farmers traditionally relied on manual scouting to assess weeds in their fields (<xref ref-type="bibr" rid="B43">Singh et&#xa0;al., 2020</xref>). This often involved labor-intensive scouting methods, such as sampling farmers&#x2019; crops at predetermined intervals using zigzag patterns or employing crop scouts. These methods incur high labor costs, are time-consuming, and are susceptible to human error and bias (<xref ref-type="bibr" rid="B27">Niyigena et&#xa0;al., 2023</xref>). Many farmers have attempted to reduce manual labor by using synthetic herbicides; however, this approach has led to other problems, including herbicide resistance, environmental degradation, and soil and water contamination by heavy metals (<xref ref-type="bibr" rid="B14">Hassan et&#xa0;al., 2023</xref>).</p>
<p>To tackle these issues, the research uses a hybrid framework based on deep learning that can automatically extract discriminative features across different field conditions, while data augmentation and various training samples improve the model&#x2019;s robustness to variability in the natural environment. Conversely, earlier approaches that relied on manually engineered features generally did not perform well under field conditions, where inherent random variables arising from natural environmental factors, such as light, weed density, obstruction, and crop similarities, affected the results. Furthermore, their generalization was limited by their reliance on manually engineered features, which can reduce their effectiveness in many situations across various types of agricultural areas (<xref ref-type="bibr" rid="B5">Bruinsma, 2017</xref>).</p>
<p>Deep learning (DL), and especially convolutional neural networks (CNNs), have changed the game of visual recognition tasks (including in precision agriculture). By automatically extracting multi-level features from raw image data, DL-based approaches can eliminate human biases that typically arise from preprocessing steps used to classify agricultural images. All DL-based methods have improved the overall classification accuracy of the original image data. Several different implementations of DL methods (ResNet, VGG, Faster R-CNN, YOLOv3, YOLOv4, and YOLOv5) have been used in weed detection tasks (<xref ref-type="bibr" rid="B37">Saini, 2022</xref>; <xref ref-type="bibr" rid="B6">Dargan et&#xa0;al., 2020</xref>). As is the case in most studies, existing models still have limitations. Although Faster R-CNN has a high accuracy rate, it remains a two-stage model that requires a second classification stage, resulting in slower inference speeds for images. However, it is more accurate than YOLOv5s (<xref ref-type="bibr" rid="B39">Salazar-Gomez et&#xa0;al., 2021</xref>). YOLOv5s, on the other hand, is faster during image inference since it is a single-stage model; however, it has been reported to have COI issues for weed species that are close together, and we were unable to produce solid results for overlapping weed species (<xref ref-type="bibr" rid="B33">Rahman et&#xa0;al., 2023</xref>).</p>
<p>To address these constraints, researchers are seeking hybrid models that leverage the strengths of multiple models. In this research, we present a hybrid framework based on deep learning that integrates YOLOv7 and AlexNet to achieve automated weed detection and control. YOLOv7 represents an innovative leap forward in object detection, offering enhanced measures of speed and accuracy through improvements, such as the Extended Efficient Layer Aggregation Network (E-ELAN) (<xref ref-type="bibr" rid="B16">Jiang et&#xa0;al., 2022a</xref>). In our framework, YOLOv7 recognizes complex field conditions and identifies weed targets at rapid real-time speeds. Once weed detection is accomplished, the features of the weeds are passed to AlexNet, a powerful CNN framework proposed for efficiency in classification tasks. While the weeds are already classified, AlexNet is used for fine-grained identification of weed species (<xref ref-type="bibr" rid="B4">Beeharry and Bassoo, 2020</xref>).</p>
<p>In this research, we develop a novel hybrid framework that combines YOLOv7 and AlexNet to address the primary challenges associated with automatic weed detection and classification in agricultural settings. This framework enables the detection of small and overlapping weeds in real-time and is easily integrated with unmanned aerial vehicles (UAVs) or robots, further advancing by allowing the classification of visually similar species. The proposed architecture is modular and can be scaled to different agricultural settings. It incorporates sustainable agriculture principles by leveraging additional monitoring strategies to minimize reliance on negative herbicides and support data-driven precision agriculture. Specifically, we apply transfer learning, where pre-trained learning models, previously trained on large and complex datasets, enable efficient and effective learning (<xref ref-type="bibr" rid="B15">Iman et&#xa0;al., 2023</xref>). Combining YOLOv7&#x2019;s fast and accurate weed detection capabilities with AlexNet&#x2019;s lightweight yet effective classification offered a unique approach for automated weed detection, localization, and classification (<xref ref-type="bibr" rid="B22">L&#xf3;pez-Correa et&#xa0;al., 2022</xref>). YOLOv7 can effectively detect weeds in complex and crowded field conditions, which is vital for timely action and reduced herbicide use. At the same time, AlexNet provides efficient and accurate species classification at a superior level. The strengths of both models combine to create a functional, real-world solution for agricultural scenarios, significantly enhancing the efficacy, efficiency, and sustainability of modern weed management systems. 
This study aims to (1) create an integrated deep learning framework consisting of YOLOv7 and AlexNet for real-time weed detection and classification; (2) assess the model&#x2019;s detection performance for small, overlapping, and visually mimicking weed species; and (3) analyze the usability of the proposed model for utilization in real-time agricultural contexts (i.e., UAVs or mobile robots). The vital contributions are summarized as follows:</p>
<list list-type="bullet">
<list-item>
<p>The authors proposed a hybrid deep learning approach that utilized YOLOv7 for deep learning object detection and AlexNet for classification, enabling real-time and accurate weed sensing within a single system.</p></list-item>
<list-item>
<p>The model improved detection ability, especially when detecting small weeds, overlapping weeds, and closely growing weeds in complex paddock environments. This addressed one of the shortcomings of previous methods.</p></list-item>
<list-item>
<p>By integrating AlexNet, the method demonstrated improved classification ability in detecting weeds belonging to the same genus with similar morphological characteristics.</p></list-item>
<list-item>
<p>The proposed model retained real-time inference times, making it suitable for deployment on UAVs, mobile robots, or other real-time agricultural platforms.</p></list-item>
<list-item>
<p>The authors incorporated modular and scalable architecture, allowing the method to quickly transfer or adapt to different agricultural contexts and datasets with minimal training.</p></list-item>
<list-item>
<p>The suggested system could encourage environmental sustainability in agriculture through enhanced precision and timeliness of weed identification, which may help mitigate excessive reliance on herbicide applications in future practices.</p></list-item>
<list-item>
<p>Transfer learning techniques enhanced the performance and generalizability of the model, enabling effective training with limited quantities of annotated agricultural datasets.</p></list-item>
</list>
<p>The organization of this paper is as follows: Section 1 outlines key aspects related to agriculture and weed control. A brief overview of the existing work in deep learning and machine learning methods for weed identification is presented in Section 2. Section 3 describes the materials and the experimental methods used for categorization and detection of weeds. Some of the methods involved in this study include feature selection, bounding box selection, weed identification, and classification. Section 4 of the paper presents the results and discussion of the study. Section 5 outlines the conclusion and future work of the proposed algorithms.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Literature survey</title>
<p>In the literature review section, <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref> summarizes the comparative analysis of the analyzed methods, describing the methodology, performance information, key limitations, and contributions to the need for a hybrid model. The table provides a clear overview of the research gaps and adequately substantiates the need to propose a better model. The researchers (<xref ref-type="bibr" rid="B23">L&#xf3;pez-Mart&#xed;nez et&#xa0;al., 2023</xref>) reported that the experiment yielded an accuracy of 0.65 for CNN models, utilizing an HPC cluster to classify the weeds. The optimal training time for CNN models in the experiment was 37 minutes and 55.193 seconds, utilizing six HPCC cores. It has been observed that using the Lightweight Deep Learning model, YOLOv5 outperforms SSD-ResNet50 in weed identification and classification. It supports commercial real-time weed control through an autonomous laser-weeding robot. In the context of weed detection, the mAP@0.5 of YOLOv5 was recorded at 0.88. The YOLOv5 model achieved a frame per second (FPS) rate of 27. Another research group reported that deep learning techniques, particularly Transformer models such as SegFormer, have proven highly effective for the detection and classification of weeds. The model offers greater accuracy and efficiency compared to traditional methods. SegFormer achieved a Mean Accuracy (mAcc) of 75.18% and a Mean Intersection over Union (mIoU) of 65.74%. In contrast, Swin Transformer had nearly five times as many parameters compared to SegFormer.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Literature integrated summary.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Author &amp; ref</th>
<th valign="middle" align="center">Dataset</th>
<th valign="middle" align="center">Method</th>
<th valign="middle" align="center">Classification</th>
<th valign="middle" align="center">Accuracy</th>
<th valign="middle" align="center">Limitation</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center"><xref ref-type="bibr" rid="B23">L&#xf3;pez-Mart&#xed;nez et&#xa0;al., 2023</xref></td>
<td valign="middle" align="center">DeepWeeds</td>
<td valign="middle" align="center">HPCC infrastructure + DL (CNN)</td>
<td valign="middle" align="center">Multiclass</td>
<td valign="middle" align="center">0.65</td>
<td valign="middle" align="center">The HPCC requires a high-quality network infrastructure to operate effectively.</td>
</tr>
<tr>
<td valign="middle" align="center"><xref ref-type="bibr" rid="B8">Fatima et&#xa0;al., 2023</xref></td>
<td valign="middle" align="center">Weed Images Collected (9000)</td>
<td valign="middle" align="center">YOLOv5 + SSD-ResNet50</td>
<td valign="middle" align="center">Multiclass</td>
<td valign="middle" align="center">0.88 (mAP)</td>
<td valign="middle" align="center">The dataset includes only a Limited Number of weed species, with a total of only 4 species used.</td>
</tr>
<tr>
<td valign="middle" align="center"><xref ref-type="bibr" rid="B17">Jiang et&#xa0;al., 2022b</xref></td>
<td valign="middle" align="center">Weed (1006) images</td>
<td valign="middle" align="center">Swin Transformer, SegFormer and Segmenter.</td>
<td valign="middle" align="center">Multiclass</td>
<td valign="middle" align="center">75.18%</td>
<td valign="middle" align="center">Limited number of images and only 10 species are used.</td>
</tr>
<tr>
<td valign="middle" align="center"><xref ref-type="bibr" rid="B9">Gallo et&#xa0;al., 2023</xref></td>
<td valign="middle" align="center">2 Dataset Lincoln Beet (LB) + Chicory Plant (CP) (4402) images</td>
<td valign="middle" align="center">YOLOv7</td>
<td valign="middle" align="center">Binary</td>
<td valign="middle" align="center">56.6%</td>
<td valign="middle" align="center">The two datasets comprise a total of 4,402 images, with only two classes being used.</td>
</tr>
<tr>
<td valign="middle" align="center"><xref ref-type="bibr" rid="B18">Khan et&#xa0;al., 2022</xref></td>
<td valign="middle" align="center">Collected locally not available publicly</td>
<td valign="middle" align="center">Tiny-YOLOv4</td>
<td valign="middle" align="center">Multiclass</td>
<td valign="middle" align="center">49.4%<break/>(mAP)</td>
<td valign="middle" align="center">Limited dataset and limited classes. Old Version of Yolo is used.</td>
</tr>
<tr>
<td valign="middle" align="center"><xref ref-type="bibr" rid="B20">Liu et&#xa0;al., 2022a</xref></td>
<td valign="middle" align="center">1000 images of maize seedlings and weeds</td>
<td valign="middle" align="center">Multiple YOLOv4-tiny</td>
<td valign="middle" align="center">Multiple</td>
<td valign="middle" align="center">86.69% (mAP)</td>
<td valign="middle" align="center">The training data was imbalanced, and the model was only tested on weeds during the maize seedling stage with a limited Number of Classes.</td>
</tr>
<tr>
<td valign="middle" align="center"><xref ref-type="bibr" rid="B40">Saleem et&#xa0;al., 2022</xref></td>
<td valign="middle" align="center">Deep Weeds (17,509), 8 Classes</td>
<td valign="middle" align="center">Faster RCNN</td>
<td valign="middle" align="center">Multiclass</td>
<td valign="middle" align="center">87.64% (mAP)</td>
<td valign="middle" align="center">Limited number of classes.</td>
</tr>
<tr>
<td valign="middle" align="center"><xref ref-type="bibr" rid="B12">Hashemi-Beni et&#xa0;al., 2020</xref></td>
<td valign="middle" align="center">Crop/Weed Field Image 60 Images</td>
<td valign="middle" align="center">U-Net and FCN-8s</td>
<td valign="middle" align="center">Multiclass</td>
<td valign="middle" align="center">(75.1%) &amp; (66.72%)</td>
<td valign="middle" align="center">There is a need to enhance segmentation and further improve object detection.</td>
</tr>
<tr>
<td valign="middle" align="center"><xref ref-type="bibr" rid="B44">Sivakumar et&#xa0;al., 2020</xref></td>
<td valign="middle" align="center">Soybean weed</td>
<td valign="middle" align="center">Faster R-CNN and Single Shot Detector (SSD)</td>
<td valign="middle" align="center">Single Class</td>
<td valign="middle" align="center">0.85 (Mean IoU)<break/>0.84 (Mean IoU)</td>
<td valign="middle" align="center">The dataset exhibited limited variation, and the SSD model showed higher misclassification rates, particularly for the limited Number of Classes.</td>
</tr>
<tr>
<td valign="middle" align="center"><xref ref-type="bibr" rid="B19">Kulkarni, 2019</xref></td>
<td valign="middle" align="center">250 Crop and weed images</td>
<td valign="middle" align="center">CNN model</td>
<td valign="middle" align="center">Binary</td>
<td valign="middle" align="center">85%</td>
<td valign="middle" align="center">Small dataset, no specific weed species.</td>
</tr>
<tr>
<td valign="middle" align="center"><xref ref-type="bibr" rid="B42">Sarker and Kim, 2019</xref></td>
<td valign="middle" align="center">2000 images.</td>
<td valign="middle" align="center">(R-FCN) with ResNet-101</td>
<td valign="middle" align="center">Multiclass</td>
<td valign="middle" align="center">0.81<break/>(mAP)</td>
<td valign="middle" align="center">A small dataset was used, and dropout techniques and data augmentation were employed to overcome the limitations of the small dataset size.</td>
</tr>
<tr>
<td valign="middle" align="center"><xref ref-type="bibr" rid="B41">Sampurno et&#xa0;al., 2024</xref></td>
<td valign="middle" align="center">5000 images<break/>Acquired from (T-PIRC)</td>
<td valign="middle" align="center">YOLOv5 and YOLOv8</td>
<td valign="middle" align="center">Multiclass</td>
<td valign="middle" align="center">82.40 (mAP)<break/>82.10 (mAP)</td>
<td valign="middle" align="center">With a limited dataset, the model could be further trained on more diverse data from other orchard environments.</td>
</tr>
<tr>
<td valign="middle" align="center"><xref ref-type="bibr" rid="B51">Xu et&#xa0;al., 2024</xref></td>
<td valign="middle" align="center">grass weeds and wheat</td>
<td valign="middle" align="center">WeedsNet</td>
<td valign="middle" align="center">Binary</td>
<td valign="middle" align="center">62.3%</td>
<td valign="middle" align="center">Limited datasets need to enhance the accuracy.</td>
</tr>
<tr>
<td valign="middle" align="center"><xref ref-type="bibr" rid="B13">Hashemi-Beni et&#xa0;al., 2022</xref></td>
<td valign="middle" align="center">Crop/Weed Field Image Dataset &amp; Sugar Cane Orthomosaic dataset</td>
<td valign="middle" align="center">SegNet, FCN-32s, FCN-16s, FCN-8s, U-Net</td>
<td valign="middle" align="center">Multiclass</td>
<td valign="middle" align="center">84.3%<break/>81.1%<break/>77.9%</td>
<td valign="middle" align="center">The precision has to be improved. A little dataset was utilized.</td>
</tr>
<tr>
<td valign="middle" align="center"><xref ref-type="bibr" rid="B38">Saini and Nagesh, 2024</xref></td>
<td valign="middle" align="center">CottonWeeds</td>
<td valign="middle" align="center">YOLOv5 models</td>
<td valign="middle" align="center">Multiclass</td>
<td valign="middle" align="center">87.4% (mAP)</td>
<td valign="middle" align="center">Utilized a little dataset. The precision has to be improved.</td>
</tr>
<tr>
<td valign="middle" align="center"><xref ref-type="bibr" rid="B34">Rai et&#xa0;al., 2023</xref></td>
<td valign="middle" align="center">common, annotated imagery dataset</td>
<td valign="middle" align="center">YOLOv3 object detection model</td>
<td valign="middle" align="center">Multiclass</td>
<td valign="middle" align="center">54.3% (mAP)</td>
<td valign="middle" align="center">A limited number of images and weed species were used.</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The study by (<xref ref-type="bibr" rid="B17">Jiang et&#xa0;al., 2022b</xref>) assessed the effectiveness of the YOLOv7 model for weed detection using UAV images of chicory plantations and the Lincoln beet dataset. On the Chicory Plant (CP) dataset, YOLOv7 achieved promising results, with mAP@0.5, recall, and precision scores of 56.6%, 62.1%, and 61.3%, respectively. When applied to the Lincoln beet (LB) dataset, YOLOv7 outperformed previous models by increasing the mAP@0.5 scores from 51% to 61%, the mAP for weeds from 67.5% to 74.1%, and the mAP for sugar beets from 34.6% to 48%.</p>
<p>The Tiny-YOLOv4 model was employed to detect potato weeds in real-time (<xref ref-type="bibr" rid="B9">Gallo et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B18">Khan et&#xa0;al., 2022</xref>). On a limited dataset, this model&#x2019;s testing accuracy was 49.4%. The best performing model was used to identify weeds in potato fields.</p>
<p>The images of corn and weeds were manually collected and labeled in one study (<xref ref-type="bibr" rid="B20">Liu et&#xa0;al., 2022a</xref>). Several YOLOv4-tiny network models were trained and evaluated. The real-time detection of weeds in maize seedlings using deep learning, particularly YOLOv4-tiny, achieved a mean Average Precision (mAP) of 86.69%, a detection speed of 57.33 frames per second (f/s), and a model size of 34.08 MB. These results highlight the model&#x2019;s effectiveness in various weather conditions. The Faster R-CNN architecture achieved the highest mean average accuracy score among all the evaluated models. It was trained using various deep learning backbones and classification models, including Inception-v2, ResNet-50, and ResNet-101. The final model utilized was ResNet-101, which effectively learned the features that distinguish seven weed classes and negative classes. The mAP reached 87.64%, demonstrating effective detection of most weed classes.</p>
<p>The studies proposed by (<xref ref-type="bibr" rid="B40">Saleem et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B12">Hashemi-Beni et&#xa0;al., 2020</xref>) utilize deep learning algorithms to categorize remote sensing images for agricultural purposes, with a particular focus on crop and weed identification using the U-Net and FCN-8s models. U-Net showcased its capability by achieving an impressive standard accuracy of 72.2%. Meanwhile, FCN-8s experienced a significant decrease but maintained a high accuracy of 72.1%. The results highlight the use of deep learning techniques, such as U-Net and FCN-8s, to improve crop and weed identification in agricultural settings. Ongoing upgrades and optimization of this system can enhance accuracy and effectiveness, benefiting precision agricultural operations.</p>
<p>The researchers (<xref ref-type="bibr" rid="B44">Sivakumar et&#xa0;al., 2020</xref>) evaluated weed identification by comparing two object identity models, Faster R-CNN and Single Shot Detector (SSD), using IoU and inference speed. Regarding performance, recall, F1 score, and IoU, the Faster R-CNN version with 200 box suggestions demonstrated overall weed detection performance comparable to that of the SSD model. Notably, the inference time taken by the Faster RCNN model was the same as that of the SSD version. With 200 suggestions, the Faster R-CNN model has achieved an IoU of 0.85, an F1 score of 0.66, a precision of 0.65, and a recall of 0.68. In contrast, the SSD model achieved an IoU of 0.84, an f1 score of 0.67, an Accuracy of 0.66 and a Recall of 0.66. The recognition accuracy was higher for the optimal confidence threshold of the SSD, but it lagged behind the Faster R-CNN model. The SSD requires improved generalization capability for utilizing UAV data to detect weeds in the mid-to-late stages of soybean fields. IoT-based weed detection systems utilize CNN and image processing for classifying weeds, allowing for remote monitoring of crops and reducing the need for manual labor and chemical usage in agriculture. The system achieves an average accuracy of 85% with 250 training images, along with an average false ratio of 7% and a false acceptance ratio of 2.6%.</p>
<p>The researchers (<xref ref-type="bibr" rid="B19">Kulkarni, 2019</xref>; <xref ref-type="bibr" rid="B42">Sarker and Kim, 2019</xref>) implemented region-based deep convolutional neural networks, particularly ResNet-101 with R-FCN, which demonstrated exceptional performance in detecting weeds in farmland. These networks show better object classification performance when supported by dropout techniques and data augmentation. ResNet-101 with R-FCN outperforms other methods in object detection. The implementation of data augmentation and dropout techniques effectively reduces overfitting. Their proposed method achieves an accuracy of 81%, comparable to the baseline set by both the Faster R-CNN and R-FCN. It performs especially well on the evaluated datasets and improves object identification accuracy.</p>
<p>The study by (<xref ref-type="bibr" rid="B41">Sampurno et&#xa0;al., 2024</xref>) conducted a comparative analysis to evaluate each YOLO model based on detection accuracy, model complexity, and inference time. They observed that smaller variants, such as YOLOv5 and YOLOv8, have proven more effective than their larger counterparts. For detection, YOLOv5n-seg achieved an mAP@0.5 value of 80.90%, while YOLOv8s-seg achieved a value of 82.40%. Developing a model for detecting weeds in wheat fields using RGB images is challenging due to the similar appearance of grass weeds and wheat (<xref ref-type="bibr" rid="B51">Xu et&#xa0;al., 2024</xref>). A dual-path weed detection network (WeedsNet) has been proposed to address this issue by utilizing both RGB and depth images. WeedsNet effectively combines multi-modal information, significantly enhancing detection accuracy to 62.3% in natural wheat fields, while achieving a detection speed of 0.5 seconds per image. Precise crop/weed mapping is essential for targeted treatment, and advancements in UAS-based remote sensing and deep learning have improved this process. This research compares various deep learning methods for crop/weed discrimination using UAS data. It evaluates several U-Net, SegNet, FCN (32s, 16s, 8s), and DeepLabV3+ architectures. DeepLabV3+ delivered the best performance, with an accuracy of 84.3%, followed by FCN-8s at 81.1% and U-Net at 77.9%. These results indicate the significant potential of the ResNet-18-based DeepLabV3+ to eliminate weeds.</p>
<p>Another research group utilizes pre-trained YOLOv5 models for weed identification, with YOLOv5x achieving remarkable results, including a mAP_0.5:0.95 score of 72.5% and a mAP_0.5 score of 87.4% (<xref ref-type="bibr" rid="B13">Hashemi-Beni et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B38">Saini and Nagesh, 2024</xref>). The result indicates that the model is effective in identifying weeds in cotton fields. The research demonstrates the potential of CottonWeeds as a key training platform for developing real-time, in-field weed recognition systems.</p>
<p>The successful implementation of site-specific weed management (SSWM) requires the reliable identification of weed species, as well as their exact location on the site (<xref ref-type="bibr" rid="B34">Rai et al., 2023</xref>). The YOLOv3 model effectively identified multiple weeds of different species, achieving precision scores of 43.28%, 26.30%, 89.89%, and 57.80% for the possessed species, and a mAP of 54.3%. These findings highlight the potential of deep learning models for weed identification and the benefits of object detection for SSWM.</p>
<p>Contemporary deep learning networks, such as ResNet and VGG, can identify objects of interest; however, these models come with very high computational costs, making real-time object detection impractical. YOLO models, particularly YOLOv3, YOLOv4, and YOLOv5, offer faster weed detection. Nevertheless, their accuracy diminishes when weeds are small or tightly placed. This limitation leads to the selection of YOLOv7 due to its enhanced properties.</p>
</sec>
<sec id="s3">
<label>3</label>
<title>Material and techniques</title>
<p>The process in the diagram integrates YOLOv7 and AlexNet deep neural network systems, where weed detection is performed using YOLOv7, and its classification is determined by AlexNet, as illustrated in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref> below. Here is a thorough, step-by-step breakdown:</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Hybrid proposed model for weed detection and classification.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664650-g001.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a process for weed identification. It starts with a weed dataset. The data undergoes preprocessing with steps such as data cleaning and integration. The YOLO algorithm processes the input image for feature extraction. These features are fed into an Alex-Net model for classification. Finally, weed classification is achieved using the extracted features.</alt-text>
</graphic></fig>
<sec id="s3_1">
<label>3.1</label>
<title>Brief introduction of the proposed hybrid model for weed detection and classification</title>
<sec id="s3_1_1">
<label>3.1.1</label>
<title>Proposed YOLOv7-AlexNet methodology</title>
<p>The proposed framework employs a hybrid architecture that combines YOLOv7 and AlexNet to rapidly and reliably detect and classify weeds in the field. <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref> depicts the initial labeled weed image dataset to input into the model, followed by the preprocessing steps, which include data cleaning, data transformation, and data normalization to enhance the consistency of the input data and the model&#x2019;s performance.</p>
<p>The final preprocessed images would then be passed through the detection module based on YOLOv7, which would determine the weed&#x2019;s location in real-time by creating spatial bounding boxes. YOLOv7 was selected in this framework due to its speed and ability to detect small weeds, overlapping weeds, or weeds growing in clumps, similar to how they would appear in a varied agricultural field environment. The weed-localized areas would then be passed to the classification module based on the AlexNet architecture, which provides specific species-level class labels for the weed, such as foxtail and thistle. The AlexNet model is lightweight and capable of classifying images rapidly with an acceptable degree of accuracy, enabling real-time and embedded applications in agricultural settings. The model&#x2019;s performance depends on an implicit feature fusion strategy: YOLOv7 handles the spatial localization of the targets, while AlexNet evaluates each localized image region and looks for visual similarities with the classes it has previously learned (<xref ref-type="bibr" rid="B25">Murad et al., 2023</xref>). The combined system benefits from increased robustness and generalization across different agricultural scenarios.</p>
<p>The end product merges YOLOv7 localization with AlexNet&#x2019;s classification to develop annotated images that combine bounding boxes and species names. This enables practical applications on UAVs or robotic vehicles for precision weed management and can also facilitate real-time decision-making for sustainable farming practices.</p>
<p>For our classification framework, we adopted a hybrid architecture combining YOLOv7 for object detection and AlexNet for final classification. Individually, AlexNet offers fast inference (~20 ms) and low power consumption (~150 W), but with moderate classification accuracy (~73%). In contrast, YOLOv7 provides significantly higher accuracy (up to 99.4%) and faster detection (~14 ms), though at a higher computational cost. By integrating YOLOv7&#x2019;s precise localization with AlexNet&#x2019;s lightweight classification, the hybrid model achieves a balanced performance, reaching an overall accuracy of ~99.46% with a total inference time of ~34ms. This approach ensures high efficiency and accuracy, making it well-suited for real-time applications in resource-constrained environments (<xref ref-type="bibr" rid="B26">Ndlovu et al., 2020</xref>).</p>
</sec>
<sec id="s3_1_2">
<label>3.1.2</label>
<title>Detection and feature extraction</title>
<p>The YOLOv7 algorithm performs the detection process by recognizing and extracting the spatial and contextual features of weeds from the input images. These types of features include significant attribute details, such as weed shape, size, and position. The detection is performed using YOLOv7&#x2019;s architecture, which comprises a backbone for extracting features, a neck to collect features across scales, and a head for object detection and bounding box prediction. Next, the features extracted, which include much more than bounding box coordinates, will contain high-dimensional, enriched intermediate representations that are ultimately used as input for the AlexNet model. AlexNet, a convolutional neural network architecture for image classification, processes the images through its different layers to evolve the representation of weeds into class-specific representations. Once these features are passed through AlexNet, weeds are classified into specific weed species (<xref ref-type="bibr" rid="B31">Pandey et al., 2022</xref>).</p>
<p>It is essential to note that the models YOLOv7 and AlexNet are trained sequentially, rather than jointly. YOLOv7 is first trained to detect weeds accurately, and then the dataset of inputs, as feature outputs from YOLOv7, is used to fine-tune AlexNet for the classification of weed species. The sequential approach to training the models enables the weed detection capability unique to YOLOv7 to evolve classification as a function of AlexNet&#x2019;s generic architecture. In contrast to a more complex joint approach, this approach minimizes residual confusion in detection relative to classification by maintaining a clear transition from detection to classification while still developing a tidy model integration pipeline.</p>
</sec>
<sec id="s3_1_3">
<label>3.1.3</label>
<title>AlexNet model</title>
<p>After YOLOv7 completes the detection process, the next step involves classifying the identified weed objects in detail using the AlexNet model. Instead of taking an image as input, AlexNet takes the feature representation produced by YOLOv7. The features produced by YOLOv7 are high-level features that include spatial and contextual aspects of the weed objects; therefore, they should provide input to help the model better distinguish between weed species that share particularly close visual similarities.</p>
<p>AlexNet is a convolutional neural network specifically designed for classification tasks. It is designed to perform classification using a hierarchy of convolutional layers that aim to extract and recognize complex hierarchical patterns important for distinguishing between visually similar weeds. Pooling layers have the role of reducing dimensionality while preserving the more discriminative features. Certain activation functions, such as ReLU, can allow non-linearity and promote deeper and more abstract learning (<xref ref-type="bibr" rid="B28">Omilola and Robele, 2017</xref>). Once all the features have been processed, layers of fully connected neurons aggregate the learned features and produce class probabilities for the final classification. This enables the accurate discrimination of weed species after identifying and localizing the weed objects. The two networks were trained sequentially: YOLOv7 was first trained with optimization for localization, and then its output features were used to train and optimize AlexNet for classification in a standalone manner. Allowing the two models to be learned independently of each other enables each model to specialize in its respective activity, thereby increasing the robustness and interpretability of the entire system.</p>
</sec>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Configuring the hyper parameters and fine-tuning the hybrid model</title>
<sec id="s3_2_1">
<label>3.2.1</label>
<title>Fine tuning</title>
<p>The hybrid model proposed utilizes transfer learning by successively fine-tuning two pre-trained models: YOLOv7 as a weed detector and AlexNet as a weed classifier. The first training phase involves YOLOv7, which is trained independently to detect and localize weeds in agricultural input images. After YOLOv7 is trained, the intermediate feature vectors are extracted from YOLOv7 and then used as input to fine-tune the AlexNet model, which recognizes them as &#x201c;input&#x201d; features for species-level classification. Fine-tuning is accomplished in the following ways to adapt the two models to fundamentally different tasks.</p>
<list list-type="bullet">
<list-item>
<p>Layer freezing: The lower layers of both networks (YOLOv7 and AlexNet) are frozen, allowing us to retain the learned low-level features within their respective models. The higher layers of the models are then trained with the given dataset. Layer freezing is a method for adapting specific domain knowledge to the respective models without compromising the general visual features learned from larger datasets.</p></list-item>
<list-item>
<p>Learning rate modulation: During fine-tuning, the learning rate is gradually reduced so that training converges smoothly and each fine-tuning process remains stable. Keeping the weight updates small prevents excessive adjustment of the pre-trained weights, which is particularly beneficial at the beginning of training, making it easier for the model to learn features relevant to the weed classification domain.</p></list-item>
<list-item>
<p>Regularization method: To limit overfitting, improve generalization, and support performance across a variety of environmental conditions and different types of weeds, dropout and weight decay (L2 regularization) methods are integrated into both model fine-tuning processes.</p></list-item>
</list>
</sec>
<sec id="s3_2_2">
<label>3.2.2</label>
<title>Hyper parameter for training</title>
<p>We carefully tuned the hyper parameters for our hybrid model, which combines YOLOv7 for weed detection and AlexNet for classification, to enhance overall performance. A learning rate of 0.001 was selected to balance convergence speed and stability during training. The batch size was set to 16, which allowed the model to capture finer details in the dataset without overwhelming computational resources. We conducted training over 100 epochs to ensure optimal performance without overfitting. The following are important hyper parameters for the hybrid model&#x2019;s training:</p>
<list list-type="bullet">
<list-item>
<p>Batch Size: The number of samples processed before the model parameters are updated. The batch size can range from 16 to 32.</p></list-item>
<list-item>
<p>Epochs: The number of complete passes over the whole training dataset, as defined by the training process. The model is trained for a suitable number of epochs (e.g., 100&#x2013;200 epochs) until it achieves the best results.</p></list-item>
</list>
</sec>
<sec id="s3_2_3">
<label>3.2.3</label>
<title>Dataset collection</title>
<p>The first step in this methodology involves gathering a comprehensive dataset of images that depict crops in various conditions, with some images containing weeds and others not. The Weed25 dataset used in this hybrid model is publicly available on the Baidu search engine (<xref ref-type="bibr" rid="B46">Wang P. et&#xa0;al., 2022</xref>). The image resources for the Weed25 dataset were gathered from fields and lawns in Chongqing, China, capturing 25 prevalent weed species in East Asia. This dataset specifically includes images of weeds with a balanced class distribution, enhancing the model&#x2019;s ability to differentiate effectively between various species. <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref> describes the Weed25 dataset, comprising 14,023 images categorized into 25 distinct classes. The Weed25 dataset includes a variety of weed species, encompassing Barnyard grass (Echinochloa crus-galli), Crabgrass (Digitaria sanguinalis), Green foxtail (Setaria viridis), Sedge (Cyperus rotundus), Horseweed (Conyza canadensis), Field thistle (Cirsium arvense), Cocklebur (Xanthium strumarium), Indian aster (Kalimeris indica), Bidens (Bidens pilosa), Ceylon spinach (Basella alba), Billygoat weed (Ageratum conyzoides), White smartweed (Persicaria alba), Asiatic smartweed (Persicaria chinensis), Chinese knotweed (Polygonum chinense), Alligatorweed (Alternanthera philoxeroides), Pigweed (Amaranthus retroflexus), Shepherd&#x2019;s purse (Capsella bursa-pastoris), Purslane (Portulaca oleracea), Common dayflower (Commelina communis), Goosefoots (Chenopodium album), Plantain (Plantago major), Viola (Viola odorata), Black nightshade (Solanum nigrum), Mock strawberry (Duchesnea indica), and Velvetleaf (Abutilon theophrasti).</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Dataset with total number of images and categories.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Dataset</th>
<th valign="middle" align="center">Total number of images</th>
<th valign="middle" align="center">Number of categories</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Weed25</td>
<td valign="middle" align="center">14,023</td>
<td valign="middle" align="center">25</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The photos were collected from October 2021 to August 2022. They were captured at a shooting height of around 30 to 50&#xa0;cm and an angle of 60&#xb0; to 90&#xb0;, using a digital camera (Nikon D5300 SLR, Japan) or a smartphone (Huawei Enjoy 9S, China), which ensured a nearly vertical viewpoint of the weeds. To account for the effects of sunlight intensity and angle on weed identification, images were taken between 9:00 and 17:00 on sunny, cloudy, and rainy days, mirroring natural environments. Challenges such as occlusion and overlapping leaves were addressed during image collection. Additionally, some weed species were greenhouse-grown to capture images at various growth stages, primarily between the two- to nine-leaf stages (BBCH 12&#x2013;19). These diverse conditions were incorporated to prevent the model from overfitting; the model can then be applied to real-life farming practices with new unseen data. Some of the Weed25 images are shown in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Example of a dataset.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664650-g002.tif">
<alt-text content-type="machine-generated">Eight images of seedling plants displayed in a grid. Each is labeled as follows: Cocklebur, Bidens, Billygoat_weed, Viola, Shepherd Purse, Goosefoots, Indian_Aster, and Horseweed. The seedlings are shown in pots or ground soil with visible leaves.</alt-text>
</graphic></fig>
</sec>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Preprocessing data</title>
<p>The preprocessing step in the methodology is crucial for the dataset to ensure optimal processing and performance by the YOLOv7 algorithm. One important step is image resizing, which involves scaling the photos to a format of 640 &#xd7; 640 pixels for YOLOv7 (<xref ref-type="bibr" rid="B48">Wang et&#xa0;al., 2024</xref>). This specific resolution provides sufficient detail for accurate object identification while minimizing the analysis load. The size of 640 &#xd7; 640 for the images is chosen to be large enough to preserve key object features while remaining small enough for quicker processing and training. Additionally, data augmentation involves applying small, random transformations to the original images, which helps the model become more robust to variations it might encounter in real-world applications. Random rotations were applied to the images, ranging from -15 to +15 degrees. This technique mimics the natural variability in how weeds appear in the field, accounting for different angles and perspectives that might otherwise confuse the model.</p>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Network architecture of YOLOv7</title>
<p>The creators of YOLOv4 and YOLOR created and publicly published YOLOv7 (<xref ref-type="bibr" rid="B47">Wang et&#xa0;al., 2023</xref>) in July 2022, marking a significant advancement in the efficiency of object detectors. YOLOv7 surpassed all previous detectors in both speed and accuracy across the range from 5 frames per second (fps) to 160 fps. This version introduces several architectural changes and a series of bag-of-freebies that improve the model&#x2019;s precision without affecting its inference speed, at the cost of a considerable increase in its training duration (<xref ref-type="bibr" rid="B50">Xing et&#xa0;al., 2023</xref>). Two architectural innovations introduced in YOLOv7 are the model scaling for concatenation-based models and the Extended Efficient Layer Aggregation Network (E-ELAN). E-ELAN is a strategy that allows a deep model to learn and converge more efficiently by controlling the shortest and longest gradient paths. This approach allows YOLOv7 to stack an unlimited number of computational blocks with various groups that share features through a technique known as feature shuffling, as well as merging cardinality (<xref ref-type="bibr" rid="B45">Terven et&#xa0;al., 2023</xref>). This design not only enhances the network&#x2019;s learning capability but also maintains the integrity of the gradient path. E-ELAN modifies only the computational block while keeping the transition layer architecture intact. In concatenation-based models, conventional scaling methods, such as depth scaling, change the ratio of the input to output channels of the transition layer, which can decrease the hardware&#x2019;s effectiveness. To address this, YOLOv7 scales the depth and width of the block by the same scale factor as suggested for concatenation-based architectures, thereby maintaining the model&#x2019;s optimal structure.</p>
<p>As an advanced convolutional neural network, the YOLOv7 architecture, detailed in <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>, is specifically designed for tasks such as weed identification and classification in agricultural contexts.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Layer details and parameters of the YOLOv7.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Layer</th>
<th valign="middle" align="center">From</th>
<th valign="middle" align="center">N</th>
<th valign="middle" align="center">Params</th>
<th valign="middle" align="center">Module</th>
<th valign="middle" align="center">Arguments</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">0</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">1928</td>
<td valign="middle" align="center">Conv</td>
<td valign="middle" align="center">[3, 32, 3, 1]</td>
</tr>
<tr>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">118,560</td>
<td valign="middle" align="center">Conv</td>
<td valign="middle" align="center">[32, 64, 3, 2]</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">19,408</td>
<td valign="middle" align="center">Bottleneck</td>
<td valign="middle" align="center">[64, 64]</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">174,688</td>
<td valign="middle" align="center">Conv</td>
<td valign="middle" align="center">[64, 128, 3, 2]</td>
</tr>
<tr>
<td valign="middle" align="center">4</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">3115,200</td>
<td valign="middle" align="center">BottleneckCSP</td>
<td valign="middle" align="center">[128, 128, 3]</td>
</tr>
<tr>
<td valign="middle" align="center">5</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">1296,448</td>
<td valign="middle" align="center">Conv</td>
<td valign="middle" align="center">[128, 256, 3, 2]</td>
</tr>
<tr>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">9625,152</td>
<td valign="middle" align="center">BottleneckCSP</td>
<td valign="middle" align="center">[256, 256, 9]</td>
</tr>
<tr>
<td valign="middle" align="center">7</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">11,181,184</td>
<td valign="middle" align="center">Conv</td>
<td valign="middle" align="center">[256, 512, 3, 2]</td>
</tr>
<tr>
<td valign="middle" align="center">8</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">32,296,320</td>
<td valign="middle" align="center">BottleneckCSP</td>
<td valign="middle" align="center">[512, 512, 3]</td>
</tr>
<tr>
<td valign="middle" align="center">9</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">14,722,176</td>
<td valign="middle" align="center">Conv</td>
<td valign="middle" align="center">[512, 1024, 3, 2]</td>
</tr>
<tr>
<td valign="middle" align="center">10</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">12,101,248</td>
<td valign="middle" align="center">SPP</td>
<td valign="middle" align="center">[1024, 1024, 5, 3, 1]</td>
</tr>
<tr>
<td valign="middle" align="center">11</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">11,963,008</td>
<td valign="middle" align="center">BottleneckCSP</td>
<td valign="middle" align="center">[1024, 1024, 1]</td>
</tr>
<tr>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">11,025,024</td>
<td valign="middle" align="center">Conv</td>
<td valign="middle" align="center">[1024, 512, 1, 1]</td>
</tr>
<tr>
<td valign="middle" align="center">13</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">0</td>
<td valign="middle" align="center">Upsample</td>
<td valign="middle" align="center">[None, 2, &#x2018;nearest&#x2019;]</td>
</tr>
<tr>
<td valign="middle" align="center">14</td>
<td valign="middle" align="center">-1</td>
<td valign="middle" align="center">8</td>
<td valign="middle" align="center">0</td>
<td valign="middle" align="center">Concat</td>
<td valign="middle" align="center">[1]</td>
</tr>
<tr>
<td valign="middle" align="center">15</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">11,118,208</td>
<td valign="middle" align="center">BottleneckCSP</td>
<td valign="middle" align="center">[1024, 512, 1, False]</td>
</tr>
<tr>
<td valign="middle" align="center">16</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">1262,656</td>
<td valign="middle" align="center">Conv</td>
<td valign="middle" align="center">[512, 256, 1, 1]</td>
</tr>
<tr>
<td valign="middle" align="center">17</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">0</td>
<td valign="middle" align="center">Upsample</td>
<td valign="middle" align="center">[None, 2, &#x2018;nearest&#x2019;]</td>
</tr>
<tr>
<td valign="middle" align="center">18</td>
<td valign="middle" align="center">-1</td>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">0</td>
<td valign="middle" align="center">Concat</td>
<td valign="middle" align="center">[1]</td>
</tr>
<tr>
<td valign="middle" align="center">19</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">1279,552</td>
<td valign="middle" align="center">BottleneckCSP</td>
<td valign="middle" align="center">[512, 256, 1, False]</td>
</tr>
<tr>
<td valign="middle" align="center">20</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">131,584</td>
<td valign="middle" align="center">Conv</td>
<td valign="middle" align="center">[256, 128, 1, 1]</td>
</tr>
<tr>
<td valign="middle" align="center">21</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">0</td>
<td valign="middle" align="center">Upsample</td>
<td valign="middle" align="center">[None, 2, &#x2018;nearest&#x2019;]</td>
</tr>
<tr>
<td valign="middle" align="center">22</td>
<td valign="middle" align="center">-1</td>
<td valign="middle" align="center">4</td>
<td valign="middle" align="center">0</td>
<td valign="middle" align="center">Concat</td>
<td valign="middle" align="center">[1]</td>
</tr>
<tr>
<td valign="middle" align="center">23</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">156,320</td>
<td valign="middle" align="center">BottleneckCSP</td>
<td valign="middle" align="center">[256, 128, 1, False]</td>
</tr>
<tr>
<td valign="middle" align="center">24</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">147,712</td>
<td valign="middle" align="center">Conv</td>
<td valign="middle" align="center">[128, 128, 3, 2]</td>
</tr>
<tr>
<td valign="middle" align="center">25</td>
<td valign="middle" align="center">-1</td>
<td valign="middle" align="center">20</td>
<td valign="middle" align="center">0</td>
<td valign="middle" align="center">Concat</td>
<td valign="middle" align="center">[1]</td>
</tr>
<tr>
<td valign="middle" align="center">26</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">313,856</td>
<td valign="middle" align="center">BottleneckCSP</td>
<td valign="middle" align="center">[256, 256, 1, False]</td>
</tr>
<tr>
<td valign="middle" align="center">27</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">590,336</td>
<td valign="middle" align="center">Conv</td>
<td valign="middle" align="center">[256, 256, 3, 2]</td>
</tr>
<tr>
<td valign="middle" align="center">28</td>
<td valign="middle" align="center">-1</td>
<td valign="middle" align="center">16</td>
<td valign="middle" align="center">0</td>
<td valign="middle" align="center">Concat</td>
<td valign="middle" align="center">[1]</td>
</tr>
<tr>
<td valign="middle" align="center">29</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">1,238,016</td>
<td valign="middle" align="center">BottleneckCSP</td>
<td valign="middle" align="center">[512, 512, 1, False]</td>
</tr>
<tr>
<td valign="middle" align="center">30</td>
<td valign="middle" align="center">[23, 26, 29]</td>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">2,106,822</td>
<td valign="middle" align="center">Detect</td>
<td valign="middle" align="center">[2, [128, 256, 512]]</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Backbone (feature extraction)</title>
<p>The backbone of YOLOv7 is designed for feature extraction from images at different scales. Its function can be described as follows:</p>
<disp-formula>
<mml:math display="block" id="M1"><mml:mrow><mml:mstyle mathsize="normal"><mml:mi>y</mml:mi></mml:mstyle><mml:mo>=</mml:mo><mml:mstyle mathsize="normal"><mml:mi>M</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>h</mml:mi></mml:mstyle><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathsize="normal"><mml:mi>B</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>c</mml:mi><mml:mi>h</mml:mi><mml:mi>N</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi></mml:mstyle><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathsize="normal"><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>v</mml:mi></mml:mstyle><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mstyle mathsize="normal"><mml:mi>x</mml:mi></mml:mstyle><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>Where,</p>
<list list-type="bullet">
<list-item>
<p>Applying the convolution process to the input x is known as Conv(x).</p></list-item>
<list-item>
<p>BatchNorm: BatchNorm symbolizes Batch Normalization.</p></list-item>
<list-item>
<p>Mish: Mish is the activation function used in YOLOv7, which is a smooth, non-monotonic function.</p></list-item>
<list-item>
<p>Stem Layer: The initial convolutional layers of the network that process the input images in their raw form.</p></list-item>
<list-item>
<p>Stages: A series of convolutional layers that extract features at varying levels of abstraction.</p></list-item>
<list-item>
<p>Cross-Stage Partial (CSP) Layers: Split the feature map and recombine it after processing, which reduces the computational cost and mitigates the vanishing gradient problem.</p></list-item>
<list-item>
<p>SPP (Spatial Pyramid Pooling) Layer: Combines features at various scales while preserving crucial spatial information.</p></list-item>
</list>
</sec>
<sec id="s3_6">
<label>3.6</label>
<title>Neck (aggregation and refinement of features)</title>
<p>The neck&#x2019;s role is to refine and aggregate features from the backbone, preparing them for object detection. The neck&#x2019;s fundamental structure in YOLOv7 may be described as follows:</p>
<disp-formula>
<mml:math display="block" id="M2"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>y</mml:mi></mml:mstyle><mml:mo>=</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>P</mml:mi><mml:mi>A</mml:mi><mml:mi>N</mml:mi><mml:mi>e</mml:mi><mml:mi>t</mml:mi></mml:mstyle><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>x</mml:mi></mml:mstyle><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>Where,</p>
<list list-type="bullet">
<list-item>
<p>x represents the input feature maps from the backbone.</p></list-item>
<list-item>
<p>PANet(x): A network for path aggregation that maximizes the combination of characteristics from several levels.</p></list-item>
</list>
</sec>
<sec id="s3_7">
<label>3.7</label>
<title>Head (detection)</title>
<list list-type="bullet">
<list-item>
<p>YOLOv7&#x2019;s head module makes predictions based on the characteristics that have been analyzed. Its structure is defined as:</p></list-item>
</list>
<disp-formula>
<mml:math display="block" id="M3"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>y</mml:mi></mml:mstyle><mml:mo>=</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>S</mml:mi><mml:mi>i</mml:mi><mml:mi>g</mml:mi><mml:mi>m</mml:mi><mml:mi>o</mml:mi><mml:mi>i</mml:mi><mml:mi>d</mml:mi></mml:mstyle><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>v</mml:mi></mml:mstyle><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>f</mml:mi></mml:mstyle><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>Where,</p>
<list list-type="bullet">
<list-item>
<p>f: represents the input feature map coming from the neck.</p></list-item>
<list-item>
<p>Conv: Convolution layers that prepare the features for prediction.</p></list-item>
<list-item>
<p>Sigmoid: Applies the sigmoid function to the predictions for bounding boxes and class probabilities.</p></list-item>
</list>
</sec>
<sec id="s3_8">
<label>3.8</label>
<title>Steps of process and results</title>
<list list-type="bullet">
<list-item>
<p>Input: An image of a weed is first introduced to the network with standard dimensions of 640 &#xd7; 640. In YOLOv7 models, a common preprocessing step is resizing the image data to a standard size.</p></list-item>
<list-item>
<p>Feature Extraction: The backbone network extracts features using convolutional layers.</p></list-item>
<list-item>
<p>Feature Aggregation: The neck component combines features from different scales.</p></list-item>
<list-item>
<p>Prediction: Bounding boxes, objectness scores (confidence scores indicating the presence of weeds), and class probabilities are generated by the head network.</p></list-item>
<list-item>
<p>Post-Processing: Non-maximum suppression and thresholding are applied to improve detection accuracy.</p></list-item>
<list-item>
<p>Output: Bounding boxes and labels for detected weeds are incorporated in the final output image.</p></list-item>
</list>
</sec>
<sec id="s3_9">
<label>3.9</label>
<title>AlexNet architecture</title>
<p>After identifying and detecting weeds using YOLOv7, the next step is to categorize the types of weeds using an AlexNet model. The extracted features comprise high-dimensional representations from which the vital characteristics and patterns of the input images were obtained before being fed into AlexNet. AlexNet is a convolutional neural network (CNN) architecture commonly used for image categorization tasks (<xref ref-type="bibr" rid="B11">Gikunda and Jouandeau, 2019</xref>). Its design enables it to learn details of complex patterns across large datasets, which is helpful for tasks such as weed categorization. AlexNet is composed of three fully connected layers, five convolution layers, and one SoftMax output layer (<xref ref-type="bibr" rid="B32">Patel, 2020</xref>). The output aims to identify the probability that the image falls under any of the 1,000 object categories, given the input, an RGB image of size 227 &#xd7; 227 &#xd7; 3. <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref> shows the successive layers of the AlexNet architecture.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>AlexNet architecture.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664650-g003.tif">
<alt-text content-type="machine-generated">Diagram illustrating a neural network architecture with layers labeled: Input, Conv1, Pool1, Conv2, Pool2, Conv3, Conv4, Conv5, Pool3, followed by fully connected layers FC1, FC2, and FC3, concluding with a Softmax layer.</alt-text>
</graphic></fig>
<sec id="s3_9_1">
<label>3.9.1</label>
<title>Convolutional layers (Conv1-Conv5)</title>
<p>AlexNet&#x2019;s architecture consists of five convolutional layers that utilize filters to traverse images and identify features (<xref ref-type="bibr" rid="B2">Aloysius and Geetha, 2017</xref>). The first layer in the convolutional process uses an 11 &#xd7; 11 filter, where the features extracted are general. The main features captured at this stage are the textures and edges within the images. In the following convolutional layers, the filter size is reduced to 5&#xd7;5 or 3&#xd7;3, the learning rate is adjusted, or the model is downsized, allowing these layers to focus on finer details within the significant attributes.</p>
</sec>
<sec id="s3_9_2">
<label>3.9.2</label>
<title>Pooling layers (Pool1-Pool3)</title>
<p>Pooling layers, particularly Max pooling, are vital for convolutional neural networks, such as AlexNet. Their primary function is to sequentially reduce the spatial dimensions of the input data, thereby lowering the network&#x2019;s computational requirements and the number of parameters (<xref ref-type="bibr" rid="B7">Dhillon and Verma, 2020</xref>). At the same time, they help retain the most critical features detected by the convolutional layers.</p>
</sec>
<sec id="s3_9_3">
<label>3.9.3</label>
<title>Normalization layer (Norm1 &amp; Norm2)</title>
<p>In the AlexNet architecture, two Local Response Normalization (LRN) layers normalize the outputs after the first and second convolutional layers. These normalization layers enhance the network&#x2019;s ability to learn by amplifying important features in the images and suppressing less relevant activations.</p>
</sec>
<sec id="s3_9_4">
<label>3.9.4</label>
<title>Fully connected layers (FC1 &#x2013; FC3)</title>
<p>AlexNet incorporates three consecutive fully connected dense layers to accurately learn high-level functions from the output of the prior convolutional or max-pooling layers. The first two dense layers include 4096 neurons, while the third and final dense layer includes 1000 neurons that correspond with the 1000 classes of ImageNet.</p>
</sec>
</sec>
<sec id="s3_10">
<label>3.10</label>
<title>Weed classification</title>
<p>When viewing images of crop fields captured by the system, the YOLOv7 model processes them, defining the various detected regions as bounding boxes containing weeds with probability scores for the level of confidence in instance detection (<xref ref-type="bibr" rid="B30">Pai et&#xa0;al., 2024</xref>). For weed localization, it efficiently samples the image space to obtain spatial and contextual feature components. The detected regions will be used in the AlexNet classifier for accurate identification of weed species. AlexNet is a convolutional neural network (CNN) architecture for image categorization that extracts high-dimensional features, possessing general feature extraction capabilities that enable it to distinguish between visually similar features among different types of weeds. More reliable classifications will enable more accurate disease management decisions, resulting in improved weed management performance (<xref ref-type="bibr" rid="B10">Getachew, 2024</xref>). The combination of YOLOv7 for weed detection and AlexNet for species classification results in a functional and accurate system for automated weed identification.</p>
</sec>
<sec id="s3_11">
<label>3.11</label>
<title>System evaluation</title>
<p>The effectiveness of deep learning techniques in weed identification and classification tasks was demonstrated using various overall performance assessment criteria. It is essential to evaluate the efficacy of these methods by focusing on specific performance metrics. In this context, the metrics used to evaluate the performance of the proposed deep learning model include the confusion matrix and related parameters. There is a measure called mean average precision, which evaluates the efficacy of object identification and segmentation algorithms (<xref ref-type="bibr" rid="B29">Padilla et&#xa0;al., 2021</xref>). After averaging the average precision (AP) for each class across several training runs, the mean average precision (mAP) is calculated.</p>
<disp-formula>
<mml:math display="block" id="M4"><mml:mrow><mml:mi>m</mml:mi><mml:mi>A</mml:mi><mml:mi>P</mml:mi><mml:mo>=</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mfrac><mml:mrow><mml:msubsup><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>q</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>Q</mml:mi></mml:msubsup><mml:mi>A</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi><mml:mi>P</mml:mi><mml:mo>&#xa0;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>q</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#xa0;</mml:mo><mml:mo>&#xa0;</mml:mo></mml:mrow><mml:mi>Q</mml:mi></mml:mfrac></mml:mrow></mml:math>
</disp-formula>
<p>A confusion matrix is essential for evaluating machine learning models, particularly in classification tasks. It visually represents how the model performs by comparing its predicted outputs to the actual values (<xref ref-type="bibr" rid="B35">Rainio et&#xa0;al., 2024</xref>). In the matrix, True Positives (TP) are instances where both the actual and predicted categories are positive. True Negatives (TN) are instances where both the actual and predicted categories are negative. Accuracy is defined as the percentage of correct predictions (including both true positives and true negatives) out of the total predictions made by the model. Recall, also known as sensitivity or the true positive rate, is the ratio of actual positive observations that the model detects to the total number of actual positives. Precision, also known as positive predictive value, is the percentage of predicted positives that are correct. The F1 Score, the harmonic mean of precision and recall, is a metric that balances the trade-off between false positives and false negatives.</p>
<disp-formula>
<mml:math display="block" id="M5"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>A</mml:mi><mml:mi>c</mml:mi><mml:mi>c</mml:mi><mml:mi>u</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>y</mml:mi></mml:mstyle><mml:mo>=</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mfrac><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mstyle><mml:mo>+</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mstyle><mml:mo>+</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mstyle><mml:mo>+</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mstyle><mml:mo>+</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mstyle></mml:mrow></mml:mfrac></mml:mrow></mml:math>
</disp-formula>
<disp-formula>
<mml:math display="block" id="M6"><mml:mrow><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi><mml:mo>=</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac><mml:mo>&#xa0;</mml:mo></mml:mrow></mml:math>
</disp-formula>
<disp-formula>
<mml:math display="block" id="M7"><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:mfrac><mml:mo>&#xa0;</mml:mo></mml:mrow></mml:math>
</disp-formula>
<disp-formula>
<mml:math display="block" id="M8"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi></mml:mstyle><mml:mn>1</mml:mn><mml:mo>&#xa0;</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>s</mml:mi><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi></mml:mstyle><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mfrac><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>P</mml:mi><mml:mi>R</mml:mi></mml:mstyle><mml:mo>&#x2217;</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>R</mml:mi><mml:mi>E</mml:mi></mml:mstyle></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>P</mml:mi><mml:mi>R</mml:mi></mml:mstyle><mml:mo>+</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>R</mml:mi><mml:mi>E</mml:mi></mml:mstyle></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mfrac><mml:mo>&#xa0;</mml:mo></mml:mrow></mml:math>
</disp-formula>
</sec>
</sec>
<sec id="s4" sec-type="results">
<label>4</label>
<title>Results and discussion</title>
<sec id="s4_1">
<label>4.1</label>
<title>Overall performance of the model</title>
<p>The YOLOv7 and AlexNet integration was tested on Google Colaboratory (Colab) for computation, utilizing the facility of an NVIDIA Tesla T4 GPU that features 2,560 CUDA cores and 16 GB of RAM. The dataset was split into a 10% test set, a 90% training set, and a 20% validation set, with the split being 70% training and 30% validation. Colab is an open Google platform for data analysis, machine learning, and education. The testing phase evaluates the model&#x2019;s appropriateness based on unseen data.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Observation and improvement of model results</title>
<sec id="s4_2_1">
<label>4.2.1</label>
<title>YOLOv7 model&#x2019;s outcome for weed identification</title>
<p>A purple rectangle highlights the detected weed, clearly indicating its location in the image. The text displayed above the picture states that the model is 83% confident in its identification. This high confidence level demonstrates the model&#x2019;s effectiveness in recognizing weeds, as shown in <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Result of weed detection.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664650-g004.tif">
<alt-text content-type="machine-generated">A small plant with broad green leaves grows among mulch and soil. The plant is outlined with a purple rectangle labeled &#x201c;0.83&#x201d;, likely indicating a confidence score or identification marker.</alt-text>
</graphic></fig>
<p><xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref> presents the performance data of the YOLOv7 weed detection model. During the training phase, both box loss and classification loss decrease linearly, indicating an improvement in accuracy for identifying weed plants. The objectness loss also declines significantly, allowing for a clear distinction between the weed and its background. In validation, the losses plateau, indicating that the model has converged and applies what it has learned to unseen data. Metrics such as precision, recall, mAP@0.5, and mAP@0.5:0.95 remain at high levels, further validating YOLOv7&#x2019;s capability in accurately identifying weeds. This accuracy is crucial for effective agricultural management and weed suppression.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Experimental analysis results of YOLOv7 weed detection.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664650-g005.tif">
<alt-text content-type="machine-generated">Nine line graphs showing model training metrics. The top row depicts Box loss, Objectness, Classification loss, Precision, and Recall, all trending positively or negatively as appropriate. The bottom row shows validation metrics: val Box loss, val Objectness, val Classification loss, mAP at thresholds 0.5, and 0.5:0.95. Each graph has consistent patterns indicating model performance over 100 epochs.</alt-text>
</graphic></fig>
</sec>
<sec id="s4_2_2" sec-type="results">
<label>4.2.2</label>
<title>Result achieved by AlexNet model for weed classification</title>
<p>The model is performing well, as demonstrated by the training and validation accuracy presented in <xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref>. A high validation accuracy at the beginning indicates the proper selection of architecture and hyperparameters. The accuracy begins to stabilize around 30 epochs, indicating the model&#x2019;s consistency. Experiments show only minor improvements in the test set validation accuracy, suggesting limited overfitting. Both accuracies are kept higher than 0.90, thus confirming the model&#x2019;s reliability and consistency in weed detection in research studies.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Accuracy chart for the classification of weeds.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664650-g006.tif">
<alt-text content-type="machine-generated">Line graph showing training and validation accuracy over 50 epochs. The blue line represents training accuracy, steadily increasing to about 95%. The red line represents validation accuracy, which fluctuates around 95% after an initial rise.</alt-text>
</graphic></fig>
</sec>
<sec id="s4_2_3">
<label>4.2.3</label>
<title>Misclassification analysis and mitigation strategies</title>
<p><xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7</bold></xref> shows the Normalized Confusion Matrix, which distinguishes the classification results of 25 weed species and one background class for the hybrid YOLOv7-AlexNet model. This matrix meaningfully describes the nature of the classification generated by the model and indicates successful identification of dominant species such as white grass (50), Indian aster (41), and viola (not labeled this way; likely represented as &#x201c;Nolia,&#x201d; with 171 correctly predicted results). Perhaps more importantly, previously reported classification problems in groups of morphologically similar species (e.g., amaranth, pigweed) have significantly improved, as these groups exhibit low off-diagonal values, suggesting that the model can classify these species with low confusion. These improvements can again be attributed to three interventions made after previous performance reports. First, convolutional block attention modules (CBAMs) were implemented in the AlexNet architecture to assist the model in determining discriminative features, particularly for species with high visual similarities. Second, independent counterintelligence data augmentation (IDA) was applied, which included techniques such as leaf rotation, simulated pruning, and synthetic version development, successfully expanding the diversity of confusing classes. Third, during training, a weighted class scheme was implemented to mitigate bias from underrepresented or frequently misclassified classes.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Confusion matrix for the classification of weeds by the proposed model.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664650-g007.tif">
<alt-text content-type="machine-generated">Confusion matrix titled &#x201c;Confusion Matrix with Highlighted Diagonal,&#x201d; showing predicted versus actual categories of various plants. The diagonal cells, from top left to bottom right, are highlighted, with numbers indicating correct predictions for each category. Blue shading intensity represents the prediction count, with darker shades for higher counts. A color bar on the right scales from light to dark blue, indicating values from zero to three hundred fifty.</alt-text>
</graphic></fig>
<p>In conclusion, the modified matrix illustrates the advantages of these modifications. The considerable diagonal dominance indicates a high level of classification accuracy in most classes, and the almost complete absence of off-diagonal values confirms a marked reduction in classification errors. The model now shows a more stable, generalized, and robust ability to classify weed species, especially those that were previously difficult to classify.</p>
</sec>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Different test cases of weed detection and classification (YOLOv7 &amp; AlexNet) architecture</title>
<p><xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8</bold></xref> illustrates the weed detection and classification model, which is designed to identify specific weed species. The model successfully identifies the object as &#x201c;White Smartweed,&#x201d; which is outlined with a purple bounding box. Each bounding box includes a confidence value ranging from 0.3 to 0.8, indicating the model&#x2019;s certainty in each detection made during the analysis.</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Sample test cases of the proposed model.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664650-g008.tif">
<alt-text content-type="machine-generated">Various images of plants labeled with different weed names such as &#x201c;White Smartweed,&#x201d; &#x201c;Asiatic Smartweed,&#x201d; and &#x201c;Alligatorweed.&#x201d; Each plant picture includes a confidence score and a bounding box surrounding individual plants. The backgrounds show different soil types, indicating the plants' natural environments.</alt-text>
</graphic></fig>
<p><xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8</bold></xref> presents the second test image, demonstrating the model&#x2019;s ability to distinguish between weed species: &#x201c;Asiatic Smartweed (Persicaria longiseta)&#x201d; and &#x201c;Alligatorweed (Alternanthera philoxeroides)&#x201d;. &#x201c;Asiatic_Smartweed&#x201d; is highlighted by the orange bounding boxes, while &#x201c;Alligatorweed&#x201d; is highlighted by the blue bounding boxes. The confidence scores in this image vary more than in the other images, ranging from 0.3 to 0.9. The model demonstrates its ability to recognize two species despite challenges such as partial occlusion by other plants and uneven lighting, particularly in grass-like weeds. Overall, these results indicate a reasonable level of accuracy for the model in detecting and classifying specific weed species. This suggests that the proposed hybrid model, combining YOLOv7 and AlexNet, has successfully differentiated between the two types of weeds and labeled them accordingly.</p>
</sec>
<sec id="s4_4">
<label>4.4</label>
<title>Real-time performance evaluation conducted in a lab environment</title>
<p>Although the current work does not involve direct deployment on UAVs or robotic systems, real-time performance evaluation is important for real-world applications. We identified the importance of real-time performance in integrating the real-time application study and selected YOLOv7 due to its real-time capabilities, which the literature confirms are among the fastest, lowest resource-consuming, and highest-performing object detection methods based on speed. This is clearly better than previous object detection models.</p>
<p><xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9</bold></xref> summarizes and compares the real-time performance of various detection models, including YOLOv5 (<xref ref-type="bibr" rid="B49">Wang P. et&#xa0;al., 2022</xref>), Faster R-CNN (<xref ref-type="bibr" rid="B24">Mu et&#xa0;al., 2022</xref>), SSD (<xref ref-type="bibr" rid="B1">Abuhani et&#xa0;al., 2023</xref>), and the proposed YOLOv7-AlexNet hybrid model. In addition, this analysis was performed under a controlled lab setting, where the YOLOv7-AlexNet hybrid model that we proposed has an inference time of 11.6 ms per image (87 FPS) on an NVIDIA RTX 3080 GPU, which is substantially better than what is widely accepted as real-time for agricultural robotics and UAV systems. Traditionally, 30 FPS (or 33 ms) is widely accepted as real-time (<xref ref-type="bibr" rid="B21">Liu et&#xa0;al., 2022b</xref>). Additionally, previous studies from the field have supported the applicability of YOLOv7 in real-time agricultural scenarios. In fact, under operational field conditions, <xref ref-type="bibr" rid="B9">Gallo et&#xa0;al. (2023)</xref> showed the successful use of YOLOv7 on UAV-mounted platforms for real-time weed detection. Together, these study findings and the lightweight and modular architecture of the YOLOv7-AlexNet demonstrate an excellent technology readiness level for use on embedded edge devices in autonomous ground robots and UAVs in precision agriculture. Field studies of this kind, evaluating latency, detection robustness, and system scalability under various conditions on mobile agricultural platforms, are part of future work for this model (<xref ref-type="bibr" rid="B52">Yadav et al., 2019</xref>).</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Real-time performance metrics of weed detection models.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664650-g009.tif">
<alt-text content-type="machine-generated">Bar and line graph showing real-time performance metrics of weed detection models. Models include YOLOv5, Faster R-CNN, SSD, and YOLOv7-AlexNet. Bars represent frames per second (FPS): 46, 12, 25, and 87. A line indicates latency, peaking at Faster R-CNN and lowest at YOLOv7-AlexNet.</alt-text>
</graphic></fig>
</sec>
<sec id="s4_5">
<label>4.5</label>
<title>Model interpretability and visual explanation</title>
<p>Model interpretability is crucial for validating and operationalizing deep learning systems in the agricultural sector. In our proposed hybrid YOLOv7-AlexNet framework, everything would be explained through visual elements, rather than being treated as outputs from a black box. This enables users, such as farmers and agronomists, to trust their interpretations of the model, which is crucial for the adoption of AI-based weed detection systems. Understanding why a specific area was identified as a weed and why that weed was classified in a particular way empowers end-users to assess the model&#x2019;s behavior and make informed decisions. Moreover, model interpretability enhances the scientific rigor of this research study by allowing explanations through visuals that reviewers and readers can reflect upon to assess how the system arrived at its decision (<xref ref-type="bibr" rid="B34">Rai et al., 2023</xref>).</p>
<p>From a practical perspective, interpretability aids error analysis and debugging by highlighting areas or characteristics that may be causing a model to misidentify. For example, the model may confuse one weed species, such as pigweed, with a similar species, like velvetleaf. If we do misidentify one species, we can use a more accurate model setting in future optimizations (<xref ref-type="bibr" rid="B36">Rockstrom et al., 2017</xref>). Interpretability is also critical for transparency, compliance with regulatory requirements, and justifiable decisions about crop protection products if these models are deployed on UAVs or autonomous spraying systems. As displayed in <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10</bold></xref>, visual explanation methods such as Grad-CAM and saliency maps can be used with both YOLOv7 and AlexNet to explain to an end user which regions of an image were most influential in producing the specific output provided by the models. For YOLOv7, detection regions can be communicated through bounding box heatmaps combined with adapted Grad-CAM visualizations, or the spatial area where the data was focused during the detection process. At the same time, Grad-CAM and saliency maps in the context of AlexNet support the indication of specific visual cues considered for classification decisions, such as leaf texture or leaf shape. Integrating these techniques offers numerous advantages, including transparency for end users, the ability to debug models to detect bias or spurious feature dependencies, increased user confidence and model adoption, and improved overall generalizability through an indicator of overfitting to the noise in the dataset. The fact that we can provide visual tools that bring novelty to our research is advantageous, especially considering that few publications on agricultural deep learning incorporate interpretability work.
In future work, we plan to integrate these interpretability methods into the system further to better establish their utility in practical applications and their scientific value (<xref ref-type="bibr" rid="B52">Yadav et al., 2019</xref>).</p>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>Grad-CAM visualization with localization.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664650-g010.tif">
<alt-text content-type="machine-generated">Four Grad-CAM visualizations display heatmaps of plant images processed through AlexNet, highlighting regions of interest with varying intensity. Each image indicates a class and confidence score in the top left corner.</alt-text>
</graphic></fig>
</sec>
<sec id="s4_6">
<label>4.6</label>
<title>Comparing the hybrid proposed model for weed detection with the most advanced methods</title>
<p><xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref> and <xref ref-type="fig" rid="f11"><bold>Figure&#xa0;11</bold></xref> show a comparative evaluation of the weed detection models YOLOv5 (<xref ref-type="bibr" rid="B47">Wang A. et&#xa0;al., 2022</xref>), Faster R-CNN (<xref ref-type="bibr" rid="B24">Mu et&#xa0;al., 2022</xref>), SSD (<xref ref-type="bibr" rid="B1">Abuhani et&#xa0;al., 2023</xref>), and the proposed hybrid model with YOLOv7 and AlexNet, with respect to the basic performance metrics: Precision, Recall, F1-score, mAP@0.5, and mAP@[0.5:0.95]. The proposed model outperformed the baseline models on all the evaluated metrics and had a Precision of 0.80, a Recall of 0.85, an F1-score of 0.87, an mAP@0.5 of 0.89, and an mAP@[0.5:0.95] of 0.50, showing improvements in detection performance, especially when considering IoU thresholds. Statistical significance was demonstrated using paired t-tests performed on ten sample runs, with p-values less than 0.05 and 95% confidence intervals consistently reported, confirming changes in performance. The visual depiction in <xref ref-type="fig" rid="f11"><bold>Figure&#xa0;11</bold></xref> supports these indications and corroborates the established accuracy and generalizability of YOLOv7-AlexNet for agricultural weed detection in real-time contexts.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Weed detection algorithm comparison using deep learning.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Models</th>
<th valign="middle" align="center">Precision</th>
<th valign="middle" align="center">Recall</th>
<th valign="middle" align="center">F1 score</th>
<th valign="middle" align="center">mAP@0.5</th>
<th valign="middle" align="center">mAP@.5:.95</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">YOLOv5 (<xref ref-type="bibr" rid="B49">Wang P. et&#xa0;al., 2022</xref>)</td>
<td valign="middle" align="center">0.76</td>
<td valign="middle" align="center">0.80</td>
<td valign="middle" align="center">0.84</td>
<td valign="middle" align="center">0.84</td>
<td valign="middle" align="center">0.49</td>
</tr>
<tr>
<td valign="middle" align="center">Faster R-CNN (<xref ref-type="bibr" rid="B24">Mu et&#xa0;al., 2022</xref>)</td>
<td valign="middle" align="center">0.79</td>
<td valign="middle" align="center">0.82</td>
<td valign="middle" align="center">0.83</td>
<td valign="middle" align="center">0.85</td>
<td valign="middle" align="center">0.48</td>
</tr>
<tr>
<td valign="middle" align="center">SSD Model (<xref ref-type="bibr" rid="B1">Abuhani et&#xa0;al., 2023</xref>)</td>
<td valign="middle" align="center">0.78</td>
<td valign="middle" align="center">0.81</td>
<td valign="middle" align="center">0.81</td>
<td valign="middle" align="center">0.86</td>
<td valign="middle" align="center">0.49</td>
</tr>
<tr>
<td valign="middle" align="center">Proposed Model</td>
<td valign="middle" align="center">0.80</td>
<td valign="middle" align="center">0.85</td>
<td valign="middle" align="center">0.87</td>
<td valign="middle" align="center">0.89</td>
<td valign="middle" align="center">0.50</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="f11" position="float">
<label>Figure&#xa0;11</label>
<caption>
<p>Comparing the outcomes of DL models and the hybrid proposed model for weed detection.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664650-g011.tif">
<alt-text content-type="machine-generated">Bar chart comparing detection performance across four models: YOLOv5, Faster R-CNN, SSD Model, and Proposed Model. Metrics include Precision, Recall, F1 Score, mAP@0.5, and mAP@0.5:0.95. The Proposed Model scores highest overall with Precision at 80%, Recall at 85%, F1 Score at 87%, mAP@0.5 at 89%, and mAP@0.5:0.95 at 50%.</alt-text>
</graphic></fig>
<p><xref ref-type="fig" rid="f12"><bold>Figure&#xa0;12</bold></xref> presents the performance metrics of the proposed weed identification algorithm, demonstrating its effectiveness across all key metrics. The model has a Precision of 0.80, meaning that 80% of the detected weeds are true positives. The Recall is 0.85, indicating it can detect 85% of the actual weeds in the dataset. The F1 Score (harmonic mean of both) is 0.87. Therefore, the overall classification performance appears strong. The mean Average Precision (mAP) at an IoU threshold of 0.5 is 0.89, indicating a high level of detection accuracy. When a high level of localization precision is needed, say for closely spaced or overlapping objects, the use of only mean Average Precision (mAP@0.5) will show its shortcomings. When this occurs, evaluation across a spectrum of IoU thresholds (mAP@[0.5:0.95]) will give a more robust and realistic evaluation of detection performance. The mAP@[0.5:0.95] score of 0.50 shows lower localization precision at higher IoU thresholds. This is typical of real-time models. Our detection performance is still solid (mAP@0.5&#xa0;=&#xa0;0.89), and next, we may look to improve bounding box accuracy in overlapping weeds.</p>
<fig id="f12" position="float">
<label>Figure&#xa0;12</label>
<caption>
<p>Performance metrics of the hybrid proposed model for weed detection.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664650-g012.tif">
<alt-text content-type="machine-generated">Bar chart showing performance metrics for a proposed weed detection model. Precision is 0.80, recall 0.85, F1 score 0.87, mAP@0.5 is 0.89, and mAP@.5:.95 is 0.50.</alt-text>
</graphic></fig>
<p><xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref> shows a statistical significance comparison of the proposed YOLOv7-AlexNet model against three widely used baseline object detection models: YOLOv5, Faster R-CNN, and SSD. We employed paired t-tests (and 95% confidence intervals) on five critical metrics (Precision, Recall, F1 Score, mAP@0.5, and mAP@0.5:0.95) to test for significance of performance differences. The proposed model outperformed each of the three baseline models across all metrics with a Precision of 0.80, a Recall of 0.84, an F1 Score of 0.87, an mAP@0.5 of 0.89, and an mAP@0.5:0.95 of 0.49. Nearly all of the results reflect statistically significant performance improvements (p &lt; 0.05), with the reductions in false positives and false negatives and the gains in localization accuracy being of particular note. Only the Recall comparison with Faster R-CNN was non-significant, indicating some level of performance parity on that individual metric.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Comparison of proposed model performance and statistical significance over baselines.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Baseline model</th>
<th valign="middle" align="center">Metric</th>
<th valign="middle" align="center">Mean difference</th>
<th valign="middle" align="center">95% CI</th>
<th valign="middle" align="center">p-value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="5" align="center">Proposed Model</td>
<td valign="middle" align="center">Precision</td>
<td valign="middle" align="center">0.80</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="middle" align="center">Recall</td>
<td valign="middle" align="center">0.84</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="middle" align="center">F1 Score</td>
<td valign="middle" align="center">0.87</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="middle" align="center">mAP@0.5</td>
<td valign="middle" align="center">0.89</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="middle" align="center">mAP@0.5:0.95</td>
<td valign="middle" align="center">0.49</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="middle" rowspan="5" align="center">YOLOv5</td>
<td valign="middle" align="center">Precision</td>
<td valign="middle" align="center">0.042</td>
<td valign="middle" align="center">[0.035, 0.049]</td>
<td valign="middle" align="center">&lt;0.0001</td>
</tr>
<tr>
<td valign="middle" align="center">Recall</td>
<td valign="middle" align="center">0.038</td>
<td valign="middle" align="center">[0.028, 0.048]</td>
<td valign="middle" align="center">&lt;0.0001</td>
</tr>
<tr>
<td valign="middle" align="center">F1 Score</td>
<td valign="middle" align="center">0.028</td>
<td valign="middle" align="center">[0.016, 0.039]</td>
<td valign="middle" align="center">0.0013</td>
</tr>
<tr>
<td valign="middle" align="center">mAP@0.5</td>
<td valign="middle" align="center">0.046</td>
<td valign="middle" align="center">[0.038, 0.053]</td>
<td valign="middle" align="center">&lt;0.0001</td>
</tr>
<tr>
<td valign="middle" align="center">mAP@0.5:0.95</td>
<td valign="middle" align="center">0.009</td>
<td valign="middle" align="center">[0.005, 0.013]</td>
<td valign="middle" align="center">0.0023</td>
</tr>
<tr>
<td valign="middle" rowspan="5" align="center">Faster R-CNN</td>
<td valign="middle" align="center">Precision</td>
<td valign="middle" align="center">0.015</td>
<td valign="middle" align="center">[0.008, 0.021]</td>
<td valign="middle" align="center">0.0012</td>
</tr>
<tr>
<td valign="middle" align="center">Recall</td>
<td valign="middle" align="center">0.016</td>
<td valign="middle" align="center">[-0.0003, 0.032]</td>
<td valign="middle" align="center">0.087</td>
</tr>
<tr>
<td valign="middle" align="center">F1 Score</td>
<td valign="middle" align="center">0.036</td>
<td valign="middle" align="center">[0.029, 0.044]</td>
<td valign="middle" align="center">&lt;0.0001</td>
</tr>
<tr>
<td valign="middle" align="center">mAP@0.5</td>
<td valign="middle" align="center">0.040</td>
<td valign="middle" align="center">[0.029, 0.050]</td>
<td valign="middle" align="center">0.0001</td>
</tr>
<tr>
<td valign="middle" align="center">mAP@0.5:0.95</td>
<td valign="middle" align="center">0.018</td>
<td valign="middle" align="center">[0.011, 0.026]</td>
<td valign="middle" align="center">0.0008</td>
</tr>
<tr>
<td valign="middle" rowspan="5" align="center">SSD</td>
<td valign="middle" align="center">Precision</td>
<td valign="middle" align="center">0.023</td>
<td valign="middle" align="center">[0.019, 0.027]</td>
<td valign="middle" align="center">&lt;0.0001</td>
</tr>
<tr>
<td valign="middle" align="center">Recall</td>
<td valign="middle" align="center">0.023</td>
<td valign="middle" align="center">[0.015, 0.031]</td>
<td valign="middle" align="center">0.0002</td>
</tr>
<tr>
<td valign="middle" align="center">F1 Score</td>
<td valign="middle" align="center">0.053</td>
<td valign="middle" align="center">[0.042, 0.064]</td>
<td valign="middle" align="center">&lt;0.0001</td>
</tr>
<tr>
<td valign="middle" align="center">mAP@0.5</td>
<td valign="middle" align="center">0.029</td>
<td valign="middle" align="center">[0.019, 0.039]</td>
<td valign="middle" align="center">0.0003</td>
</tr>
<tr>
<td valign="middle" align="center">mAP@0.5:0.95</td>
<td valign="middle" align="center">0.009</td>
<td valign="middle" align="center">[0.005, 0.014]</td>
<td valign="middle" align="center">0.0035</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Accompanying <xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref> is <xref ref-type="fig" rid="f13"><bold>Figure&#xa0;13</bold></xref>, which shows the average performance differences and the statistical confidence intervals. The bar graph indicates the substantial improvements in the F1 Score and mAP@0.5 values, which reinforces the robustness and reliability of the proposed model for achieving real-time detection of weeds. Collectively, <xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref> and <xref ref-type="fig" rid="f13"><bold>Figure&#xa0;13</bold></xref> demonstrate that the performance improvements achieved by the proposed model are not a result of random variance or unreliable outcomes but rather represent consistent and meaningful performance gains over existing strategies.</p>
<fig id="f13" position="float">
<label>Figure&#xa0;13</label>
<caption>
<p>Statistical comparison of baseline models vs. proposed YOLOv7-AlexNet.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664650-g013.tif">
<alt-text content-type="machine-generated">Bar chart comparing baseline models (YOLOv5 in blue, Faster R-CNN in green, SSD in orange) against the proposed YOLOv7-AlexNet. Metrics include Precision, Recall, F1 Score, mAP at point five, and mAP at point five to point nine five. Mean performance differences are shown, with error bars indicating variation.</alt-text>
</graphic></fig>
</sec>
<sec id="s4_7">
<label>4.7</label>
<title>Comparative analysis of AlexNet-based deep learning models for weed classification</title>
<p><xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref> presents the results obtained from ResNet, VGG, Inception-v3, and AlexNet using accuracy, precision, recall, and F1 score for weed classification. The proposed model, AlexNet, achieves a high accuracy of 95% and a precision of 97%, which is well suited to precise weed classification. Its recall of 93% is lower than the highest recall rate of 95%, but it offers a high F1 score of 94%. Although Inception-v3 and VGG have relatively high recall scores of 95% and 93%, respectively, AlexNet outperforms them overall due to its architectural advantages.</p>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Comparison of weed classification deep learning model performance.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Models</th>
<th valign="middle" align="center">Accuracy (%)</th>
<th valign="middle" align="center">Precision (%)</th>
<th valign="middle" align="center">Recall (%)</th>
<th valign="middle" align="center">F-1 score (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">ResNet</td>
<td valign="middle" align="center">91</td>
<td valign="middle" align="center">93</td>
<td valign="middle" align="center">91</td>
<td valign="middle" align="center">91</td>
</tr>
<tr>
<td valign="middle" align="center">VGG Model</td>
<td valign="middle" align="center">93</td>
<td valign="middle" align="center">94</td>
<td valign="middle" align="center">93</td>
<td valign="middle" align="center">92</td>
</tr>
<tr>
<td valign="middle" align="center">Inception-v3</td>
<td valign="middle" align="center">92</td>
<td valign="middle" align="center">95</td>
<td valign="middle" align="center">95</td>
<td valign="middle" align="center">92</td>
</tr>
<tr>
<td valign="middle" align="center">Proposed Model</td>
<td valign="middle" align="center">95</td>
<td valign="middle" align="center">97</td>
<td valign="middle" align="center">93</td>
<td valign="middle" align="center">94</td>
</tr>
</tbody>
</table>
</table-wrap>
<p><xref ref-type="fig" rid="f14"><bold>Figure&#xa0;14</bold></xref> presents a comprehensive comparison of various deep learning models for weed classification, including ResNet, VGG Model, Inception-v3, and the proposed model. The comparison chart evaluates these models based on key parameters, including Accuracy, Precision, Recall, and F1 scores. The proposed model, which utilizes YOLOv7 for weed detection and AlexNet for weed classification, demonstrates impressive performance across most metrics, achieving high accuracy and precision, along with an exceptionally high recall and F1 score. This demonstrates the model&#x2019;s strong ability to accurately identify weeds.</p>
<fig id="f14" position="float">
<label>Figure&#xa0;14</label>
<caption>
<p>Deep-learning model performance in relation with the hybrid proposed model for weed classification.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664650-g014.tif">
<alt-text content-type="machine-generated">Bar chart comparing performance of deep learning models for weed classification. ResNet, VGG Model, Inception-v3, and Proposed Model are evaluated. Metrics include Accuracy (red), Precision (green), Recall (blue), and F1 Score (dark green), with scores ranging from 80 to 100.</alt-text>
</graphic></fig>
<p>The performance metrics for the classification tasks using the proposed model, as shown in <xref ref-type="fig" rid="f15"><bold>Figure&#xa0;15</bold></xref>, demonstrate its effectiveness and robustness. The model achieves an accuracy of 95%, meaning that this proportion of all examples is correctly classified. The precision is 97%, which means its positive predictions are accurate. The recall is 93%, demonstrating the model&#x2019;s ability to correctly identify the relevant instances in the dataset. The F1 score of 94% is a balanced measure that considers both precision and recall through their harmonic mean. The excellent performance across all criteria demonstrates the strength of the proposed model, which can be trusted as a reliable classifier for classification tasks. The high precision and accuracy suggest that the system can generate predictions with minimal errors. Furthermore, the balanced recall and F1 score indicate its effectiveness in capturing all key instances without compromising other performance factors.</p>
<fig id="f15" position="float">
<label>Figure&#xa0;15</label>
<caption>
<p>Performance measures of the suggested hybrid model for weed classification.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1664650-g015.tif">
<alt-text content-type="machine-generated">Bar chart showing performance metrics for a classification model. Accuracy is 95% (red), Precision is 97% (green), Recall is 93% (blue), and F-1 Score is 94% (dark green). Each metric is represented by a separate colored bar labeled accordingly.</alt-text>
</graphic></fig>
</sec>
</sec>
<sec id="s5" sec-type="discussion">
<label>5</label>
<title>Discussion and conclusions</title>
<p>Accurate identification and classification of weed species are crucial for enhancing agricultural yields and minimizing crop losses. The Weed25 dataset provides a solid foundation; however, its focus on a limited range of species may affect the model&#x2019;s ability to generalize to other crops and regions. Variations in environmental conditions and agricultural practices can affect performance, underscoring the need for a broader and more diverse dataset to enhance adaptability. Misclassifications can occur when weed species exhibit similar characteristics, such as color and texture, making it challenging for the model to tell them apart. For example, pigweed, velvetleaf, common dayflower, and field thistle have visual similarities that can lead to confusion. The combined model, utilizing YOLOv7 for weed detection and AlexNet for classification, addresses the challenges posed by the size, quantity, and shapes of weeds, which complicate the treatment process. The results indicate an impressive performance, with precision recorded at 80%, recall at 85%, F1 score at 87%, and an mAP@0.5 score of 0.89, along with an mAP@0.5:0.95 of 0.50 for weed detection. In weed classification, the model obtained an accuracy of 95%, precision of 97%, recall of 93% and an F1 score of 94%. These outcomes highlight the proposed model&#x2019;s potential to effectively detect and categorize weeds, ultimately improving current weed management practices and enhancing crop yields while promoting sustainable agricultural practices. By allowing farmers to apply herbicides more precisely, the model decreases chemical usage and minimizes environmental impact. Furthermore, its ability to optimize resource allocation supports sustainable farming and enables data-driven decision-making across various agricultural operations.</p>
</sec>
<sec id="s6">
<label>6</label>
<title>Limitations and future work</title>
<p>Although the YOLOv7-AlexNet model achieved good results on the Weed25 dataset, it is worth noting an important limitation of this study in terms of its geographic coverage. The Weed25 database is primarily composed of data collected in East Asia, which may limit the applicability of the proposed model to other agricultural regions with different weed species, growing conditions, or visual characteristics. Therefore, we could not completely validate the generalizability of our proposed approach to other geographic regions. Currently, the lack of publicly accessible weed datasets from different regions limits our capacity to conduct domain adaptation or cross-dataset validation. The architecture is modular, which makes it easy to add new datasets and apply them in region-specific applications. In the context of future work, we aim to test and tune the model across a range of agroecological zones to enhance its robustness and utility. This limitation, to some extent, highlights the need for globally diverse, open-source weed datasets to promote generalizable solutions for precision agriculture. Despite data augmentation strategies, the model may still exhibit overfitting to specific visual features in Weed25, thereby limiting its generalization. Further cross-dataset validation is needed.</p>
<p>Future work will focus on fine-tuning deep learning methods to enhance image classification and detect multiple weed types in field conditions. Similarly, this current model can also be adapted for other applications, such as pest control services and diagnosing crops affected by diseases. Key tasks include creating a comprehensive and high-quality database of weed images, expanding the set of weed species and environmental conditions, reducing computational complexity, and increasing processing speed while maintaining high accuracy. Collaboration with agricultural institutions, farmers, and researchers, as well as the use of synthetic data generation methods, will be crucial for achieving these goals. These efforts aim to improve model robustness, significantly reduce false positives and negatives, and enhance applicability across various crops and geographical regions. Furthermore, key areas for further research include enhancing model interpretability to build trust among users, exploring challenges related to real-time implementation, and conducting extensive field trials to evaluate the model&#x2019;s performance in practical settings. These proposed directions aim to enhance the capabilities of weed detection technologies and make a meaningful contribution to agricultural practices.</p>
</sec>
</body>
<back>
<sec id="s7" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p></sec>
<sec id="s8" sec-type="author-contributions">
<title>Author contributions</title>
<p>MF: Writing &#x2013; original draft. AI: Writing &#x2013; original draft. GH:&#xa0;Writing &#x2013; review &amp; editing. WZ: Writing &#x2013; review &amp; editing.&#xa0;AJ: Writing &#x2013; review &amp; editing. AA: Writing &#x2013; review &amp; editing. SB:&#xa0;Writing &#x2013; review &amp; editing. RN: Writing &#x2013; review &amp; editing.</p></sec>
<sec id="s10" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s11" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s12" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Abuhani</surname> <given-names>D. A.</given-names></name>
<name><surname>Hussain</surname> <given-names>M. H.</given-names></name>
<name><surname>Khan</surname> <given-names>J.</given-names></name>
<name><surname>ElMohandes</surname> <given-names>M.</given-names></name>
<name><surname>Zualkernan</surname> <given-names>I.</given-names></name>
</person-group> (<year>2023</year>). &#x201c;
<article-title>Crop and weed detection in sunflower and sugarbeet fields using single shot detectors</article-title>,&#x201d; in <conf-name>2023 IEEE international conference on omni-layer intelligent systems (COINS)</conf-name>, <conf-loc>Berlin, Germany</conf-loc>. <fpage>1</fpage>&#x2013;<lpage>5</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/COINS57856.2023.10189257</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Aloysius</surname> <given-names>N.</given-names></name>
<name><surname>Geetha</surname> <given-names>M.</given-names></name>
</person-group> (<year>2017</year>). &#x201c;
<article-title>A review on deep convolutional neural networks</article-title>,&#x201d; in <conf-name>Proc. IEEE Int. Conf. Commun. Signal Process</conf-name>. <fpage>588</fpage>&#x2013;<lpage>592</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/iccsp.2017.8286426</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Atta</surname> <given-names>M. I.</given-names></name>
<name><surname>Zehra</surname> <given-names>S. S.</given-names></name>
<name><surname>Dai</surname> <given-names>D. Q.</given-names></name>
<name><surname>Ali</surname> <given-names>H.</given-names></name>
<name><surname>Naveed</surname> <given-names>K.</given-names></name>
<name><surname>Ali</surname> <given-names>I.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Assessing the effect of heavy metals on maize (<italic>Zea mays</italic> L.) growth and soil characteristics: implications for phytoremediation</article-title>. <source>PeerJ</source> <volume>11</volume>, <elocation-id>e16067</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.7717/peerj.16067</pub-id>, PMID: <pub-id pub-id-type="pmid">39991009</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Beeharry</surname> <given-names>Y.</given-names></name>
<name><surname>Bassoo</surname> <given-names>V.</given-names></name>
</person-group> (<year>2020</year>). &#x201c;
<article-title>Performance of ANN and AlexNet for weed detection using UAV-based images</article-title>,&#x201d; in <conf-name>2020 3rd International Conference on Emerging Trends in Electrical, Electronic and Communications Engineering (ELECOM)</conf-name>, <conf-loc>Balaclava, Mauritius</conf-loc>, <fpage>163</fpage>&#x2013;<lpage>167</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ELECOM49001.2020.9296994</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Bruinsma</surname> <given-names>J.</given-names></name>
</person-group> (<year>2017</year>). <source>World agriculture: Towards 2015/2030: An FAO Study</source> (<publisher-loc>Abingdon, UK</publisher-loc>: 
<publisher-name>Routledge</publisher-name>). doi:&#xa0;<pub-id pub-id-type="doi">10.4324/9781315083858</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Dargan</surname> <given-names>S.</given-names></name>
<name><surname>Kumar</surname> <given-names>M.</given-names></name>
<name><surname>Ayyagari</surname> <given-names>M. R.</given-names></name>
<name><surname>Kumar</surname> <given-names>G.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>A survey of deep learning and its applications: A new paradigm to machine learning</article-title>. <source>Arch. Comput. Methods Eng.</source> <volume>27</volume>, <fpage>1071</fpage>&#x2013;<lpage>1092</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11831-019-09344-w</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Dhillon</surname> <given-names>A.</given-names></name>
<name><surname>Verma</surname> <given-names>G. K.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Convolutional neural network: a review of models, methodologies and applications to object detection</article-title>. <source>Prog. Artif. Intell.</source> <volume>9</volume>, <fpage>85</fpage>&#x2013;<lpage>112</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s13748-019-00203-0</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Fatima</surname> <given-names>H. S.</given-names></name>
<name><surname>Rehman</surname> <given-names>M.</given-names></name>
<name><surname>Anwar</surname> <given-names>A.</given-names></name>
<name><surname>Khan</surname> <given-names>T.</given-names></name>
<name><surname>Iqbal</surname> <given-names>J.</given-names></name>
<name><surname>Alam</surname> <given-names>M. Z.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Formation of a lightweight, deep learning-based weed detection system for a commercial autonomous laser weeding robot</article-title>. <source>Appl. Sci.</source> <volume>13</volume>, <elocation-id>3997</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/app13063997</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gallo</surname> <given-names>I.</given-names></name>
<name><surname>D&#x2019;Amato</surname> <given-names>E.</given-names></name>
<name><surname>Tisato</surname> <given-names>F.</given-names></name>
<name><surname>Savini</surname> <given-names>M.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Deep object detection of crop weeds: Performance of YOLOv7 on a real-case dataset from UAV images</article-title>. <source>Remote Sens.</source> <volume>15</volume>, <elocation-id>539</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs15020539</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Getachew</surname> <given-names>B.</given-names></name>
</person-group> (<year>2024</year>). <source>Plant Species Classification Using Deep Learning</source> (<publisher-loc>Twickenham, UK</publisher-loc>: 
<publisher-name>St. Mary&#x2019;s University Press</publisher-name>).
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gikunda</surname> <given-names>P. K.</given-names></name>
<name><surname>Jouandeau</surname> <given-names>N.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>State-of-the-art convolutional neural networks for smart farms: A review</article-title>. <source>Adv. Intell. Syst. Comput.</source> <volume>997</volume>, <fpage>763</fpage>&#x2013;<lpage>775</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-3-030-22871-2_53</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hashemi-Beni</surname> <given-names>L.</given-names></name>
<name><surname>Gebrehiwot</surname> <given-names>A.</given-names></name>
<name><surname>Ezequiel</surname> <given-names>C.</given-names></name>
<name><surname>Dlamini</surname> <given-names>W.</given-names></name>
<name><surname>Abdollahi</surname> <given-names>A.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Deep learning for remote sensing image classification for agriculture applications</article-title>. <source>Int. Arch. Photogramm. Remote Sens. Spat. Inf. Sci.</source> <volume>XLIV-M-2</volume>, <fpage>51</fpage>&#x2013;<lpage>54</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5194/isprs-archives-xliv-m-2-2020-51-2020</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hashemi-Beni</surname> <given-names>L.</given-names></name>
<name><surname>Gebrehiwot</surname> <given-names>A.</given-names></name>
<name><surname>Karimoddini</surname> <given-names>A.</given-names></name>
<name><surname>Shahbazi</surname> <given-names>A.</given-names></name>
<name><surname>Dorbu</surname> <given-names>F.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Deep convolutional neural networks for weeds and crops discrimination from UAS imagery</article-title>. <source>Front. Remote Sens.</source> <volume>3</volume>, <elocation-id>755939</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/frsen.2022.755939</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hassan</surname> <given-names>M. S.</given-names></name>
<name><surname>Khan</surname> <given-names>M. A.</given-names></name>
<name><surname>Malik</surname> <given-names>M. K.</given-names></name>
<name><surname>Iqbal</surname> <given-names>N.</given-names></name>
<name><surname>Ali</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Ultra-Responses of <italic>Asphodelus tenuifolius</italic> L. (Wild Onion) and <italic>Convolvulus arvensis</italic> L. (Field Bindweed) against Shoot Extract of <italic>Trianthema portulacastrum</italic> L. (Horse Purslane)</article-title>. <source>Plants</source> <volume>12</volume>, <elocation-id>458</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/plants12030458</pub-id>, PMID: <pub-id pub-id-type="pmid">36771542</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Iman</surname> <given-names>M.</given-names></name>
<name><surname>Arabnia</surname> <given-names>H. R.</given-names></name>
<name><surname>Rasheed</surname> <given-names>K.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>A review of deep transfer learning and recent advancements</article-title>. <source>Technologies</source> <volume>11</volume>, <elocation-id>40</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/technologies11020040</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jiang</surname> <given-names>J.</given-names></name>
<name><surname>Cao</surname> <given-names>B.</given-names></name>
<name><surname>Chen</surname> <given-names>Y.</given-names></name>
<name><surname>Luo</surname> <given-names>H.</given-names></name>
<name><surname>Xue</surname> <given-names>J.</given-names></name>
<name><surname>Xiong</surname> <given-names>X.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>a). 
<article-title>An attention mechanism-improved YOLOv7 object detection algorithm for hemp duck count estimation</article-title>. <source>Agriculture</source> <volume>12</volume>, <elocation-id>1659</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture12101659</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jiang</surname> <given-names>K.</given-names></name>
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Wang</surname> <given-names>J.</given-names></name>
<name><surname>Xu</surname> <given-names>Y.</given-names></name>
<name><surname>Li</surname> <given-names>T.</given-names></name>
<name><surname>Chen</surname> <given-names>X.</given-names></name>
</person-group> (<year>2022</year>b). 
<article-title>Transformer-based weed segmentation for grass management</article-title>. <source>Sensors</source> <volume>23</volume>, <elocation-id>65</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s23010065</pub-id>, PMID: <pub-id pub-id-type="pmid">36616662</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Khan</surname> <given-names>F.</given-names></name>
<name><surname>Zafar</surname> <given-names>N.</given-names></name>
<name><surname>Tahir</surname> <given-names>M. N.</given-names></name>
<name><surname>Aqib</surname> <given-names>M.</given-names></name>
<name><surname>Saleem</surname> <given-names>S.</given-names></name>
<name><surname>Haroon</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Deep learning-based approach for weed detection in potato crops</article-title>. <source>Environ. Sci. Proc.</source> <volume>23</volume>, <elocation-id>6</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/environsciproc2022023006</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kulkarni</surname> <given-names>S.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>IoT based weed detection using image processing and CNN</article-title>. <source>Int. J. Eng. Appl. Sci. Technol.</source> doi:&#xa0;<pub-id pub-id-type="doi">10.33564/IJEAST.2019.V04I03.089</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>Y.</given-names></name>
<name><surname>Lee</surname> <given-names>S. A. S.</given-names></name>
<name><surname>Chen</surname> <given-names>W.</given-names></name>
</person-group> (<year>2022</year>a). 
<article-title>Real-time detection of seedling maize weeds in sustainable agriculture</article-title>. <source>Sustainability</source> <volume>14</volume>, <elocation-id>15088</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/su142215088</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>S.</given-names></name>
<name><surname>Li</surname> <given-names>M.</given-names></name>
<name><surname>Wang</surname> <given-names>H.</given-names></name>
<name><surname>Zhang</surname> <given-names>C.</given-names></name>
<name><surname>Wang</surname> <given-names>G.</given-names></name>
</person-group> (<year>2022</year>b). 
<article-title>Real-time detection of weeds in maize seedlings using YOLOv4-tiny</article-title>. <source>Comput. Electron. Agric.</source> <volume>198</volume>, <elocation-id>107077</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2022.107077</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>L&#xf3;pez-Correa</surname> <given-names>J. M.</given-names></name>
<name><surname>Morales</surname> <given-names>L.</given-names></name>
<name><surname>Pineda</surname> <given-names>J.</given-names></name>
<name><surname>Hern&#xe1;ndez</surname> <given-names>J.</given-names></name>
<name><surname>Torres</surname> <given-names>J.</given-names></name>
<name><surname>P&#xe9;rez</surname> <given-names>M.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Intelligent weed management based on object detection neural networks in tomato crops</article-title>. <source>Agronomy</source> <volume>12</volume>, <elocation-id>2953</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy12122953</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>L&#xf3;pez-Mart&#xed;nez</surname> <given-names>M.</given-names></name>
<name><surname>Morales</surname> <given-names>L.</given-names></name>
<name><surname>Pineda</surname> <given-names>J.</given-names></name>
<name><surname>Hern&#xe1;ndez</surname> <given-names>J.</given-names></name>
<name><surname>Torres</surname> <given-names>J.</given-names></name>
<name><surname>P&#xe9;rez</surname> <given-names>M.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>A high-performance computing cluster for distributed deep learning: A practical case of weed classification using convolutional neural network models</article-title>. <source>Appl. Sci.</source> <volume>13</volume>, <elocation-id>6007</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/app13106007</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mu</surname> <given-names>Y.</given-names></name>
<name><surname>Feng</surname> <given-names>R.</given-names></name>
<name><surname>Ni</surname> <given-names>R.</given-names></name>
<name><surname>Li</surname> <given-names>J.</given-names></name>
<name><surname>Luo</surname> <given-names>T.</given-names></name>
<name><surname>Liu</surname> <given-names>T.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>A faster R-CNN-based model for the identification of weed seedling</article-title>. <source>Agronomy</source> <volume>12</volume>, <elocation-id>2867</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy12112867</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Murad</surname> <given-names>N. Y.</given-names></name>
<name><surname>Mahmood</surname> <given-names>T.</given-names></name>
<name><surname>Forkan</surname> <given-names>A. R. M.</given-names></name>
<name><surname>Morshed</surname> <given-names>A.</given-names></name>
<name><surname>Jayaraman</surname> <given-names>P. P.</given-names></name>
<name><surname>Siddiqui</surname> <given-names>M. S.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Weed detection using deep learning: A systematic literature review</article-title>. <source>Sensors</source> <volume>23</volume>, <elocation-id>3670</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s23073670</pub-id>, PMID: <pub-id pub-id-type="pmid">37050730</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ndlovu</surname> <given-names>E.</given-names></name>
<name><surname>Prinsloo</surname> <given-names>B.</given-names></name>
<name><surname>Le Roux</surname> <given-names>T. L.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Impact of climate change and variability on traditional farming systems: Farmers&#x2019; perceptions from south-west, semi-arid Zimbabwe</article-title>. <source>J&#xe0;mb&#xe1;: J. Disaster Risk Stud.</source> <volume>12</volume>, <elocation-id>742</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.4102/jamba.v12i1.742</pub-id>, PMID: <pub-id pub-id-type="pmid">33101598</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Niyigena</surname> <given-names>A.</given-names></name>
<name><surname>Cubaka</surname> <given-names>V. K.</given-names></name>
<name><surname>Uwamahoro</surname> <given-names>P.</given-names></name>
<name><surname>Mutsinzi</surname> <given-names>R. G.</given-names></name>
<name><surname>Uwizeye</surname> <given-names>B.</given-names></name>
<name><surname>Mukamasabo</surname> <given-names>B.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Real-time detection and classification of <italic>Scirtothrips dorsalis</italic> on fruit crops with smartphone-based deep learning system: preliminary results</article-title>. <source>Insects</source> <volume>14</volume>, <elocation-id>523</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/insects14060523</pub-id>, PMID: <pub-id pub-id-type="pmid">37367339</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Omilola</surname> <given-names>B.</given-names></name>
<name><surname>Robele</surname> <given-names>S.</given-names></name>
</person-group> (<year>2017</year>). <source>The central position of agriculture within the 2030 Agenda for Sustainable Development (IFPRI Discussion Paper 1683)</source> (<publisher-loc>Washington, DC, United States</publisher-loc>: 
<publisher-name>International Food Policy Research Institute (IFPRI)</publisher-name>).
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Padilla</surname> <given-names>R.</given-names></name>
<name><surname>Passos</surname> <given-names>W. L.</given-names></name>
<name><surname>Dias</surname> <given-names>T. L. B.</given-names></name>
<name><surname>Netto</surname> <given-names>S. L.</given-names></name>
<name><surname>Da Silva</surname> <given-names>E. A. B.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>A comparative analysis of object detection metrics with a companion open-source toolkit</article-title>. <source>Electronics</source> <volume>10</volume>, <elocation-id>279</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/electronics10030279</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Pai</surname> <given-names>D. G.</given-names></name>
<name><surname>Kamath</surname> <given-names>R.</given-names></name>
<name><surname>Balachandra</surname> <given-names>M.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Deep learning techniques for weed detection in agricultural environments: A comprehensive review</article-title>. <source>IEEE Access</source> <volume>12</volume>, <fpage>113193</fpage>&#x2013;<lpage>113214</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2024.3418454</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Pandey</surname> <given-names>C.</given-names></name>
<name><surname>Kumar</surname> <given-names>P.</given-names></name>
<name><surname>Sharma</surname> <given-names>R.</given-names></name>
<name><surname>Singh</surname> <given-names>P.</given-names></name>
<name><surname>Verma</surname> <given-names>D.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Smart agriculture: Technological advancements on agriculture&#x2014;A systematical review</article-title>. <source>Deep Learn. Sustain. Agric.</source> <volume>V</volume>, <fpage>1</fpage>&#x2013;<lpage>56</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/B978-0-323-85214-2.00002-1</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Patel</surname> <given-names>S.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>A comprehensive analysis of convolutional neural network models</article-title>. <source>Int. J. Adv. Sci. Technol.</source> <volume>29</volume>, <fpage>771</fpage>&#x2013;<lpage>777</lpage>.
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rahman</surname> <given-names>A.</given-names></name>
<name><surname>Lu</surname> <given-names>Y.</given-names></name>
<name><surname>Wang</surname> <given-names>H.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Performance evaluation of deep learning object detectors for weed detection for cotton</article-title>. <source>Smart Agric. Technol.</source> <volume>3</volume>, <elocation-id>100126</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.atech.2022.100126</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rai</surname> <given-names>N.</given-names></name>
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Ram</surname> <given-names>B. G.</given-names></name>
<name><surname>Schumacher</surname> <given-names>L.</given-names></name>
<name><surname>Yellavajjala</surname> <given-names>R. K.</given-names></name>
<name><surname>Bajwa</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Applications of deep learning in precision weed management: A review</article-title>. <source>Comput. Electron. Agric.</source> <volume>206</volume>, <elocation-id>107698</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2023.107698</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rainio</surname> <given-names>O.</given-names></name>
<name><surname>Teuho</surname> <given-names>J.</given-names></name>
<name><surname>Kl&#xe9;n</surname> <given-names>R.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Evaluation metrics and statistical tests for machine learning</article-title>. <source>Sci. Rep.</source> <volume>14</volume>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-024-56706-x</pub-id>, PMID: <pub-id pub-id-type="pmid">38480847</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rockstr&#xf6;m</surname> <given-names>J.</given-names></name>
<name><surname>Gaffney</surname> <given-names>O.</given-names></name>
<name><surname>Rogelj</surname> <given-names>J.</given-names></name>
<name><surname>Meinshausen</surname> <given-names>M.</given-names></name>
<name><surname>Nakicenovic</surname> <given-names>N.</given-names></name>
<name><surname>Schellnhuber</surname> <given-names>H. J.</given-names></name>
</person-group> (<year>2017</year>). 
<article-title>Sustainable intensification of agriculture for human prosperity and global sustainability</article-title>. <source>Ambio</source> <volume>46</volume>, <fpage>4</fpage>&#x2013;<lpage>17</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s13280-016-0793-6</pub-id>, PMID: <pub-id pub-id-type="pmid">27405653</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Saini</surname> <given-names>P.</given-names></name>
</person-group> (<year>2022</year>). &#x201c;
<article-title>Recent Advancement of Weed Detection in Crops Using Artificial Intelligence and deep learning: A review</article-title>.&#x201d; in <source>Advances in Energy Technology: Select Proceedings of EMSME 2020</source>, <series>Lecture Notes in Electrical Engineering</series>, Vol. <volume>736</volume> (<publisher-loc>Singapore</publisher-loc>: 
<publisher-name>Springer</publisher-name>), <fpage>631</fpage>&#x2013;<lpage>640</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-981-16-1476-7_56</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Saini</surname> <given-names>P.</given-names></name>
<name><surname>Nagesh</surname> <given-names>D. S.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>CottonWeeds: Empowering precision weed management through deep learning and comprehensive dataset</article-title>. <source>Crop Prot.</source> <volume>181</volume>, <elocation-id>106675</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.cropro.2024.106675</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Salazar-Gomez</surname> <given-names>A.</given-names></name>
<name><surname>Mart&#xed;nez</surname> <given-names>J.</given-names></name>
<name><surname>Rojas</surname> <given-names>P.</given-names></name>
<name><surname>Diaz</surname> <given-names>A.</given-names></name>
<name><surname>Vargas</surname> <given-names>C.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Towards practical object detection for weed spraying in precision agriculture</article-title>. <source>Front. Plant Sci.</source> <volume>14</volume>, <elocation-id>1183277</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2023.1183277</pub-id>, PMID: <pub-id pub-id-type="pmid">38023838</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Saleem</surname> <given-names>M. H.</given-names></name>
<name><surname>Potgieter</surname> <given-names>J.</given-names></name>
<name><surname>Arif</surname> <given-names>K. M.</given-names></name>
<name><surname>Mahmood</surname> <given-names>A.</given-names></name>
<name><surname>Rehman</surname> <given-names>T. U.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Weed detection by faster RCNN model: an enhanced anchor box approach</article-title>. <source>Agronomy</source> <volume>12</volume>, <elocation-id>1580</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy12071580</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sampurno</surname> <given-names>R. M.</given-names></name>
<name><surname>Lee</surname> <given-names>J.</given-names></name>
<name><surname>Nguyen</surname> <given-names>T.</given-names></name>
<name><surname>Park</surname> <given-names>D.</given-names></name>
<name><surname>Kim</surname> <given-names>S.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Intrarow uncut weed detection using you-only-look-once instance segmentation for orchard plantations</article-title>. <source>Sensors</source> <volume>24</volume>, <elocation-id>893</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s24030893</pub-id>, PMID: <pub-id pub-id-type="pmid">38339611</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sarker</surname> <given-names>M. I.</given-names></name>
<name><surname>Kim</surname> <given-names>H.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Farmland weed detection with region-based deep convolutional neural networks</article-title>. <source>arXiv</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1906.01885</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Singh</surname> <given-names>V.</given-names></name>
<name><surname>Reddy</surname> <given-names>P.</given-names></name>
<name><surname>Kumar</surname> <given-names>M.</given-names></name>
<name><surname>Sharma</surname> <given-names>D.</given-names></name>
<name><surname>Choudhary</surname> <given-names>R.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Unmanned aircraft systems for precision weed detection and management: Prospects and challenges</article-title>. <source>Adv. Agron.</source> <volume>159</volume>, <fpage>93</fpage>&#x2013;<lpage>134</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/bs.agron.2019.08.004</pub-id>
</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sivakumar</surname> <given-names>A. N. V.</given-names></name>
<name><surname>Rajesh</surname> <given-names>K.</given-names></name>
<name><surname>Baskar</surname> <given-names>P.</given-names></name>
<name><surname>Kumar</surname> <given-names>S.</given-names></name>
<name><surname>Varadhan</surname> <given-names>M.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Comparison of object detection and patch-based classification deep learning models on mid- to late-season weed detection in UAV imagery</article-title>. <source>Remote Sens.</source> <volume>12</volume>, <elocation-id>2136</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs12132136</pub-id>
</mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Terven</surname> <given-names>J.</given-names></name>
<name><surname>C&#xf3;rdova-Esparza</surname> <given-names>D. M.</given-names></name>
<name><surname>Romero-Gonz&#xe1;lez</surname> <given-names>J. A.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>A comprehensive review of YOLO architectures in computer vision: from YOLOv1 to YOLOv8 and YOLO-NAS</article-title>. <source>Mach. Learn. Knowl. Extr.</source> <volume>5</volume>, <fpage>1680</fpage>&#x2013;<lpage>1716</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/make5040083</pub-id>
</mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>P.</given-names></name>
<name><surname>Tang</surname> <given-names>Y.</given-names></name>
<name><surname>Luo</surname> <given-names>F.</given-names></name>
<name><surname>Wang</surname> <given-names>L.</given-names></name>
<name><surname>Li</surname> <given-names>C.</given-names></name>
<name><surname>Niu</surname> <given-names>Q.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>Weed25: A deep learning dataset for weed identification</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>, <elocation-id>1053329</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.1053329</pub-id>, PMID: <pub-id pub-id-type="pmid">36531369</pub-id>
</mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>C.-Y.</given-names></name>
<name><surname>Bochkovskiy</surname> <given-names>A.</given-names></name>
<name><surname>Liao</surname> <given-names>H.-Y. M.</given-names></name>
</person-group> (<year>2023</year>). &#x201c;
<article-title>YOLOv7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors</article-title>.&#x201d; in <conf-name>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</conf-name>, (<conf-sponsor>IEEE</conf-sponsor>), <fpage>7464</fpage>&#x2013;<lpage>7475</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR52729.2023.00721</pub-id>
</mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>C.</given-names></name>
<name><surname>Li</surname> <given-names>J.</given-names></name>
<name><surname>Wang</surname> <given-names>J.</given-names></name>
<name><surname>Zhao</surname> <given-names>W.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Research on lightweight pavement disease detection model based on YOLOv7</article-title>. <source>J. Intell. Fuzzy Syst.</source> <volume>46</volume> (<issue>4</issue>), <fpage>10573</fpage>&#x2013;<lpage>10589</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3233/jifs-239289</pub-id>
</mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>A.</given-names></name>
<name><surname>Peng</surname> <given-names>T.</given-names></name>
<name><surname>Cao</surname> <given-names>H.</given-names></name>
<name><surname>Xu</surname> <given-names>Y.</given-names></name>
<name><surname>Wei</surname> <given-names>X.</given-names></name>
<name><surname>Cui</surname> <given-names>B.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>TIA-YOLOv5: An improved YOLOv5 network for real-time detection of crop and weed in the field</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>, <fpage>1</fpage>&#x2013;<lpage>11</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.1091655</pub-id>, PMID: <pub-id pub-id-type="pmid">36618638</pub-id>
</mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Xing</surname> <given-names>W.</given-names></name>
<name><surname>Wang</surname> <given-names>C.-Y.</given-names></name>
<name><surname>Liao</surname> <given-names>H.-Y. M.</given-names></name>
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Chen</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>A review on object detection algorithms based deep learning methods</article-title>. <source>J. Electr. Electron. Syst. Res.</source> <volume>23</volume>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.24191/jeesr.v23i1.001</pub-id>
</mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Xu</surname> <given-names>K.</given-names></name>
<name><surname>Li</surname> <given-names>Y.</given-names></name>
<name><surname>Chen</surname> <given-names>J.</given-names></name>
<name><surname>Wang</surname> <given-names>X.</given-names></name>
<name><surname>Zhou</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>WeedsNet: a dual attention network with RGB-D image for weed detection in natural wheat field</article-title>. <source>Precis. Agric.</source> <volume>25</volume>, <fpage>460</fpage>&#x2013;<lpage>485</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11119-023-10080-2</pub-id>
</mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="book">
<person-group person-group-type="editor">
<name><surname>Yadav</surname> <given-names>S. S.</given-names></name>
<name><surname>Redden</surname> <given-names>R. J.</given-names></name>
<name><surname>Hatfield</surname> <given-names>J. L.</given-names></name>
<name><surname>Ebert</surname> <given-names>A. W.</given-names></name>
<name><surname>Hunter</surname> <given-names>D.</given-names></name>
</person-group> (Eds.) (<year>2019</year>). <source>Food security and climate change</source> (<publisher-loc>Hoboken, NJ, USA</publisher-loc>: 
<publisher-name>John Wiley</publisher-name>), <fpage>568</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/9781119180661</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1651997">Wen-Hao Su</ext-link>, China Agricultural University, China</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/837410">Simerjeet Kaur</ext-link>, Punjab Agricultural University, India</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1197923">Talha Ubaid</ext-link>, The University of Haripur, Pakistan</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3211870">Sylvester Badua</ext-link>, Central Luzon State University, Philippines</p></fn>
</fn-group>
</back>
</article>