<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2024.1396568</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Performance evaluation of semi-supervised learning frameworks for multi-class weed detection</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Jiajia</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2669474"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Dong</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1509086"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yin</surname>
<given-names>Xunyuan</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1863983"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Li</surname>
<given-names>Zhaojian</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2673785"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Electrical and Computer Engineering, Michigan State University</institution>, <addr-line>East Lansing, MI</addr-line>, <country>United States</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Environmental Institute, University of Virginia</institution>, <addr-line>Charlottesville, VA</addr-line>, <country>United States</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>School of Chemical and Biomedical Engineering, Nanyang Technological University</institution>, <addr-line>Singapore</addr-line>, <country>Singapore</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Department of Mechanical Engineering, Michigan State University</institution>, <addr-line>East Lansing, MI</addr-line>, <country>United States</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Zhou Zhang, University of Wisconsin-Madison, United States</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Lorena Parra, Universitat Polit&#xe8;cnica de Val&#xe8;ncia, Spain</p>
<p>Paulo Flores, North Dakota State University, United States</p>
<p>Sushopti Gawade, Vidyalankar Institute of Technology, India</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Zhaojian Li, <email xlink:href="mailto:lizhaoj1@egr.msu.edu">lizhaoj1@egr.msu.edu</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>20</day>
<month>08</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>15</volume>
<elocation-id>1396568</elocation-id>
<history>
<date date-type="received">
<day>05</day>
<month>03</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>24</day>
<month>07</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2024 Li, Chen, Yin and Li</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Li, Chen, Yin and Li</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Precision weed management (PWM), driven by machine vision and deep learning (DL) advancements, not only enhances agricultural product quality and optimizes crop yield but also provides a sustainable alternative to herbicide use. However, existing DL-based algorithms on weed detection are mainly developed based on supervised learning approaches, typically demanding large-scale datasets with manual-labeled annotations, which can be time-consuming and labor-intensive. As such, label-efficient learning methods, especially semi-supervised learning, have gained increased attention in the broader domain of computer vision and have demonstrated promising performance. These methods aim to utilize a small number of labeled data samples along with a great number of unlabeled samples to develop high-performing models comparable to the supervised learning counterpart trained on a large amount of labeled data samples. In this study, we assess the effectiveness of a semi-supervised learning framework for multi-class weed detection, employing two well-known object detection frameworks, namely FCOS (Fully Convolutional One-Stage Object Detection) and Faster-RCNN (Faster Region-based Convolutional Networks). Specifically, we evaluate a generalized student-teacher framework with an improved pseudo-label generation module to produce reliable pseudo-labels for the unlabeled data. To enhance generalization, an ensemble student network is employed to facilitate the training process. Experimental results show that the proposed approach is able to achieve approximately 76% and 96% of the detection accuracy of the supervised methods with only 10% of labeled data in CottonWeedDet3 and CottonWeedDet12, respectively. We offer access to the source code (<ext-link ext-link-type="uri" xlink:href="https://github.com/JiajiaLi04/SemiWeeds">https://github.com/JiajiaLi04/SemiWeeds</ext-link>), contributing a valuable resource for ongoing semi-supervised learning research in weed detection and beyond.</p>
</abstract>
<kwd-group>
<kwd>precision weed management</kwd>
<kwd>precision agriculture</kwd>
<kwd>label-efficient learning</kwd>
<kwd>computer vision</kwd>
<kwd>deep learning</kwd>
</kwd-group>
<counts>
<fig-count count="7"/>
<table-count count="4"/>
<equation-count count="4"/>
<ref-count count="75"/>
<page-count count="13"/>
<word-count count="5807"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Sustainable and Intelligent Phytoprotection</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Weeds pose a significant risk to global crop production, with potential losses attributed to these unwelcome plants estimated at 43% worldwide (<xref ref-type="bibr" rid="B47">Oerke, 2006</xref>). Specifically, in the context of cotton farming, inefficient management of weeds can result in a staggering 90% reduction in yield (<xref ref-type="bibr" rid="B42">Manalil et&#xa0;al., 2017</xref>). Traditional weed control methods typically involve the use of machinery, manual weeding, or application of herbicides. These weed management approaches, while commonly utilized, require significant labor and cost considerations. Manual and mechanical weeding methods are especially labor-intensive, a predicament that has been intensified by recent global labor shortages triggered by public health crises (e.g., the COVID-19 pandemic) and geopolitical conflicts (e.g., the Russia-Ukraine War) (<xref ref-type="bibr" rid="B26">Laborde et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B4">Ben Hassen and El Bilali, 2022</xref>). Furthermore, the use of herbicides brings about significant environmental harm and potential risks to human health, and contributes to the emergence of herbicide-resistant weed species (<xref ref-type="bibr" rid="B45">Norsworthy et&#xa0;al., 2012</xref>; <xref ref-type="bibr" rid="B10">Chen et&#xa0;al., 2022b</xref>).</p>
<p>PWM, integrating sensors, computer systems, and robotics into agricultural practices, has emerged as a promising and sustainable approach for efficient weed management (<xref ref-type="bibr" rid="B68">Young et&#xa0;al., 2013</xref>). It allows for targeted treatment based on specific site conditions and weed species, thereby significantly minimizing the use of herbicides and other resources (<xref ref-type="bibr" rid="B21">Gerhards and Christensen, 2003</xref>). To achieve successful implementation of PWM, it is essential to accurately identify, localize, and monitor weeds, which requires robust machine vision algorithms for weed recognition (<xref ref-type="bibr" rid="B10">Chen et&#xa0;al., 2022b</xref>). Traditional image processing techniques, often encompassing edge detection, color analysis, and texture feature extraction, along with subsequent steps such as thresholding or supervised modeling, are widely utilized in the field of weed classification and detection (<xref ref-type="bibr" rid="B43">Meyer and Neto, 2008</xref>; <xref ref-type="bibr" rid="B62">Wang et&#xa0;al., 2019</xref>). For instance, a weed classification algorithm that relies on extracted texture features was developed by <xref ref-type="bibr" rid="B2">Bawden et&#xa0;al. (2017)</xref>. <xref ref-type="bibr" rid="B1">Ahmad et&#xa0;al. (2018)</xref> used local shape and edge orientation features to differentiate between monocot and dicot weeds. However, despite promising results, these conventional machine vision techniques often necessitate manual feature engineering for specific weed detection or classification tasks, which requires extensive domain knowledge and can be error-prone and time-consuming. Moreover, these methods may struggle with complex visual tasks and be sensitive to variations in lighting conditions and occlusion (<xref ref-type="bibr" rid="B46">O&#x2019;Mahony et&#xa0;al., 2020</xref>).</p>
<p>Recently, DL-based advanced computer vision has been recognized as a promising approach for sustainable weed management (<xref ref-type="bibr" rid="B20">Farooq et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B69">Yu et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B48">Parra et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B10">Chen et&#xa0;al., 2022b</xref>; <xref ref-type="bibr" rid="B13">Coleman et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B49">Rahman et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B50">Rai et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B56">Sportelli et&#xa0;al., 2023</xref>). For example, four different YOLO (You Only Look Once) object detectors were evaluated for weed detection in different turfgrass scenarios in <xref ref-type="bibr" rid="B56">Sportelli et&#xa0;al. (2023)</xref>. Additionally, in <xref ref-type="bibr" rid="B10">Chen et&#xa0;al. (2022b)</xref>, 35 state-of-the-art deep neural networks (DNNs) were examined and benchmarked for multi-class weed classification within cotton production systems, with nearly all models attaining high classification accuracy, reflected by F1 scores exceeding 95%. Despite their proven effectiveness, these DL-based approaches are notoriously data-hungry, and their performance is heavily dependent on large-scale and accurately labeled image datasets (<xref ref-type="bibr" rid="B40">Lu and Young, 2020</xref>; <xref ref-type="bibr" rid="B50">Rai et&#xa0;al., 2023</xref>), whereas manually labeling such large-scale image datasets is often error-prone, tedious, expensive, and time-consuming (<xref ref-type="bibr" rid="B29">Li et&#xa0;al., 2023</xref>).</p>
<p>To address these challenges, label-efficient learning algorithms (<xref ref-type="bibr" rid="B29">Li et&#xa0;al., 2023</xref>) have emerged as promising solutions to reduce the high labeling costs by harnessing the potential of unlabeled samples. Specifically, in <xref ref-type="bibr" rid="B19">dos Santos Ferreira et&#xa0;al. (2019)</xref>, the efficacy of two popular unsupervised learning algorithms, namely Joint Unsupervised Learning of Deep Representations and Image Clusters (JULE, <xref ref-type="bibr" rid="B67">Yang et&#xa0;al. (2016)</xref>) and Deep Clustering for Unsupervised Learning of Visual Features (DeepCluster, <xref ref-type="bibr" rid="B8">Caron et&#xa0;al. (2018)</xref>), was evaluated in the context of weed recognition utilizing two publicly available weed datasets. In addition, semi-supervised learning for weed classification has been studied (<xref ref-type="bibr" rid="B35">Liu et&#xa0;al., 2023</xref>, <xref ref-type="bibr" rid="B36">2024</xref>; <xref ref-type="bibr" rid="B3">Benchallal et&#xa0;al., 2024</xref>). Furthermore, a semi-supervised learning strategy called SemiWeedNet was introduced in <xref ref-type="bibr" rid="B44">Nong et&#xa0;al. (2022)</xref>; this method was designed for the segmentation of weeds and crops in challenging environments characterized by complex backgrounds. Moreover, the study presented in <xref ref-type="bibr" rid="B24">Hu et&#xa0;al. (2021)</xref> employed the cut-and-paste image synthesis approach and semi-supervised learning to address the issue of insufficient training data for weed detection. This approach was evaluated on an image dataset consisting of 500 images across four categories: &#x201c;cotton&#x201d;, &#x201c;morningglory&#x201d;, &#x201c;grass&#x201d;, and &#x201c;other&#x201d;, which culminated in an mAP of 46.0. 
Although the results were intriguing, their methodology was tested only on a two-stage object detector [i.e., Faster-RCNN (<xref ref-type="bibr" rid="B52">Ren et&#xa0;al., 2015</xref>)] and a four-category image dataset, which does not sufficiently substantiate the efficacy of semi-supervised learning for weed detection. Therefore, our research aims to further probe the potential of semi-supervised learning in weed detection, and comparatively assess a variety of object detectors and multi-class weed species. The key contributions of this study are as follows:</p>
<list list-type="bullet">
<list-item>
<p>We rigorously evaluate the semi-supervised learning framework utilizing two open-source cotton weed datasets. These datasets include 3 and 12 weed classes commonly found in U.S. cotton production systems.</p>
</list-item>
<list-item>
<p>We further analyze and compare the performance of one-stage and two-stage object detectors within the semi-supervised learning framework.</p>
</list-item>
<list-item>
<p>In the spirit of reproducibility, we make all our training and evaluation codes<xref ref-type="fn" rid="fn1">
<sup>1</sup>
</xref> freely accessible.</p>
</list-item>
</list>
<p>The remainder of this paper is organized as follows: Section 2 details the dataset and technical aspects pertinent to this study. Section 3 presents experimental results and provides a comprehensive analysis, followed by further discussions and limitations in Section 4. Lastly, Section 5 offers concluding remarks and outlines potential future research directions.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<p>In this section, we begin by introducing the two datasets employed in our study. Then, we provide an overview of two representative object detectors: the two-stage Faster R-CNN and the one-stage FCOS detector, along with the details of our semi-supervised framework. Lastly, we present the evaluation metrics and describe the experimental setups.</p>
<sec id="s2_1">
<label>2.1</label>
<title>Weed datasets</title>
<p>To assess the performance and efficacy of our semi-supervised framework, we conducted evaluations on two publicly available weed datasets tailored specifically to the U.S. cotton production systems: CottonWeedDet3 (<xref ref-type="bibr" rid="B49">Rahman et&#xa0;al., 2023</xref>) and CottonWeedDet12 (<xref ref-type="bibr" rid="B14">Dang et&#xa0;al., 2023</xref>).</p>
<p>CottonWeedDet3<xref ref-type="fn" rid="fn2">
<sup>2</sup>
</xref> (<xref ref-type="bibr" rid="B49">Rahman et&#xa0;al., 2023</xref>) comprises 848 high-resolution images (4442 &#xd7; 4335 pixels) annotated with 1532 bounding boxes. It contains three distinct classes of weeds commonly found in southern U.S. cotton fields, primarily in North Carolina and Mississippi. These images include three types of weeds: carpetweed (<italic>Mollugo verticillata</italic>), morning glory (genus <italic>Ipomoea</italic>), and Palmer amaranth (<italic>Amaranthus palmeri</italic>). For adaptability, the annotations for each image were saved in both YOLO and COCO formats. Notably, around 99% of the images contain fewer than 10 bounding boxes, with only a small portion (9 out of the 848 images) containing a more substantial quantity of bounding boxes, even up to 93 in some cases. Additionally, carpetweed is the most frequently annotated, while Palmer amaranth is the least. Visual examples of the three-class weed images can be found in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Weed samples in the CottonWeedDet3 dataset (<xref ref-type="bibr" rid="B49">Rahman et&#xa0;al., 2023</xref>). Each column represents the image samples for one weed class.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1396568-g001.tif"/>
</fig>
<p>CottonWeedDet12 dataset<xref ref-type="fn" rid="fn3">
<sup>3</sup>
</xref> (<xref ref-type="bibr" rid="B14">Dang et&#xa0;al., 2023</xref>) contains 5648 images of 12 weed classes, annotated with a total of 9370 bounding boxes (saved in both YOLO and COCO formats). These images, with a resolution exceeding 10 megapixels, were captured under natural lighting conditions and across various weed growth stages in cotton fields. Each weed class is represented by more than 140 bounding boxes. Moreover, waterhemp and morning glory have the highest number of bounding boxes while goose grass and cutleaf ground cherry have the fewest. In terms of image volume, the CottonWeedDet12 dataset is more than six times the size of the CottonWeedDet3 dataset (<xref ref-type="bibr" rid="B49">Rahman et&#xa0;al., 2023</xref>) (5648 vs. 848 images). Moreover, it represents the most extensive public dataset currently available for weed detection in cotton production systems. <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref> shows sample annotated images, each containing a single weed class, although images in the dataset may include multiple weed classes.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Weed samples in the CottonWeedDet12 dataset (<xref ref-type="bibr" rid="B14">Dang et&#xa0;al., 2023</xref>).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1396568-g002.tif"/>
</fig>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>DL-based object detectors</title>
<p>DL-based object detectors are typically structured around two primary components: a backbone and a detection head (<xref ref-type="bibr" rid="B6">Bochkovskiy et&#xa0;al., 2020</xref>). The backbone is responsible for extracting features from high-dimensional inputs and is commonly pre-trained on ImageNet data (<xref ref-type="bibr" rid="B16">Deng et&#xa0;al., 2009</xref>). Conversely, the head is leveraged to predict the classes and bounding boxes of objects. Existing detectors consist of anchor-based detectors (<xref ref-type="bibr" rid="B52">Ren et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B7">Cai et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B32">Lin et&#xa0;al., 2017</xref>) and anchor-free detectors (<xref ref-type="bibr" rid="B27">Law and Deng, 2018</xref>; <xref ref-type="bibr" rid="B60">Tian et al., 2022</xref>; <xref ref-type="bibr" rid="B74">Zhou et&#xa0;al., 2019</xref>). Anchor-based detectors utilize pre-defined anchor boxes, adjusting them for position shifts and scaling to align with the ground-truth boxes, primarily based on their intersection-over-union (IoU) scores. Conversely, the pre-defined anchor boxes are discarded in the detection head for the anchor-free object detection models.</p>
<sec id="s2_2_1">
<label>2.2.1</label>
<title>Anchor-based detectors</title>
<p>Anchor-based object detectors utilize pre-defined anchor boxes to efficiently localize and classify objects in images, being a representative approach in object detection methodologies. These methods have led to significant advancements and impressive outcomes in object detection (<xref ref-type="bibr" rid="B52">Ren et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B7">Cai et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B32">Lin et&#xa0;al., 2017</xref>). The most notable embodiment of this framework is Faster-RCNN (<xref ref-type="bibr" rid="B52">Ren et&#xa0;al., 2015</xref>), which was built upon the earlier Fast RCNN model (<xref ref-type="bibr" rid="B22">Girshick, 2015</xref>). Deviating from the selective search methods utilized in Fast RCNN, Faster RCNN employs CNNs to generate region proposals via an efficient Region Proposal Network (RPN). The features from the final shared convolutional layer are then harnessed for both RPN&#x2019;s region proposal task and Fast RCNN&#x2019;s region classification task. In this study, we use Faster RCNN as one of the detectors in our semi-supervised framework.</p>
</sec>
<sec id="s2_2_2">
<label>2.2.2</label>
<title>Anchor-free detectors</title>
<p>While anchor-based detectors have demonstrated impressive outcomes, their application to novel datasets necessitates expertise in tuning hyperparameters (<xref ref-type="bibr" rid="B25">Jiao et&#xa0;al., 2019</xref>) associated with anchor boxes. This constraint limits the adaptability of these detectors to new datasets or environments (<xref ref-type="bibr" rid="B71">Zhang et&#xa0;al., 2020</xref>). Furthermore, anchor-based approaches often prove to be computationally expensive for current mobile/edge devices used in agricultural applications, which typically have constrained storage and computational capacity. Alternatively, these limitations are addressed in anchor-free detectors by eliminating the need for pre-defined anchor boxes in detection models. These methods can directly predict class probabilities and bounding box offsets from full images using a single feed-forward CNN without necessitating the generation of region proposals or subsequent classification/feature resampling, thereby encapsulating all computation within a single network (<xref ref-type="bibr" rid="B34">Liu et&#xa0;al., 2020</xref>). YOLO (<xref ref-type="bibr" rid="B51">Redmon et&#xa0;al., 2016</xref>), one of the most representative one-stage detectors, transforms the task of object detection into a regression problem by directly mapping image pixels to spatially separated bounding boxes and corresponding class probabilities. YOLO is designed for speed, capable of operating in real-time at 45 frames per second (FPS) by eliminating the region proposal generation process. On the other hand, FCOS (<xref ref-type="bibr" rid="B60">Tian et al., 2022</xref>) is an anchor box-free and proposal-free one-stage object detector. By eliminating the anchor box designs, FCOS avoids the complicated computation related to anchor boxes such as calculating overlapping during training and all hyper-parameters related to anchor boxes. 
In this study, FCOS serves as one of our base object detection models, chosen for its accessibility and extensive adoption within the field as evidenced by previous research (<xref ref-type="bibr" rid="B71">Zhang et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B30">Li et&#xa0;al., 2021</xref>).</p>
</sec>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Semi-supervised learning</title>
<p>Semi-supervised learning, a form of label-efficient learning, leverages unlabeled samples to augment the learning process (<xref ref-type="bibr" rid="B61">Van Engelen and Hoos, 2020</xref>; <xref ref-type="bibr" rid="B29">Li et&#xa0;al., 2023</xref>). Most existing semi-supervised learning works (<xref ref-type="bibr" rid="B58">Tarvainen and Valpola, 2017</xref>; <xref ref-type="bibr" rid="B5">Berthelot et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B65">Xie et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B54">Sohn et&#xa0;al., 2020a</xref>; <xref ref-type="bibr" rid="B66">Xu et&#xa0;al., 2021</xref>) can be categorized into consistency regularization where the prediction is consistent with different perturbations, and self-training that involves an iterative update process.</p>
<p>The teacher-student framework is one of the mainstream ways for semi-supervised object detection (<xref ref-type="bibr" rid="B54">Sohn et&#xa0;al., 2020a</xref>; <xref ref-type="bibr" rid="B66">Xu et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B38">Liu et&#xa0;al., 2021b</xref>; <xref ref-type="bibr" rid="B28">Li et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B9">Chen et&#xa0;al., 2022a</xref>) using the self-training approach, which is illustrated in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>. Initially, a &#x201c;teacher&#x201d; model is trained on the labeled samples using supervised learning. This trained &#x201c;teacher&#x201d; model is duplicated into a &#x201c;student&#x201d; model and employed to generate pseudo-labels for the unlabeled samples. Subsequently, a mixture of the most confidently selected pseudo-labeled samples and the original labeled samples is utilized to train the &#x201c;student&#x201d; model. The &#x201c;teacher&#x201d; model is then updated with the &#x201c;student&#x201d; model using an Exponential Moving Average (EMA) strategy (<xref ref-type="bibr" rid="B58">Tarvainen and Valpola, 2017</xref>) according to <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>:</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Pipeline of the proposed semi-supervised weed detection framework.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1396568-g003.tif"/>
</fig>
<disp-formula id="eq1">
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mrow>
<mml:mtext>teacher</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mrow>
<mml:mtext>teacher</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mrow>
<mml:mtext>student</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>&#x3b8;</italic>
<sub>teacher</sub> and <italic>&#x3b8;</italic>
<sub>student</sub> represent the parameters of the &#x201c;teacher&#x201d; and &#x201c;student&#x201d; models, respectively. The factor <italic>&#x3b1;</italic> determines the extent of the update. An <italic>&#x3b1;</italic> of 1 retains the original &#x201c;teacher&#x201d; model parameters, while an <italic>&#x3b1;</italic> of 0 fully replaces the &#x201c;teacher&#x201d; model with the &#x201c;student&#x201d; model. In this study, we use cross-validations and find that <italic>&#x3b1;</italic> = 0.99 is the optimal choice for the designed semi-supervised learning framework. The EMA strategy serves as a crucial mechanism to reduce variance (<xref ref-type="bibr" rid="B58">Tarvainen and Valpola, 2017</xref>). We apply weak augmentation approaches (e.g., horizontal flip, multi-scale training with a shorter size range [400, 1200], and scale jittering) to the student learning process and strong augmentation methods (e.g., randomly added gray scale, Gaussian blur, cutout patches (<xref ref-type="bibr" rid="B17">DeVries and Taylor, 2017</xref>)) to the teacher learning process, respectively, to enhance the performance during the training process (<xref ref-type="bibr" rid="B65">Xie et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B66">Xu et&#xa0;al., 2021</xref>). <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref> provides a visual representation of the described process.</p>
<p>This iterative process (steps 1-3) is repeated until the model achieves satisfactory performance. Upon completion of the model training, the &#x201c;student&#x201d; model is discarded, and only the &#x201c;teacher&#x201d; model is retained for inference. The versatility of self-training methods allows them to be integrated with any supervised learning-based approach, including one-stage and two-stage object detectors. In this study, we employ a self-training-based semi-supervised learning framework and assess two representative object detectors, Faster RCNN (<xref ref-type="bibr" rid="B52">Ren et&#xa0;al., 2015</xref>) and FCOS (<xref ref-type="bibr" rid="B60">Tian et al., 2022</xref>).</p>
<sec id="s2_3_1">
<label>2.3.1</label>
<title>Pseudo-labeling on detectors</title>
<p>It is important to obtain the most confident and accurate pseudo-labels in semi-supervised learning. Published works (<xref ref-type="bibr" rid="B55">Sohn et&#xa0;al., 2020b</xref>; <xref ref-type="bibr" rid="B72">Zhou et&#xa0;al., 2021a</xref>; <xref ref-type="bibr" rid="B38">Liu et&#xa0;al., 2021b</xref>) exploit the pseudo-labeling method to address semi-supervised object detection, and the majority of them concentrated on anchor-based detectors. Our focus, however, lies in introducing the generalization approach for both anchor-free and anchor-based detectors, drawing inspiration from (<xref ref-type="bibr" rid="B38">Liu et&#xa0;al., 2021b</xref>, <xref ref-type="bibr" rid="B39">2022</xref>).</p>
<p>We take the widely used FCOS model (<xref ref-type="bibr" rid="B60">Tian et al., 2022</xref>) as an example to demonstrate the semi-supervised object detection tasks. FCOS comprises three prediction branches (classifier, centerness, and regressor), where the centerness score/branch dominates the bounding boxes score. However, the reliability of centerness scores in distinguishing foreground instances is questionable, particularly under conditions of limited label availability, as there is no supervision mechanism to suppress the centerness score for background instances within the centerness branch (<xref ref-type="bibr" rid="B31">Li et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B39">Liu et&#xa0;al., 2022</xref>). Consequently, although the centerness branch improves the anchor-free detector performance for the supervised training, it proves ineffective or even counterproductive for semi-supervised training scenarios (<xref ref-type="bibr" rid="B31">Li et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B39">Liu et&#xa0;al., 2022</xref>). To address this issue, our approach prioritizes pseudo-boxes based solely on classification scores (<xref ref-type="bibr" rid="B39">Liu et&#xa0;al., 2022</xref>). The classifier is trained with the hard labels (i.e., one-hot vector) with the box localization weighting. Finally, we use the standard label assignment method instead of center-sampling, which designates all elements within the bounding boxes as foreground and everything outside as background.</p>
</sec>
<sec id="s2_3_2">
<label>2.3.2</label>
<title>Unsupervised regression loss</title>
<p>Confidence thresholding has proven effective in prior studies (<xref ref-type="bibr" rid="B58">Tarvainen and Valpola, 2017</xref>; <xref ref-type="bibr" rid="B55">Sohn et&#xa0;al., 2020b</xref>; <xref ref-type="bibr" rid="B38">Liu et&#xa0;al., 2021b</xref>). However, depending solely on box confidence is insufficient for effectively eliminating misleading instances in box regression, since the &#x201c;teacher&#x201d; may still provide a contradictory regression to the ground-truth direction (<xref ref-type="bibr" rid="B11">Chen et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B53">Saputra et&#xa0;al., 2019</xref>). To address this challenge, we categorize the pseudo-labels into two groups: beneficial instances and misleading instances. We then leverage the relative prediction information between the &#x201c;student&#x201d; and the &#x201c;teacher&#x201d; to identify beneficial instances and filter out misleading ones during the training of the regression branch. We define the unsupervised regression loss by selecting beneficial instances where the &#x201c;teacher&#x201d; exhibits lower localization uncertainty than the &#x201c;student&#x201d; by a margin of <italic>&#x3c3;</italic>, as shown in <xref ref-type="disp-formula" rid="eq2">Equation 2</xref>:</p>
<disp-formula id="eq2">
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:msubsup>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mtable>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mrow>
<mml:mo>|</mml:mo>
<mml:mo>|</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>d</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mover accent="true">
<mml:mi>d</mml:mi>
<mml:mo>&#x2dc;</mml:mo>
</mml:mover>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo>|</mml:mo>
<mml:mo>|</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
<mml:mo>,</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mtext>if</mml:mtext>
<mml:mo>&#xa0;</mml:mo>
<mml:msubsup>
<mml:mi>&#x3b4;</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mtext>&#xa0;</mml:mtext>
<mml:mo>&#x2264;</mml:mo>
<mml:mtext>&#xa0;</mml:mtext>
<mml:msubsup>
<mml:mi>&#x3b4;</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mo>&#xa0;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:mtext>otherwise</mml:mtext>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The parameter <italic>&#x3c3;</italic> &#x2265; 0 represents a margin between the localization uncertainties of the &#x201c;teacher&#x201d; and the &#x201c;student&#x201d;, where the localization uncertainty is loosely associated with the deviation from the ground-truth labels. Specifically, <inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3b4;</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> represents the teacher&#x2019;s localization uncertainty, while <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3b4;</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> represents the student&#x2019;s localization uncertainty. Furthermore, <inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:msubsup>
<mml:mi>d</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="true">&#x2dc;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:msubsup>
<mml:mi>d</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="true">&#x2dc;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> are the regression predictions for &#x201c;teacher&#x201d; and &#x201c;student&#x201d;, respectively. For more details of the design for the unsupervised regression loss, please refer to <xref ref-type="bibr" rid="B39">Liu et&#xa0;al. (2022)</xref>.</p>
</sec>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Performance evaluation metrics</title>
<p>In this evaluation, we rely on Average Precision (AP) as a primary metric, a measure derived from precision (P) and recall (R). AP summarizes the P(R) Curve to one scalar value. However, since AP is traditionally evaluated for each object category separately, we employ the mean Average Precision (mAP) metric (<xref ref-type="bibr" rid="B34">Liu et&#xa0;al., 2020</xref>) to provide a comprehensive assessment across all object categories. The mAP is calculated as the average of AP scores over all object categories, and both AP and mAP are determined using the following <xref ref-type="disp-formula" rid="eq3">Equations 3</xref>, <xref ref-type="disp-formula" rid="eq4">4</xref>:</p>
<disp-formula id="eq3">
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>=</mml:mo>
<mml:mstyle displaystyle="true">
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x222b;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mn>1</mml:mn>
</mml:msubsup>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>R</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>R</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<disp-formula id="eq4">
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi>n</mml:mi>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:msub>
<mml:mi>P</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>n</italic> represents the number of weed classes, and mAP signifies the average AP across these classes. A higher area under the Precision-Recall (PR) curve indicates improved object detection accuracy. Moreover, we consider mAP@[0.5:0.95], reflecting the mean average precision across IoU thresholds ranging from 0.5 to 0.95. These metrics collectively offer a representative evaluation of the model&#x2019;s performance across varying detection thresholds, ensuring a comprehensive understanding of its object detection capabilities.</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Experimental setups</title>
<p>In the process of model development and evaluation, the cotton weed dataset was partitioned into three subsets randomly. Specifically, for a comprehensive evaluation, the CottonWeedDet3 dataset was randomly partitioned into training, validation, and testing sets following a ratio of 65%, 20%, and 15%, resulting in subsets comprising 550, 170, and 128 images. Similarly, the CottonWeedDet12 dataset was also divided into training, validation, and testing subsets, with a distribution ratio of 65%, 20%, and 15%, respectively. This results in subsets comprising 3670, 1130, and 848 images. The validation set is used to select the optimal trained model, while the test set is utilized to evaluate the model&#x2019;s performance.</p>
<p>To expedite the model training process, we leveraged transfer learning (<xref ref-type="bibr" rid="B75">Zhuang et&#xa0;al., 2020</xref>) for the backbones of all object detectors, fine-tuning them with pre-trained weights obtained from the ImageNet dataset (<xref ref-type="bibr" rid="B16">Deng et&#xa0;al., 2009</xref>). The model was implemented based on Detectron2 (<xref ref-type="bibr" rid="B63">Wu et&#xa0;al., 2019</xref>). All models underwent training for 80k iterations, a duration deemed sufficient for effective modeling of the weed data. Stochastic Gradient Descent (SGD) was adopted as the optimizer, maintaining a momentum of 0.9 throughout the training process. The learning rate was selected as 0.01, and each batch contained 4 labeled images and 4 unlabeled images. We adopted the weak augmentation (horizontal flip, multi-scale training with a shorter size range [400, 1200] and scale jittering) for the &#x201c;student&#x201d;, and randomly added gray scale, Gaussian blur, cutout patches (<xref ref-type="bibr" rid="B17">DeVries and Taylor, 2017</xref>), and color jittering as the strong augmentation for the &#x201c;teacher&#x201d;. The computational setup included a server running Ubuntu 20.04, equipped with two GeForce RTX 2080Ti GPUs, each with 12 GB of memory, ensuring efficient model training and testing.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<p>In this section, we first evaluate the performance of various object detectors within the context of a semi-supervised learning framework. Subsequently, we delve into a detailed analysis of the performance exhibited by individual weed classes.</p>
<sec id="s3_1">
<label>3.1</label>
<title>Semi-supervised object detector comparison</title>
<p>
<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref> illustrates the training curves for FCOS and Faster RCNN, utilizing various proportions of labeled samples on the two cotton weed datasets: CottonWeedDet3 and CottonWeedDet12. We evaluated each algorithm in both supervised and semi-supervised learning contexts. For example, the configuration represented as Faster RCNN-sup-5% refers to the Faster RCNN trained with supervised learning using 5% of labeled samples. Conversely, Faster RCNN-semi-5% is the same detector trained with semi-supervised learning using 5% of the labeled samples and 95% of the unlabeled samples.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Training curves for FCOS and Faster RCNN with different proportions of labeled samples for two cotton weed datasets: CottonWeedDet3 and CottonWeedDet12. <bold>(A)</bold> Training curves for the CottonWeedDet3 dataset. <bold>(B)</bold> Training curves for the CottonWeedDet12 dataset.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1396568-g004.tif"/>
</fig>
<p>It is evident from the results that semi-supervised learning outperforms its supervised counterparts on both datasets, given the exploitation of a large volume of unlabeled samples to bolster the training process. As an example, Faster RCNN-semi-5% achieves superior training performance compared to Faster RCNN-sup-5%. Moreover, it is noteworthy that FCOS-semi-50% manages to attain performance comparable to that of FCOS-100% (where all samples are labeled) on the CottonWeedDet3 dataset. FCOS-semi-50% even surpasses FCOS-100% on the CottonWeedDet12 dataset, suggesting that with only half the labeling effort, we can achieve improved performance, which also showcases that semi-supervised learning can be more robust than supervised learning (<xref ref-type="bibr" rid="B33">Liu et&#xa0;al., 2021a</xref>). Furthermore, CottonWeedDet12 shows significant performance superiority over CottonWeedDet3, largely due to the latter&#x2019;s smaller image dataset and the greater complexity of scenes within each image.</p>
<p>
<xref ref-type="table" rid="T1">
<bold>Tables&#xa0;1</bold>
</xref>, <xref ref-type="table" rid="T2">
<bold>2</bold>
</xref> summarize the test performance (measured by mAP@[0.5:0.95]) comparison between the supervised and semi-supervised learning approaches based on the Faster-RCNN and FCOS models on the CottonWeedDet3 and CottonWeedDet12 datasets, respectively. Across both datasets, FCOS consistently outperforms Faster-RCNN in both the semi-supervised and supervised learning contexts. These findings are in agreement with the observations drawn from the training curves illustrated in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>. For any given proportion of labeled samples, the semi-supervised learning approaches are found to enhance the test performance. For instance, on the CottonWeedDet3 dataset, the Faster RCNN model using a semi-supervised learning approach attains 86.70% and 93.73% of the performance of its fully supervised counterpart with only 20% and 50% of the samples labeled, respectively. Furthermore, it is worth highlighting that on the CottonWeedDet12 dataset, the FCOS model trained using semi-supervised learning with only 50% of labeled samples outperforms the test performance of the fully supervised approach, which uses 100% of the samples manually labeled. That is because semi-supervised learning can effectively leverage the vast amount of unlabeled samples, which may capture the inherent distribution of the data better than a limited set of labeled samples.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Testing performance (mAP@[0.5:0.95]) comparison between the supervised and semi-supervised based on Faster-RCNN and FCOS models on the CottonWeedDet3 dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Algorithms</th>
<th valign="middle" rowspan="2" align="center">Supervision type</th>
<th valign="top" colspan="5" align="center">Proportion of labeled data for training</th>
</tr>
<tr>
<th valign="top" align="center">5%</th>
<th valign="top" align="center">10%</th>
<th valign="top" align="center">20%</th>
<th valign="top" align="center">50%</th>
<th valign="top" align="center">100%</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="2" align="center">Faster-RCNN</td>
<td valign="top" align="center">Supervised</td>
<td valign="top" align="center">21.14</td>
<td valign="top" align="center">31.90</td>
<td valign="top" align="center">42.65</td>
<td valign="top" align="center">50.51</td>
<td valign="middle" rowspan="2" align="center">56.75</td>
</tr>
<tr>
<td valign="top" align="center">Semi-supervised</td>
<td valign="top" align="center">29.33</td>
<td valign="top" align="center">40.14</td>
<td valign="top" align="center">49.20</td>
<td valign="top" align="center">53.19</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">FCOS</td>
<td valign="top" align="center">Supervised</td>
<td valign="top" align="center">27.37</td>
<td valign="top" align="center">42.20</td>
<td valign="top" align="center">52.42</td>
<td valign="top" align="center">59.84</td>
<td valign="middle" rowspan="2" align="center">62.80</td>
</tr>
<tr>
<td valign="top" align="center">Semi-supervised</td>
<td valign="top" align="center">38.17</td>
<td valign="top" align="center">47.93</td>
<td valign="top" align="center">55.79</td>
<td valign="top" align="center">61.32</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Testing performance (mAP@[0.5:0.95]) comparison between the supervised and semi-supervised based on Faster-RCNN and FCOS models on the CottonWeedDet12 dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Algorithms</th>
<th valign="middle" rowspan="2" align="center">Supervision type</th>
<th valign="top" colspan="5" align="center">Proportion of labeled data for training</th>
</tr>
<tr>
<th valign="top" align="center">5%</th>
<th valign="top" align="center">10%</th>
<th valign="top" align="center">20%</th>
<th valign="top" align="center">50%</th>
<th valign="top" align="center">100%</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="2" align="center">Faster-RCNN</td>
<td valign="top" align="center">Supervised</td>
<td valign="top" align="center">45.02</td>
<td valign="top" align="center">61.18</td>
<td valign="top" align="center">68.29</td>
<td valign="top" align="center">75.97</td>
<td valign="middle" rowspan="2" align="center">80.47</td>
</tr>
<tr>
<td valign="top" align="center">Semi-supervised</td>
<td valign="top" align="center">53.08</td>
<td valign="top" align="center">70.21</td>
<td valign="top" align="center">75.15</td>
<td valign="top" align="center">78.83</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">FCOS</td>
<td valign="top" align="center">Supervised</td>
<td valign="top" align="center">62.28</td>
<td valign="top" align="center">72.99</td>
<td valign="top" align="center">79.14</td>
<td valign="top" align="center">83.87</td>
<td valign="middle" rowspan="2" align="center">86.47</td>
</tr>
<tr>
<td valign="top" align="center">Semi-supervised</td>
<td valign="top" align="center">76.91</td>
<td valign="top" align="center">83.43</td>
<td valign="top" align="center">85.28</td>
<td valign="top" align="center">87.26</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
<xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5</bold>
</xref>, <xref ref-type="fig" rid="f6">
<bold>6</bold>
</xref> show selected images predicted using both supervised and semi-supervised FCOS for CottonWeedDet3 and CottonWeedDet12, respectively. In both figures, only 5% and 10% of labeled samples are utilized for training. Remarkably, the semi-supervised FCOS exhibits visually compelling predictions, especially for images featuring diverse and/or cluttered backgrounds, as well as those with densely populated weed instances. Notably, the semi-supervised learning approach demonstrates superior performance compared to the supervised learning approach. For instance, in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5</bold>
</xref>, the semi-supervised FCOS with 5% labeled samples produces better predictions than the supervised learning approach with the same 5% of labeled samples. This underscores the ability of semi-supervised learning to leverage valuable information from a large volume of unlabeled data.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Examples of images annotated with ground truth labels <bold>(A)</bold> and predicted labels <bold>(B)</bold> using semi-supervised FCOS for CottonWeedDet3.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1396568-g005.tif"/>
</fig>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Comparison of detection results on CottonWeedDet12: <bold>(A, C)</bold> supervised baseline; <bold>(B, D)</bold> semi-supervised FCOS.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1396568-g006.tif"/>
</fig>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Class-specific performance</title>
<p>
<xref ref-type="table" rid="T3">
<bold>Tables&#xa0;3</bold>
</xref>, <xref ref-type="table" rid="T4">
<bold>4</bold>
</xref> present the class-specific performance of the FCOS model on the CottonWeedDet3 and CottonWeedDet12 datasets, respectively. The instance count reflects the number of bounding boxes associated with each weed category within the test images. It is evident that the CottonWeedDet12 dataset exhibits a considerable imbalance, as indicated by the significantly uneven distribution of instances across various weed classes.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Test performance (mAP@[0.5:0.95]) on a specific category of weeds on CottonWeedDet3.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="left">Weeds</th>
<th valign="top" rowspan="2" align="center"># of instances</th>
<th valign="top" colspan="5" align="center">Proportion of labeled data for training</th>
</tr>
<tr>
<th valign="top" align="center">5%</th>
<th valign="top" align="center">10%</th>
<th valign="top" align="center">20%</th>
<th valign="top" align="center">50%</th>
<th valign="top" align="center">100%</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">PalmerAmaranth</td>
<td valign="top" align="center">100</td>
<td valign="top" align="center">48.33</td>
<td valign="top" align="center">52.18</td>
<td valign="top" align="center">60.82</td>
<td valign="top" align="center">63.25</td>
<td valign="top" align="center">62.70</td>
</tr>
<tr>
<td valign="top" align="left">MorningGlory</td>
<td valign="top" align="center">101</td>
<td valign="top" align="center">46.82</td>
<td valign="top" align="center">55.47</td>
<td valign="top" align="center">63.56</td>
<td valign="top" align="center">65.97</td>
<td valign="top" align="center">70.83</td>
</tr>
<tr>
<td valign="top" align="left">Carpetweed</td>
<td valign="top" align="center">93</td>
<td valign="top" align="center">19.38</td>
<td valign="top" align="center">36.14</td>
<td valign="top" align="center">42.99</td>
<td valign="top" align="center">54.73</td>
<td valign="top" align="center">54.87</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Test performance (mAP@[0.5:0.95]) on the specific category of weeds on CottonWeedDet12.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" rowspan="2" align="left">Weeds</th>
<th valign="top" rowspan="2" align="center"># of instances</th>
<th valign="top" colspan="5" align="center">Proportion of labeled data for training</th>
</tr>
<tr>
<th valign="top" align="center">5%</th>
<th valign="top" align="center">10%</th>
<th valign="top" align="center">20%</th>
<th valign="top" align="center">50%</th>
<th valign="top" align="center">100%</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Waterhemp</td>
<td valign="top" align="center">352</td>
<td valign="top" align="center">85.25</td>
<td valign="top" align="center">86</td>
<td valign="top" align="center">88.52</td>
<td valign="top" align="center">89.78</td>
<td valign="top" align="center">88.10</td>
</tr>
<tr>
<td valign="top" align="left">MorningGlory</td>
<td valign="top" align="center">201</td>
<td valign="top" align="center">83.82</td>
<td valign="top" align="center">85.75</td>
<td valign="top" align="center">87.40</td>
<td valign="top" align="center">89.71</td>
<td valign="top" align="center">88.03</td>
</tr>
<tr>
<td valign="top" align="left">Purslane</td>
<td valign="top" align="center">161</td>
<td valign="top" align="center">74.89</td>
<td valign="top" align="center">78.58</td>
<td valign="top" align="center">80.10</td>
<td valign="top" align="center">81.40</td>
<td valign="top" align="center">83.68</td>
</tr>
<tr>
<td valign="top" align="left">SpottedSpurge</td>
<td valign="top" align="center">122</td>
<td valign="top" align="center">77.12</td>
<td valign="top" align="center">81.49</td>
<td valign="top" align="center">83.59</td>
<td valign="top" align="center">85.79</td>
<td valign="top" align="center">82.78</td>
</tr>
<tr>
<td valign="top" align="left">Carpetweed</td>
<td valign="top" align="center">137</td>
<td valign="top" align="center">63.04</td>
<td valign="top" align="center">69.62</td>
<td valign="top" align="center">68.64</td>
<td valign="top" align="center">71.34</td>
<td valign="top" align="center">68.18</td>
</tr>
<tr>
<td valign="top" align="left">Ragweed</td>
<td valign="top" align="center">144</td>
<td valign="top" align="center">78.08</td>
<td valign="top" align="center">78.26</td>
<td valign="top" align="center">81.88</td>
<td valign="top" align="center">83.11</td>
<td valign="top" align="center">81.83</td>
</tr>
<tr>
<td valign="top" align="left">Eclipta</td>
<td valign="top" align="center">117</td>
<td valign="top" align="center">90.28</td>
<td valign="top" align="center">90.69</td>
<td valign="top" align="center">91.34</td>
<td valign="top" align="center">93.58</td>
<td valign="top" align="center">95.19</td>
</tr>
<tr>
<td valign="top" align="left">PricklySida</td>
<td valign="top" align="center">60</td>
<td valign="top" align="center">78.26</td>
<td valign="top" align="center">82.91</td>
<td valign="top" align="center">83.75</td>
<td valign="top" align="center">83.48</td>
<td valign="top" align="center">84.01</td>
</tr>
<tr>
<td valign="top" align="left">PalmerAmaranth</td>
<td valign="top" align="center">42</td>
<td valign="top" align="center">86.76</td>
<td valign="top" align="center">89.09</td>
<td valign="top" align="center">87.82</td>
<td valign="top" align="center">91.31</td>
<td valign="top" align="center">93.55</td>
</tr>
<tr>
<td valign="top" align="left">Sicklepod</td>
<td valign="top" align="center">31</td>
<td valign="top" align="center">94.27</td>
<td valign="top" align="center">96.56</td>
<td valign="top" align="center">97.09</td>
<td valign="top" align="center">97.01</td>
<td valign="top" align="center">96.43</td>
</tr>
<tr>
<td valign="top" align="left">Goosegrass</td>
<td valign="top" align="center">31</td>
<td valign="top" align="center">78.83</td>
<td valign="top" align="center">81.69</td>
<td valign="top" align="center">85.58</td>
<td valign="top" align="center">90.02</td>
<td valign="top" align="center">85.31</td>
</tr>
<tr>
<td valign="top" align="left">CutleafGroundcherry</td>
<td valign="top" align="center">15</td>
<td valign="top" align="center">32.33</td>
<td valign="top" align="center">80.59</td>
<td valign="top" align="center">87.70</td>
<td valign="top" align="center">90.59</td>
<td valign="top" align="center">90.50</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>On the CottonWeedDet3 dataset, the semi-supervised learning approaches demonstrate promising performance. Notably, the semi-supervised model trained with 50% of the labeled samples surpasses the performance of the fully supervised learning model, particularly for palmer amaranth weeds. However, the detection accuracy for carpetweed remains relatively low, attributed to its small size which poses an inherent challenge for recognition. A similar trend is observed in the performance metrics presented in <xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref> for the CottonWeedDet12 dataset.</p>
<p>Remarkably, on the CottonWeedDet12 dataset, the semi-supervised FCOS model trained with 50% and 20% of labeled samples outperforms the fully supervised model for 8 out of 12 and 6 out of 12 weed classes, respectively. Impressively, for the top 3 minority weed classes &#x2014; cutleaf groundcherry, goosegrass, and sicklepod &#x2014; the FCOS model delivers superior performance even with only 50% of the labeling costs compared to the supervised learning approach. This underscores the potential of semi-supervised learning models to effectively address class imbalance and provide superior performance even with fewer labeled samples.</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Comparative analysis: semi-supervised learning vs. ground truth inaccuracies</title>
<p>In the preceding discussions, we demonstrate the remarkable performance improvement achieved by semi-supervised learning, even with a limited number of labeled samples, surpassing the results of traditional supervised learning approaches. In <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref>, we present image samples from CottonWeedDet12, showcasing both ground truth annotations and the predicted results obtained through the semi-supervised FCOS-10%. Notably, a discernible observation is the presence of inaccuracies and mislabels in the ground truth annotations, highlighting the challenges associated with manual labeling by human experts, including instances of noise and incorrect labels. The application of a semi-supervised learning approach proves to be a potent solution for mitigating the above challenges, effectively enhancing accuracy and rectifying ground truth inaccuracies.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Image samples from CottonWeedDet12 with ground truth annotations <bold>(A)</bold> and predicted results with semi-supervised FCOS-10% <bold>(B)</bold>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-15-1396568-g007.tif"/>
</fig>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussions</title>
<sec id="s4_1">
<label>4.1</label>
<title>Key contributions</title>
<p>The field of multi-class weed detection and localization remains largely unexplored in the existing literature (<xref ref-type="bibr" rid="B14">Dang et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B50">Rai et&#xa0;al., 2023</xref>). In the transition to the next-generation machine vision-based weeding systems, the focus is progressively shifting towards attaining higher precision and instituting weed-specific controls. Concurrently, the capability to differentiate between various weed species and identify individual weed instances emerges as an increasingly critical requirement within these vision tasks. While significant progress has been made in the development of DL-based weed detection (<xref ref-type="bibr" rid="B18">dos Santos Ferreira et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B62">Wang et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B64">Wu et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B15">Dang et&#xa0;al., 2022</xref>, <xref ref-type="bibr" rid="B14">2023</xref>), these approaches typically rely heavily on expansive and manually-labeled image datasets, which makes these processes costly, prone to human error, and laboriously time-consuming. In our previous review on label-efficient learning in agriculture (<xref ref-type="bibr" rid="B29">Li et&#xa0;al., 2023</xref>), we presented various techniques aimed at reducing labeling costs and their respective applications in agriculture, including crop and weed management. Nevertheless, label-efficient technologies remain largely unexplored in the field of multi-class weed detection and localization. In this regard, this study stands as a unique contribution to the research community, specifically in the area of weed detection and control. By implementing semi-supervised learning, we introduce an innovative approach to alleviate the burden of labor-intensive labeling costs. 
Our evaluation includes both one-stage and two-stage object detectors on two open-source weed datasets, demonstrating that semi-supervised learning can significantly reduce labeling costs without substantially compromising performance. Additionally, it can even generate enhanced performance metrics.</p>
<p>The results of this study have positive implications for the use of phytosanitary products and precision agriculture. By improving the efficiency and accuracy of weed detection and localization, our approach can contribute to more targeted and effective use of phytosanitary products, thereby enhancing overall agricultural productivity and sustainability.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Limitations</title>
<p>While this research provides valuable insights, it does acknowledge certain limitations that pave the way for potential future enhancements. Although the primary objective of this research is not to evaluate all DL-based object detectors for weed detection within the semi-supervised learning framework, there are indeed several high-performing object detectors that are not evaluated in this study. These include one-stage detectors such as SSD (<xref ref-type="bibr" rid="B37">Liu et&#xa0;al., 2016</xref>), RetinaNet (<xref ref-type="bibr" rid="B32">Lin et&#xa0;al., 2017</xref>), EfficientDet (<xref ref-type="bibr" rid="B57">Tan et&#xa0;al., 2020</xref>), and the YOLO series (<xref ref-type="bibr" rid="B14">Dang et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B59">Terven and Cordova-Esparza, 2023</xref>), as well as two-stage detectors like DINO (<xref ref-type="bibr" rid="B70">Zhang et&#xa0;al., 2022</xref>), CenterNetv2 (<xref ref-type="bibr" rid="B73">Zhou et&#xa0;al., 2021b</xref>), and RTMDet (<xref ref-type="bibr" rid="B41">Lyu et&#xa0;al., 2022</xref>), among others. We intend to test and incorporate these models into our continually updated benchmark as we refine and improve the semi-supervised learning framework through future efforts.</p>
<p>In the scope of this study, we work under the assumption that all unlabeled samples are drawn from the same distribution as the labeled samples. It is important to acknowledge that unlabeled data might include instances from unknown or unseen classes, presenting a challenge commonly known as the open-set challenge (<xref ref-type="bibr" rid="B12">Chen et&#xa0;al., 2020</xref>). This scenario may substantially compromise the efficacy of label-efficient learning. Consequently, we highlight a future investigation to delve into addressing out-of-distribution (OOD) issues, employing advanced sample-specific selection strategies. The aim is to identify and subsequently downplay the significance or utilization of OOD samples (<xref ref-type="bibr" rid="B23">Guo et&#xa0;al., 2020</xref>). This planned exploration intends to enhance the generalization and robustness of our approach, ensuring its effectiveness in scenarios where the dataset contains samples from classes not encountered during the training phase, thereby contributing to a more resilient and versatile semi-supervised learning framework.</p>
</sec>
</sec>
<sec id="s5" sec-type="conclusion">
<label>5</label>
<title>Conclusion</title>
<p>In this study, we conducted an extensive evaluation of semi-supervised learning in the context of multi-class weed detection. Leveraging a set of labeled data alongside the unlabeled data for model training, our investigation focused on evaluating the efficacy of both one-stage and two-stage object detectors. The two datasets, CottonWeedDet3 and CottonWeedDet12, chosen for our study were meticulously curated to align with U.S. cotton production systems, ensuring the relevance of our findings to real-world agricultural scenarios. By leveraging semi-supervised learning, the&#xa0;labeling costs were significantly reduced, while only minimal impacts on the detection performance were observed. Additionally, by using the abundant unlabeled samples, the semi-supervised learning approach produced a more robust and accurate model, and it demonstrated the capability of mitigating noise and incorrect&#xa0;labels in the ground-truth annotations. The outcomes underscore the potential of semi-supervised learning as a cost-effective and efficient alternative approach for developing agricultural applications, particularly those requiring extensive data annotations.</p>
<p>In our future work, we will refine and improve the semi-supervised learning framework for weed detection by testing and incorporating more high-performing object detectors into our continually updated benchmark. In addition, we will address the open-set challenge, where unlabeled data may include instances from unknown or unseen classes, potentially compromising the efficacy of label-efficient learning. Future investigations will delve into addressing out-of-distribution (OOD) issues by employing advanced sample-specific selection strategies.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material. Further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>JL: Writing &#x2013; review &amp; editing, Writing &#x2013; original draft, Visualization, Validation, Software, Methodology, Investigation, Formal analysis, Conceptualization. DC: Writing &#x2013; original draft, Formal analysis, Conceptualization. XY: Writing &#x2013; review &amp; editing, Investigation. ZL: Writing &#x2013; review &amp; editing, Supervision, Resources.</p>
</sec>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare that no financial support was received for the research, authorship, and/or publication of this article.</p>
</sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn id="fn1">
<label>1</label>
<p>
<ext-link ext-link-type="uri" xlink:href="https://github.com/JiajiaLi04/SemiWeeds">https://github.com/JiajiaLi04/SemiWeeds</ext-link>
</p>
</fn>
<fn id="fn2">
<label>2</label>
<p>CottonWeedDet3 dataset: <ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/datasets/yuzhenlu/cottonweeddet3">https://www.kaggle.com/datasets/yuzhenlu/cottonweeddet3</ext-link>
</p>
</fn>
<fn id="fn3">
<label>3</label>
<p>CottonWeedDet12 dataset: <ext-link ext-link-type="uri" xlink:href="https://zenodo.org/record/7535814">https://zenodo.org/record/7535814</ext-link>
</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ahmad</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Muhammad</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Ahmad</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Ahmad</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Smith</surname> <given-names>M. L.</given-names>
</name>
<name>
<surname>Smith</surname> <given-names>L. N.</given-names>
</name>
<etal/>
</person-group>. (<year>2018</year>). <article-title>Visual features based boosted classification of weeds for real-time selective herbicide sprayer systems</article-title>. <source>Comput. Industry</source> <volume>98</volume>, <fpage>23</fpage>&#x2013;<lpage>33</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compind.2018.02.005</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bawden</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Kulk</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Russell</surname> <given-names>R.</given-names>
</name>
<name>
<surname>McCool</surname> <given-names>C.</given-names>
</name>
<name>
<surname>English</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Dayoub</surname> <given-names>F.</given-names>
</name>
<etal/>
</person-group>. (<year>2017</year>). <article-title>Robot for weed species plant-specific management</article-title>. <source>J. Field Robotics</source> <volume>34</volume>, <fpage>1179</fpage>&#x2013;<lpage>1199</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/rob.21727</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Benchallal</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Hafiane</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Ragot</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Canals</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Convnext based semi-supervised approach with consistency regularization for weeds classification</article-title>. <source>Expert Syst. Appl.</source> <volume>239</volume>, <fpage>122222</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.eswa.2023.122222</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ben Hassen</surname> <given-names>T.</given-names>
</name>
<name>
<surname>El Bilali</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Impacts of the Russia-Ukraine war on global food security: towards more sustainable and resilient food systems</article-title>? <source>Foods</source> <volume>11</volume>, <fpage>2301</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/foods11152301</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Berthelot</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Carlini</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Goodfellow</surname> <given-names>I.</given-names>
</name>
<name>
<surname>Papernot</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Oliver</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Raffel</surname> <given-names>C. A.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Mixmatch: A holistic approach to semi-supervised learning</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>32</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.5555/3454287.3454741</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bochkovskiy</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>C.-Y.</given-names>
</name>
<name>
<surname>Liao</surname> <given-names>H.-Y. M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Yolov4: Optimal speed and accuracy of object detection</article-title>. <source>arXiv preprint arXiv:2004.10934</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2004.10934</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Cai</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Feris</surname> <given-names>R. S.</given-names>
</name>
<name>
<surname>Vasconcelos</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>A unified multi-scale deep convolutional neural network for fast object detection</article-title>,&#x201d; in <conf-name>Computer Vision&#x2013;ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11&#x2013;14, 2016, Proceedings, Part IV 14</conf-name>. (<publisher-name>Springer</publisher-name>), <fpage>354</fpage>&#x2013;<lpage>370</lpage>.</citation>
</ref>
<ref id="B8">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Caron</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Bojanowski</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Joulin</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Douze</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Deep clustering for unsupervised learning of visual features</article-title>,&#x201d; in <conf-name>Proceedings of the European conference on computer vision (ECCV)</conf-name>. (<publisher-loc>Munich, Germany</publisher-loc>: <publisher-name>Springer Link</publisher-name>), <fpage>132</fpage>&#x2013;<lpage>149</lpage>.</citation>
</ref>
<ref id="B9">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Xuan</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Song</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Xie</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>a). &#x201c;<article-title>Label matching semisupervised object detection</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</conf-name>. (<publisher-loc>New Orleans, Louisiana</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>14381</fpage>&#x2013;<lpage>14390</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR52688.2022.01398</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Young</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>b). <article-title>Performance evaluation of deep transfer learning on multi-class identification of common weed species in cotton production systems</article-title>. <source>Comput. Electron. Agric.</source> <volume>198</volume>, <fpage>107091</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2022.107091</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Choi</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Chandraker</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Learning efficient object detection models with knowledge distillation</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>30</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.5555/3294771.3294842</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Gong</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Semi-supervised learning under class distribution mismatch</article-title>,&#x201d; in <conf-name>Proceedings of the AAAI Conference on Artificial Intelligence</conf-name>, <conf-loc>New York, USA</conf-loc>, Vol. <volume>34</volume>(<issue>04</issue>), <fpage>3569</fpage>&#x2013;<lpage>3576</lpage>.</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Coleman</surname> <given-names>G. R.</given-names>
</name>
<name>
<surname>Bender</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Walsh</surname> <given-names>M. J.</given-names>
</name>
<name>
<surname>Neve</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Image-based weed recognition and control: Can it select for crop mimicry</article-title>? <source>Weed Res.</source> <volume>63</volume>, <fpage>77</fpage>&#x2013;<lpage>82</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/wre.12566</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Yoloweeds: A novel benchmark of yolo object detectors for multi-class weed detection in cotton production systems</article-title>. <source>Comput. Electron. Agric.</source> <volume>205</volume>, <fpage>107655</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2023.107655</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Dang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Deepcottonweeds (dcw): a novel benchmark of yolo object detectors for weed detection in cotton production systems</article-title>,&#x201d; in <conf-name>2022 ASABE Annual International Meeting</conf-name>. (<publisher-loc>Houston, Texas</publisher-loc>: <publisher-name>American Society of Agricultural and Biological Engineers</publisher-name>), <volume>1</volume>.</citation>
</ref>
<ref id="B16">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Deng</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Socher</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>L.-J.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Fei-Fei</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2009</year>). &#x201c;<article-title>Imagenet: A large-scale hierarchical image database</article-title>,&#x201d; in <conf-name>2009 IEEE conference on computer vision and pattern recognition</conf-name>. (<publisher-loc>Miami, Florida</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>248</fpage>&#x2013;<lpage>255</lpage>.</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>DeVries</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Taylor</surname> <given-names>G. W.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Improved regularization of convolutional neural networks with cutout</article-title>. <source>arXiv preprint arXiv:1708.04552</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1708.04552</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>dos Santos Ferreira</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Freitas</surname> <given-names>D. M.</given-names>
</name>
<name>
<surname>da Silva</surname> <given-names>G. G.</given-names>
</name>
<name>
<surname>Pistori</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Folhes</surname> <given-names>M. T.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Weed detection in soybean crops using convnets</article-title>. <source>Comput. Electron. Agric.</source> <volume>143</volume>, <fpage>314</fpage>&#x2013;<lpage>324</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2017.10.027</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>dos Santos Ferreira</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Freitas</surname> <given-names>D. M.</given-names>
</name>
<name>
<surname>da Silva</surname> <given-names>G. G.</given-names>
</name>
<name>
<surname>Pistori</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Folhes</surname> <given-names>M. T.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Unsupervised deep learning and semi-automatic data labeling in weed discrimination</article-title>. <source>Comput. Electron. Agric.</source> <volume>165</volume>, <fpage>104963</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2019.104963</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Farooq</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Jia</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Knowledge transfer via convolution neural networks for multi-resolution lawn weed classification</article-title>,&#x201d; in <conf-name>2019 10th Workshop on Hyperspectral Imaging and Signal Processing: Evolution in Remote Sensing (WHISPERS)</conf-name>. (<publisher-loc>Amsterdam, Netherlands</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>01</fpage>&#x2013;<lpage>05</lpage>.</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gerhards</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Christensen</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2003</year>). <article-title>Real-time weed detection, decision making and patch spraying in maize, sugarbeet, winter wheat and winter barley</article-title>. <source>Weed Res.</source> <volume>43</volume>, <fpage>385</fpage>&#x2013;<lpage>392</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1046/j.1365-3180.2003.00349.x</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2015</year>). &#x201c;<article-title>Fast r-cnn</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE international conference on computer vision</conf-name>. (<publisher-loc>Santiago, Chile</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1440</fpage>&#x2013;<lpage>1448</lpage>.</citation>
</ref>
<ref id="B23">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Guo</surname> <given-names>L.-Z.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.-Y.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Y.-F.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>Z.-H.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Safe deep semi-supervised learning for unseen-class unlabeled data</article-title>,&#x201d; in <conf-name>International Conference on Machine Learning</conf-name>. (<publisher-loc>Vienna, Austria</publisher-loc>: <publisher-name>PMLR</publisher-name>), <fpage>3897</fpage>&#x2013;<lpage>3906</lpage>.</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Thomasson</surname> <given-names>J. A.</given-names>
</name>
<name>
<surname>Bagavathiannan</surname> <given-names>M. V.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A powerful image synthesis and semisupervised learning pipeline for site-specific weed detection</article-title>. <source>Comput. Electron. Agric.</source> <volume>190</volume>, <fpage>106423</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2021.106423</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiao</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Feng</surname> <given-names>Z.</given-names>
</name>
<etal/>
</person-group>. (<year>2019</year>). <article-title>A survey of deep learning-based object detection</article-title>. <source>IEEE Access</source> <volume>7</volume>, <fpage>128837</fpage>&#x2013;<lpage>128868</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2019.2939201</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Laborde</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Martin</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Swinnen</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Vos</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Covid-19 risks to global food security</article-title>. <source>Science</source> <volume>369</volume>, <fpage>500</fpage>&#x2013;<lpage>502</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1126/science.abc4765</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Law</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Deng</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Cornernet: Detecting objects as paired keypoints</article-title>,&#x201d; in <conf-name>Proceedings of the European conference on computer vision (ECCV)</conf-name>, <conf-loc>Munich, Germany</conf-loc>, <fpage>734</fpage>&#x2013;<lpage>750</lpage>.</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Shrivastava</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Davis</surname> <given-names>L. S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Rethinking pseudo labels for semi-supervised object detection</article-title>. <source>Proc. AAAI Conf. Artif. Intell.</source> <volume>36</volume>, <fpage>1314</fpage>&#x2013;<lpage>1322</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1609/aaai.v36i2.20019</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Qi</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Morris</surname> <given-names>D.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Label-efficient learning in agriculture: A comprehensive review</article-title>. <source>Comput. Electron. Agric.</source> <volume>215</volume>, <fpage>108412</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2023.108412</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Tang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Generalized focal loss v2: Learning reliable localization quality estimation for dense object detection</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</conf-name>. (<publisher-loc>Nashville, Tennessee</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>11632</fpage>&#x2013;<lpage>11641</lpage>.</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Generalized focal loss: Learning qualified and distributed bounding boxes for dense object detection</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>33</volume>, <fpage>21002</fpage>&#x2013;<lpage>21012</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5555/3495724.3497487</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Lin</surname> <given-names>T.-Y.</given-names>
</name>
<name>
<surname>Goyal</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Doll&#xe1;r</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Focal loss for dense object detection</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE international conference on computer vision</conf-name>. (<publisher-loc>Venice, Italy</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>2980</fpage>&#x2013;<lpage>2988</lpage>.</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>HaoChen</surname> <given-names>J. Z.</given-names>
</name>
<name>
<surname>Gaidon</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2021</year>a). <article-title>Self-supervised learning is more robust to dataset imbalance</article-title>. <source>arXiv preprint arXiv:2110.05025</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2110.05025</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Ouyang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Fieguth</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Deep learning for generic object detection: A survey</article-title>. <source>Int. J. Comput. Vision</source> <volume>128</volume>, <fpage>261</fpage>&#x2013;<lpage>318</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11263-019-01247-4</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Jin</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Semi-supervised learning and attention mechanism for weed detection in wheat</article-title>. <source>Crop Prot.</source> <volume>174</volume>, <fpage>106389</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.cropro.2023.106389</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Zhai</surname> <given-names>D.</given-names>
</name>
<name>
<surname>He</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Semi-supervised learning methods for weed detection in turf</article-title>. <source>Pest Manage. Sci.</source> doi:&#xa0;<pub-id pub-id-type="doi">10.1002/ps.7959</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Anguelov</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Erhan</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Szegedy</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Reed</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Fu</surname> <given-names>C.-Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2016</year>). &#x201c;<article-title>SSD: Single shot multibox detector</article-title>,&#x201d; in <conf-name>Computer Vision&#x2013;ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11&#x2013;14, 2016, Proceedings, Part I 14</conf-name>. (<publisher-name>Springer</publisher-name>), <fpage>21</fpage>&#x2013;<lpage>37</lpage>.</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Y.-C.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>C.-Y.</given-names>
</name>
<name>
<surname>He</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Kuo</surname> <given-names>C.-W.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>P.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>b). <article-title>Unbiased teacher for semi-supervised object detection</article-title>. <source>arXiv preprint arXiv:2102.09480</source>.</citation>
</ref>
<ref id="B39">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Y.-C.</given-names>
</name>
<name>
<surname>Ma</surname> <given-names>C.-Y.</given-names>
</name>
<name>
<surname>Kira</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Unbiased teacher v2: Semi-supervised object detection for anchor-free and anchor-based detectors</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</conf-name>. (<publisher-loc>New Orleans, Louisiana</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>9819</fpage>&#x2013;<lpage>9828</lpage>.</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Young</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A survey of public datasets for computer vision tasks in precision agriculture</article-title>. <source>Comput. Electron. Agric.</source> <volume>178</volume>, <fpage>105760</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2020.105760</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lyu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Y.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>RTMDet: An empirical study of designing real-time object detectors</article-title>. <source>arXiv preprint arXiv:2212.07784</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2212.07784</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Manalil</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Coast</surname> <given-names>O.</given-names>
</name>
<name>
<surname>Werth</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Chauhan</surname> <given-names>B. S.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Weed management in cotton (Gossypium hirsutum L.) through weed-crop competition: A review</article-title>. <source>Crop Prot.</source> <volume>95</volume>, <fpage>53</fpage>&#x2013;<lpage>59</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.cropro.2016.08.008</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Meyer</surname> <given-names>G. E.</given-names>
</name>
<name>
<surname>Neto</surname> <given-names>J. C.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Verification of color vegetation indices for automated crop imaging applications</article-title>. <source>Comput. Electron. Agric.</source> <volume>63</volume>, <fpage>282</fpage>&#x2013;<lpage>293</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2008.03.009</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nong</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Fan</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Semi-supervised learning for weed and crop segmentation using UAV imagery</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>, <elocation-id>927368</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.927368</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Norsworthy</surname> <given-names>J. K.</given-names>
</name>
<name>
<surname>Ward</surname> <given-names>S. M.</given-names>
</name>
<name>
<surname>Shaw</surname> <given-names>D. R.</given-names>
</name>
<name>
<surname>Llewellyn</surname> <given-names>R. S.</given-names>
</name>
<name>
<surname>Nichols</surname> <given-names>R. L.</given-names>
</name>
<name>
<surname>Webster</surname> <given-names>T. M.</given-names>
</name>
<etal/>
</person-group>. (<year>2012</year>). <article-title>Reducing the risks of herbicide resistance: best management practices and recommendations</article-title>. <source>Weed Sci.</source> <volume>60</volume>, <fpage>31</fpage>&#x2013;<lpage>62</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1614/WS-D-11-00155.1</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>O&#x2019;Mahony</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Campbell</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Carvalho</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Harapanahalli</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Hernandez</surname> <given-names>G. V.</given-names>
</name>
<name>
<surname>Krpalkova</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). &#x201c;<article-title>Deep learning vs. traditional computer vision</article-title>,&#x201d; in <conf-name>Advances in Computer Vision: Proceedings of the 2019 Computer Vision Conference (CVC)</conf-name>, Vol. <volume>11</volume> (<publisher-loc>Las Vegas, USA</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>128</fpage>&#x2013;<lpage>144</lpage>.</citation>
</ref>
<ref id="B47">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Oerke</surname> <given-names>E.-C.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>Crop losses to pests</article-title>. <source>J. Agric. Sci.</source> <volume>144</volume>, <fpage>31</fpage>&#x2013;<lpage>43</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1017/S0021859605005708</pub-id>
</citation>
</ref>
<ref id="B48">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Parra</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Marin</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yousfi</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Rinc&#xf3;n</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Mauri</surname> <given-names>P. V.</given-names>
</name>
<name>
<surname>Lloret</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Edge detection for weed recognition in lawns</article-title>. <source>Comput. Electron. Agric.</source> <volume>176</volume>, <fpage>105684</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2020.105684</pub-id>
</citation>
</ref>
<ref id="B49">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rahman</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Performance evaluation of deep learning object detectors for weed detection for cotton</article-title>. <source>Smart Agric. Technol.</source> <volume>3</volume>, <fpage>100126</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.atech.2022.100126</pub-id>
</citation>
</ref>
<ref id="B50">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rai</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Ram</surname> <given-names>B. G.</given-names>
</name>
<name>
<surname>Schumacher</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Yellavajjala</surname> <given-names>R. K.</given-names>
</name>
<name>
<surname>Bajwa</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Applications of deep learning in precision weed management: A review</article-title>. <source>Comput. Electron. Agric.</source> <volume>206</volume>, <fpage>107698</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2023.107698</pub-id>
</citation>
</ref>
<ref id="B51">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Redmon</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Divvala</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Farhadi</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>You only look once: Unified, real-time object detection</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. (<publisher-loc>Las Vegas, Nevada</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>779</fpage>&#x2013;<lpage>788</lpage>.</citation>
</ref>
<ref id="B52">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ren</surname> <given-names>S.</given-names>
</name>
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Faster R-CNN: Towards real-time object detection with region proposal networks</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>28</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.5555/2969239.2969250</pub-id>
</citation>
</ref>
<ref id="B53">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Saputra</surname> <given-names>M. R. U.</given-names>
</name>
<name>
<surname>De Gusmao</surname> <given-names>P. P.</given-names>
</name>
<name>
<surname>Almalioglu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Markham</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Trigoni</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Distilling knowledge from a deep pose regressor network</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF international conference on computer vision</conf-name>. (<publisher-loc>Seoul, Korea</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>263</fpage>&#x2013;<lpage>272</lpage>.</citation>
</ref>
<ref id="B54">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sohn</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Berthelot</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Carlini</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Raffel</surname> <given-names>C. A.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>a). <article-title>FixMatch: Simplifying semi-supervised learning with consistency and confidence</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>33</volume>, <fpage>596</fpage>&#x2013;<lpage>608</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5555/3495724.3495775</pub-id>
</citation>
</ref>
<ref id="B55">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sohn</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>C.-L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>C.-Y.</given-names>
</name>
<name>
<surname>Pfister</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2020</year>b). <article-title>A simple semi-supervised learning framework for object detection</article-title>. <source>arXiv preprint arXiv:2005.04757</source>.</citation>
</ref>
<ref id="B56">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sportelli</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Apolo-Apolo</surname> <given-names>O. E.</given-names>
</name>
<name>
<surname>Fontanelli</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Frasconi</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Raffaelli</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Peruzzi</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Evaluation of YOLO object detectors for weed detection in different turfgrass scenarios</article-title>. <source>Appl. Sci.</source> <volume>13</volume>, <fpage>8502</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/app13148502</pub-id>
</citation>
</ref>
<ref id="B57">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Tan</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Pang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Le</surname> <given-names>Q. V.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>EfficientDet: Scalable and efficient object detection</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>. (<publisher-loc>Seattle, Washington</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>10781</fpage>&#x2013;<lpage>10790</lpage>.</citation>
</ref>
<ref id="B58">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tarvainen</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Valpola</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>30</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.5555/3294771.3294885</pub-id>
</citation>
</ref>
<ref id="B59">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Terven</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Cordova-Esparza</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A comprehensive review of YOLO: From YOLOv1 to YOLOv8 and beyond</article-title>. <source>Mach. Learn. Knowl. Extr.</source> <volume>5</volume>(<issue>4</issue>), <fpage>1680</fpage>&#x2013;<lpage>1716</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/make5040083</pub-id>
</citation>
</ref>
<ref id="B60">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Tian</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Chu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Shen</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Fully convolutional one-stage 3D object detection on LiDAR range images</article-title>,&#x201d; in <source>Adv. Neural Inf. Process. Syst.</source> (<publisher-loc>New Orleans, LA, USA</publisher-loc>: <publisher-name>Curran Associates Inc.</publisher-name>) <volume>35</volume>, <fpage>34899</fpage>&#x2013;<lpage>34911</lpage>.</citation>
</ref>
<ref id="B61">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Van Engelen</surname> <given-names>J. E.</given-names>
</name>
<name>
<surname>Hoos</surname> <given-names>H. H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A survey on semi-supervised learning</article-title>. <source>Mach. Learn.</source> <volume>109</volume>, <fpage>373</fpage>&#x2013;<lpage>440</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10994-019-05855-6</pub-id>
</citation>
</ref>
<ref id="B62">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>A review on weed detection using ground-based machine vision and image processing techniques</article-title>. <source>Comput. Electron. Agric.</source> <volume>158</volume>, <fpage>226</fpage>&#x2013;<lpage>240</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2019.02.005</pub-id>
</citation>
</ref>
<ref id="B63">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Wu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Kirillov</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Massa</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Lo</surname> <given-names>W.-Y.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Detectron2</article-title>. Available online at: <uri xlink:href="https://github.com/facebookresearch/detectron2">https://github.com/facebookresearch/detectron2</uri>.</citation>
</ref>
<ref id="B64">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Kang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Ding</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Review of weed detection methods based on computer vision</article-title>. <source>Sensors</source> <volume>21</volume>, <elocation-id>3647</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s21113647</pub-id>
</citation>
</ref>
<ref id="B65">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Xie</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Luong</surname> <given-names>M.-T.</given-names>
</name>
<name>
<surname>Hovy</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Le</surname> <given-names>Q. V.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Self-training with noisy student improves ImageNet classification</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>. (<publisher-loc>Seattle, Washington</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>10687</fpage>&#x2013;<lpage>10698</lpage>.</citation>
</ref>
<ref id="B66">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>F.</given-names>
</name>
<etal/>
</person-group>. (<year>2021</year>). &#x201c;<article-title>End-to-end semi-supervised object detection with soft teacher</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF International Conference on Computer Vision</conf-name>. (<publisher-loc>Montreal, QC, Canada</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>3060</fpage>&#x2013;<lpage>3069</lpage>.</citation>
</ref>
<ref id="B67">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Parikh</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Batra</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Joint unsupervised learning of deep representations and image clusters</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. (<publisher-loc>Las Vegas, Nevada</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>5147</fpage>&#x2013;<lpage>5156</lpage>.</citation>
</ref>
<ref id="B68">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Young</surname> <given-names>S. L.</given-names>
</name>
<name>
<surname>Meyer</surname> <given-names>G. E.</given-names>
</name>
<name>
<surname>Woldt</surname> <given-names>W. E.</given-names>
</name>
</person-group> (<year>2013</year>). &#x201c;<article-title>Future directions for automated weed management in precision agriculture</article-title>,&#x201d; in <conf-name>Automation: The future of weed control in cropping systems</conf-name>. (<publisher-name>Springer</publisher-name>), <fpage>249</fpage>&#x2013;<lpage>259</lpage>.</citation>
</ref>
<ref id="B69">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Schumann</surname> <given-names>A. W.</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Sharpe</surname> <given-names>S. M.</given-names>
</name>
<name>
<surname>Boyd</surname> <given-names>N. S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Weed detection in perennial ryegrass with deep learning convolutional neural network</article-title>. <source>Front. Plant Sci.</source> <volume>10</volume>, <elocation-id>1422</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2019.01422</pub-id>
</citation>
</ref>
<ref id="B70">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Su</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>J.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>DINO: DETR with improved denoising anchor boxes for end-to-end object detection</article-title>. <source>arXiv preprint arXiv:2203.03605</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2203.03605</pub-id>
</citation>
</ref>
<ref id="B71">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Chi</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Yao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Lei</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>S. Z.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Bridging the gap between anchor-based and anchor-free detection via adaptive training sample selection</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>. (<publisher-loc>Seattle, Washington</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>9759</fpage>&#x2013;<lpage>9768</lpage>.</citation>
</ref>
<ref id="B72">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Qian</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>a). &#x201c;<article-title>Instant-teaching: An end-to-end semi-supervised object detection framework</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</conf-name>. (<publisher-loc>Nashville, Tennessee</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>4081</fpage>&#x2013;<lpage>4090</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR46437.2021.00407</pub-id>
</citation>
</ref>
<ref id="B73">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Koltun</surname> <given-names>V.</given-names>
</name>
<name>
<surname>Kr&#xe4;henb&#xfc;hl</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2021</year>b). <article-title>Probabilistic two-stage detection</article-title>. <source>arXiv preprint arXiv:2103.07461</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2103.07461</pub-id>
</citation>
</ref>
<ref id="B74">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhuo</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Kr&#xe4;henb&#xfc;hl</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Bottom-up object detection by grouping extreme and center points</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>. (<publisher-loc>Long Beach, CA</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>850</fpage>&#x2013;<lpage>859</lpage>.</citation>
</ref>
<ref id="B75">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhuang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Qi</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Duan</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Xi</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>H.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>A comprehensive survey on transfer learning</article-title>. <source>Proc. IEEE</source> <volume>109</volume>, <fpage>43</fpage>&#x2013;<lpage>76</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JPROC.2020.3004555</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>