<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2023.1211075</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Plant Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Local refinement mechanism for improved plant leaf segmentation in cluttered backgrounds</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Ma</surname>
<given-names>Ruihan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1871141"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Fuentes</surname>
<given-names>Alvaro</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/551374"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Yoon</surname>
<given-names>Sook</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/595546"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lee</surname>
<given-names>Woon Yong</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Kim</surname>
<given-names>Sang Cheol</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Kim</surname>
<given-names>Hyongsuk</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/833601"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Park</surname>
<given-names>Dong Sun</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/567101"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Electronics Engineering, Jeonbuk National University</institution>, <addr-line>Jeonbuk</addr-line>, <country>Republic of Korea</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Core Research Institute for Intelligent Robots, Jeonbuk National University</institution>, <addr-line>Jeonbuk</addr-line>, <country>Republic of Korea</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Computer Engineering, Mokpo National University</institution>, <addr-line>Jeonnam</addr-line>, <country>Republic of Korea</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Department of Food Engineering Research, Intelligent Robot Studio Co. Ltd.</institution>, <addr-line>Gyeonggi-do</addr-line>, <country>Republic of Korea</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Mario Cunha, University of Porto, Portugal</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Ricardo Santos, Federal University of Mato Grosso do Sul, Brazil; Parvathaneni Naga Srinivasu, Prasad V. Potluri Siddhartha Institute of Technology, India</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Sang Cheol Kim, <email xlink:href="mailto:sckim7777@jbnu.ac.kr">sckim7777@jbnu.ac.kr</email>; Hyongsuk Kim, <email xlink:href="mailto:hskim@jbnu.ac.kr">hskim@jbnu.ac.kr</email>
</p>
</fn>
<fn fn-type="equal" id="fn003">
<p>&#x2020;These authors have contributed equally to this work</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>30</day>
<month>08</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1211075</elocation-id>
<history>
<date date-type="received">
<day>22</day>
<month>05</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>08</day>
<month>08</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Ma, Fuentes, Yoon, Lee, Kim, Kim and Park</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Ma, Fuentes, Yoon, Lee, Kim, Kim and Park</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Plant phenotyping is a critical field in agriculture, aiming to understand crop growth under specific conditions. Recent research uses images to describe plant characteristics by detecting visual information within organs such as leaves, flowers, stems, and fruits. However, processing data in real field conditions, with challenges such as image blurring and occlusion, requires improvement. This paper proposes a deep learning-based approach for leaf instance segmentation with a local refinement mechanism to enhance performance in cluttered backgrounds. The refinement mechanism employs Gaussian low-pass and High-boost filters to enhance target instances and can be applied to the training or testing dataset. An instance segmentation architecture generates segmented masks and detected areas, facilitating the derivation of phenotypic information, such as leaf count and size. Experimental results on a tomato leaf dataset demonstrate the system&#x2019;s accuracy in segmenting target leaves despite complex backgrounds. The investigation of the refinement mechanism with different kernel sizes reveals that larger kernel sizes benefit the system&#x2019;s ability to generate more leaf instances when using a High-boost filter, while prediction performance decays with larger Gaussian low-pass filter kernel sizes. This research addresses challenges in real greenhouse scenarios and enables automatic recognition of phenotypic data for smart agriculture. The proposed approach has the potential to enhance agricultural practices, ultimately leading to improved crop yields and productivity.</p>
</abstract>
<kwd-group>
<kwd>deep learning</kwd>
<kwd>leaf instance segmentation</kwd>
<kwd>cluttered background</kwd>
<kwd>filtering</kwd>
<kwd>plant phenotyping</kwd>
</kwd-group>
<counts>
<fig-count count="12"/>
<table-count count="4"/>
<equation-count count="4"/>
<ref-count count="46"/>
<page-count count="17"/>
<word-count count="6992"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Technical Advances in Plant Science</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Understanding the growth processes of plants is essential for optimizing crop cultivation conditions (<xref ref-type="bibr" rid="B21">Hilty et&#xa0;al., 2021</xref>). The interpretation of crop responses is often tied to environmental and nutritional factors, and visual observations of plant development play a significant role in this understanding (<xref ref-type="bibr" rid="B20">Heuvelink, 2005</xref>). These visual cues offer tangible evidence of a plant&#x2019;s well-being and the effects of different conditions on its growth. However, comprehending the intricate processes involved in plant growth and development is not a trivial task. It demands a high level of expertise and intuition, acquired through experience and dedicated study. Researchers, agronomists, and farmers continually strive to deepen their knowledge of plant growth processes and develop innovative approaches to harness this understanding for sustainable and efficient agricultural practices (<xref ref-type="bibr" rid="B7">Costa et&#xa0;al., 2019</xref>).</p>
<p>Plant development processes, including stems, leaves, flowers, and fruit ripening, directly impact plant yield, quality, and quantity of final products. Phenotyping becomes indispensable in identifying these changes and understanding plant responses (<xref ref-type="bibr" rid="B32">Pieruschka and Schurr, 2019</xref>). For example, in tomato plants, critical phenotyping variables such as leaf color, shape, size, and stem diameter offer insights into the plant&#x2019;s health, stress levels, and the potential presence of diseases or pests (<xref ref-type="bibr" rid="B17">Geelen et&#xa0;al., 2018</xref>).</p>
<p>Recent advances in computer vision and deep learning have prompted significant interest in plant-related research (<xref ref-type="bibr" rid="B7">Costa et&#xa0;al., 2019</xref>). Previous studies have successfully employed techniques (<xref ref-type="bibr" rid="B28">Liu and Wang, 2021</xref>) such as image classification, object detection, and instance segmentation for tasks such as detecting diseases and pests (<xref ref-type="bibr" rid="B29">Mohanty et&#xa0;al., 2016</xref>; <xref ref-type="bibr" rid="B13">Fuentes et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B14">Fuentes et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B23">Jiang et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B15">Fuentes et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B9">Dong et&#xa0;al., 2022</xref>), counting leaves (<xref ref-type="bibr" rid="B12">Farjon et&#xa0;al., 2021</xref>), and detecting fruits (<xref ref-type="bibr" rid="B1">Afonso et&#xa0;al., 2020</xref>). In relation to our research, <xref ref-type="bibr" rid="B8">Das et&#xa0;al. (2023)</xref> proposed an ensemble segmentation model with UNet as the base encoder&#x2013;decoder for detecting coleoptile emergence time, showcasing its potential for phenotyping applications. Similarly, <xref ref-type="bibr" rid="B44">Yang et&#xa0;al. (2020)</xref> utilized the Mask Region-based Convolutional Neural Network (Mask R-CNN) architecture for leaf segmentation. The researchers conducted thorough investigations to identify optimal hyperparameters for both segmentation and classification techniques. Despite these significant achievements, the challenge of deploying systems in real-world scenarios with diverse variables and cluttered backgrounds persists (<xref ref-type="bibr" rid="B3">Barbedo, 2018</xref>).</p>
<p>In real-world scenarios, plant leaves often overlap or get occluded by other elements, making it challenging for segmentation models to accurately distinguish individual instances (<xref ref-type="bibr" rid="B46">Zhang and Zhang, 2023</xref>). Additionally, variations in lighting, shadows, and image quality, with issues like blurred leaves and noise in the images can impact the model&#x2019;s ability to extract meaningful features for accurate segmentation (<xref ref-type="bibr" rid="B34">Rzanny et&#xa0;al., 2017</xref>). Moreover, the limited availability of annotated training data for specific plant species (<xref ref-type="bibr" rid="B43">Xu et&#xa0;al., 2023</xref>) and growth stages poses a significant challenge in achieving robust and generalized segmentation models (<xref ref-type="bibr" rid="B30">Okyere et&#xa0;al., 2023</xref>). Furthermore, existing methods may struggle with instances of varying sizes and shapes, leading to incomplete or inaccurate segmentation results (<xref ref-type="bibr" rid="B44">Yang et&#xa0;al., 2020</xref>). Addressing these problems is critical to advancing the field of plant leaf instance segmentation and enabling applications in precision agriculture and automated plant phenotyping.</p>
<p>To address these technical gaps, this paper proposes a systematic deep learning-based approach for leaf instance segmentation in cluttered backgrounds. The study investigates the application of a filter-based instance refinement mechanism to enhance leaf instance segmentation, exploring its application on both training and testing data. <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref> showcases the segmentation process of plant leaves within a cluttered greenhouse background. The proposed approach employs a refinement mechanism that operates locally on target areas, leading to enhanced recognition of individual leaf instances. This refinement step is crucial for overcoming challenges related to occlusion, blurriness, and focus commonly encountered in real-world data collection scenarios. The output of the segmentation process provides segmented masks and bounding box information for each detected leaf instance. Leveraging these results, further processing is conducted to derive essential phenotypic characteristics, including the accurate counting of leaves and the determination of their respective areas. This comprehensive approach not only successfully identifies and segments plant leaves amidst cluttered backgrounds but also enables the extraction of critical phenotypic information that offers valuable insights into the plant&#x2019;s health, growth, and overall performance. The results obtained from this figure demonstrate the effectiveness and potential of the proposed method for advancing plant phenotyping in greenhouse environments, contributing to the optimization of agricultural practices and crop management.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Overview of the proposed framework for instance segmentation of plant leaves in cluttered greenhouse backgrounds. It incorporates a refinement mechanism that operates locally on target areas, leading to enhanced recognition of individual leaf instances. The output results from this process allow us to derive essential phenotypic characteristics, including the accurate counting of leaves and the determination of their respective areas.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1211075-g001.tif"/>
</fig>
<p>The contributions of this work are summarized as follows:</p>
<list list-type="simple">
<list-item>
<p>&#x2212; A deep learning-based method for segmenting plant leaf instances, with instance segmentation and mask detection, is proposed and thoroughly validated on experiments conducted on our tomato plant dataset.</p>
</list-item>
<list-item>
<p>&#x2212; The introduction of a simple yet effective local refinement mechanism based on filtering techniques applied locally to the leaf instances significantly improves the robustness of data used for training and testing, overcoming challenges related to data collection such as occlusion, blurriness, and focus.</p>
</list-item>
<list-item>
<p>&#x2212; Our study offers a practical method for plant phenotyping using RGB images from real greenhouse environments, providing insights into data utilization for this application.</p>
</list-item>
</list>
<p>The rest of the paper is organized as follows: Related works on leaf instance segmentation and plant phenotyping techniques are reviewed in Section 2. The proposed method and strategy are introduced in Section 3. Experimental results, both qualitative and quantitative, are presented in Section 4. Finally, Section 5 concludes the research and outlines potential directions for future work.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Related works</title>
<p>This section presents an overview of the techniques used for leaf segmentation and plant phenotyping, including both traditional approaches and deep learning-based studies.</p>
<sec id="s2_1">
<label>2.1</label>
<title>Traditional techniques for plant phenotyping</title>
<p>Plant phenotyping is a critical field in agriculture, providing valuable insights into crop growth and characteristics (<xref ref-type="bibr" rid="B39">Walter et&#xa0;al., 2015</xref>). Traditional methods have been utilized in this domain, including manual measurements of plant organ features and machine vision techniques for data collection (<xref ref-type="bibr" rid="B25">Kolhar and Jagtap, 2021</xref>). For instance, <xref ref-type="bibr" rid="B33">Praveen Kumar and Domnic (2019)</xref> employed statistical-based image enhancement, graph-based leaf region extraction, and circular Hough Transform for leaf counting. <xref ref-type="bibr" rid="B45">Zhang et&#xa0;al., 2018</xref> explored plant segmentation using contour techniques and hand-crafted features, while <xref ref-type="bibr" rid="B38">Tian et&#xa0;al., 2019</xref> used an adaptive K-means algorithm for tomato leaf image segmentation. Although these methods can be effective in controlled scenarios, their performance might be limited when applied in real-world situations with diverse variations and challenges.</p>
<p>As agriculture often involves cluttered backgrounds, occlusions, varying lighting conditions, and other complexities, these traditional approaches may struggle to handle the level of intricacy present in real-life environments. Consequently, the adoption of learnable approaches, such as deep learning, becomes more appropriate for tackling these challenging conditions (<xref ref-type="bibr" rid="B41">Xiong et&#xa0;al., 2021</xref>).</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Leaf instance segmentation in cluttered backgrounds</title>
<p>In recent years, there has been a growing demand for systematic plant phenotyping, leading to increased interest in utilizing deep learning and computer vision-based techniques for image-based plant analysis (<xref ref-type="bibr" rid="B7">Costa et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B16">Fuentes et&#xa0;al., 2019</xref>). The main objective is to extract meaningful features from specific plant organs, such as leaves, flowers, stems, and fruits, to effectively characterize and evaluate their condition (<xref ref-type="bibr" rid="B37">Singh et&#xa0;al., 2018</xref>). Detection or segmentation architectures are commonly employed to provide detailed information at the instance level, such as bounding boxes (<xref ref-type="bibr" rid="B9">Dong et&#xa0;al., 2022</xref>) or masks (<xref ref-type="bibr" rid="B42">Xu et&#xa0;al., 2022</xref>), which prove valuable for applications such as plant disease and pest detection, as well as leaf, flower, or fruit counting.</p>
<p>The Leaf Segmentation Challenge (LSC) (<xref ref-type="bibr" rid="B35">Scharr et&#xa0;al., 2015</xref>) and the Workshop on Computer Vision Problems of Plant Phenotyping (CVPP) (<xref ref-type="bibr" rid="B36">Scharr et&#xa0;al., 2017</xref>) have significantly advanced plant phenotyping research. These initiatives aimed to develop state-of-the-art techniques for automatically obtaining phenotyping characteristics, with a particular focus on counting the number of leaves. As part of these efforts, they introduced new datasets with annotation labels for leaves and plants, inspiring various studies to address the challenge. For example, some researchers proposed methods for leaf segmentation using information like leaf borders, color, and texture features (<xref ref-type="bibr" rid="B31">Pape and Klukas, 2015</xref>), while others introduced neural network architectures for leaf counting (<xref ref-type="bibr" rid="B2">Aich and Stavness, 2017</xref>). Despite having limited training data, these approaches achieved satisfactory results. To tackle the issue of limited data availability, <xref ref-type="bibr" rid="B26">Kuznichov et&#xa0;al. (2019)</xref> explored data augmentation techniques to create synthetic samples based on existing data.</p>
<p>In the realm of plant segmentation with complex backgrounds, significant contributions have been made in recent years. For instance, <xref ref-type="bibr" rid="B44">Yang et&#xa0;al. (2020)</xref> employed Mask R-CNN with a VGG-16 feature extractor for leaf segmentation in complicated backgrounds, achieving a performance of 91.5%. The dataset used in their study consisted of images with clear leaf information, making leaves easily distinguishable from the background. Similarly, <xref ref-type="bibr" rid="B4">Br et&#xa0;al. (2021)</xref> proposed a segmentation method based on leaf images to identify the attributes of plant diseases. The researchers used a comprehensive dataset of various plant leaf images and developed a two-stream deep learning framework that accurately segments plants and counts leaves of different sizes and shapes. In <xref ref-type="bibr" rid="B11">Fan et&#xa0;al. (2022)</xref>, the researchers introduced an auxiliary binary mask from the segmentation stream to enhance counting performance, reducing the impact of complex backgrounds. More recently, <xref ref-type="bibr" rid="B27">Lin et&#xa0;al. (2023)</xref> proposed a self-supervised semantic segmentation model that groups semantically similar pixels based on self-contained information, enabling a color-based leaf segmentation algorithm to identify leaf regions jointly. Furthermore, they introduced a self-supervised color correction model for images captured under complex illumination conditions.</p>
<p>While substantial progress has been made in plant leaf segmentation, most of the work has focused on outdoor environments, primarily due to the availability of datasets. In contrast, our research focuses on complex real-world greenhouse environments of tomato plants, where challenges such as leaf occlusions and varying scales are prevalent. To address these issues, we introduced a refinement mechanism based on filtering techniques, aiming to enhance the robustness of leaf instance segmentation and overcome the problem of image blurring. Our approach contributes to the advancement of plant phenotyping in challenging greenhouse settings and holds potential implications for agricultural practices and automation.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Proposed method</title>
<p>This section provides a detailed explanation of the proposed approach and the techniques utilized for segmenting leaf instances in cluttered backgrounds. The primary architecture takes an input image and generates output results in the form of leaf instance masks. A pivotal aspect of our method is the data refinement mechanism, which enhances the robustness of the images used for both training and testing. This is achieved by locally applying filtering techniques to each target leaf instance. The implementation involves two distinct stages: one for training data and another for test data. An overview of the implementation process is illustrated in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Overview of the proposed approach for plant leaf instance segmentation in cluttered backgrounds. The model encompasses two key elements: a refining mechanism directly applied to the data used for training or testing, and an instance segmentation architecture responsible for generating accurate leaf instances in the images.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1211075-g002.tif"/>
</fig>
<sec id="s3_1">
<label>3.1</label>
<title>Dataset description</title>
<p>In this study, we created a dataset specifically designed for the segmentation of leaf instances and the analysis of cluttered backgrounds. The dataset comprises 372 images of tomato plants, captured using multiple camera devices in various greenhouse environments. The images were taken under changing lighting conditions and feature diverse backgrounds. Each photo was captured parallel to the plants, encompassing surrounding areas as depicted in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3A</bold>
</xref>. The dataset exhibits complexities such as (1) variations in target leaf sizes and appearances, (2) different levels of leaf occlusion, and (3) blurred regions caused by camera movement and focus.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Examples of the tomato plants dataset, showcasing the images of the plants <bold>(A)</bold> alongside their corresponding mask annotations <bold>(B)</bold>. The mask annotations were applied to the foreground leaves, encompassing both clear and blurred samples, to provide comprehensive ground-truth data for the segmentation task.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1211075-g003.tif"/>
</fig>
<p>For generating ground-truth data, leaf regions were meticulously annotated using masks, regardless of their visual appearance, encompassing both well-defined and blurred samples. The annotations were performed manually utilizing an available toolbox for mask segmentation, as shown in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3B</bold>
</xref>. Overall, the annotations encompass 3,636 instances, with 2,045 instances allocated to the training set, 641 to the validation set, and 950 to the test set.</p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Instance segmentation architecture</title>
<p>Leaf instance segmentation has been implemented using Mask R-CNN (<xref ref-type="bibr" rid="B19">He et&#xa0;al., 2017</xref>) as the core architecture. Mask R-CNN is a two-stage framework designed for both instance segmentation and object detection tasks. It leverages a Feature Pyramid Network (FPN) as its backbone to extract essential features from input images. In the first stage, a Region Proposal Network (RPN) generates Region of Interest (RoI) proposals, while in the second stage, Mask R-CNN predicts bounding boxes, class labels, and masks for each RoI. The overall architecture for leaf instance segmentation is illustrated in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Instance segmentation architecture based on Mask R-CNN.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1211075-g004.tif"/>
</fig>
<p>During training, the end-to-end instance segmentation model aims to minimize the multi-task loss for each sampled RoI, which is composed of three components: classification loss <inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:msub>
<mml:mtext mathvariant="bold-italic">L</mml:mtext>
<mml:mrow>
<mml:mtext mathvariant="bold-italic">cls</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:msub>
<mml:mtext mathvariant="bold-italic">p</mml:mtext>
<mml:mrow><mml:mtext mathvariant="bold-italic">i</mml:mtext></mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold-italic">p</mml:mtext>
<mml:mrow><mml:mtext mathvariant="bold-italic">i</mml:mtext></mml:mrow>
<mml:mo>*</mml:mo>
</mml:msubsup>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, bounding box regression <inline-formula>
<mml:math display="inline" id="im2">
<mml:mrow>
<mml:msub>
<mml:mtext mathvariant="bold-italic">L</mml:mtext>
<mml:mrow>
<mml:mtext mathvariant="bold-italic">bbox</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:mtext mathvariant="bold-italic">t</mml:mtext>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mtext mathvariant="bold-italic">t</mml:mtext>
<mml:mrow><mml:mtext mathvariant="bold-italic">i</mml:mtext></mml:mrow>
<mml:mrow><mml:mo>*</mml:mo></mml:mrow>
</mml:msubsup>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, and mask loss <bold>
<italic>L<sub>mask</sub>
</italic>
</bold> as shown in Equation (1).</p>
<disp-formula>
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:mtext mathvariant="bold-italic">L</mml:mtext>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mtext mathvariant="bold-italic">L</mml:mtext>
<mml:mrow>
<mml:mtext mathvariant="bold-italic">cls</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mtext mathvariant="bold-italic">L</mml:mtext>
<mml:mrow>
<mml:mtext mathvariant="bold-italic">bbox</mml:mtext>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mtext mathvariant="bold-italic">L</mml:mtext>
<mml:mrow>
<mml:mtext mathvariant="bold-italic">mask</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The classification loss is a logarithmic loss over two classes (object or not object) and is computed based on the output score <bold>
<italic>p<sub>i</sub>
</italic>
</bold> of the classification branch for each anchor <italic>i</italic> and its corresponding ground-truth label <inline-formula>
<mml:math display="inline" id="im3">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold-italic">p</mml:mtext>
<mml:mrow><mml:mtext mathvariant="bold-italic">i</mml:mtext></mml:mrow>
<mml:mrow><mml:mo>*</mml:mo></mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>The regression loss is activated only when the anchor contains an object. It computes the difference between the predicted bounding box parameters <bold>
<italic>t<sub>i</sub>
</italic>
</bold> and the ground-truth parameters <inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:msubsup>
<mml:mtext mathvariant="bold-italic">t</mml:mtext>
<mml:mrow><mml:mtext mathvariant="bold-italic">i</mml:mtext></mml:mrow>
<mml:mrow><mml:mo>*</mml:mo></mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, which include four variables <bold>[<italic>t<sub>x</sub>
</italic>,<italic>t<sub>y</sub>
</italic>,<italic>t<sub>w</sub>
</italic>,<italic>t<sub>h</sub>
</italic>
</bold>], where <bold>(<italic>x</italic>,<italic>y</italic>)</bold> are the coordinates of the bounding box center, and its width and height <bold>(<italic>w</italic>,<italic>h</italic>)</bold>.</p>
<p>The mask loss is an average binary cross-entropy loss applied to the dedicated mask branch. As an instance segmentation approach, <bold>
<italic>L<sub>mask</sub>
</italic>
</bold> utilizes the classification branch to allow the network to generate masks for each class separately, avoiding confusion among different categories.</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Proposed local refinement mechanism</title>
<p>During data collection for our application, camera focus and blur were the most common image quality issues. These issues had a significant impact, particularly when dealing with cluttered background conditions and defining target areas accurately. Our research aims to address this challenge by introducing a &#x201c;local refinement mechanism,&#x201d; a simple yet effective technique that enhances the robustness of training and test data. The goal is to enable the system to accurately segment leaves regardless of background information.</p>
<p>After obtaining the annotated dataset, we applied the local refinement mechanism to the instances in both the training and test data. The main methods involved using Gaussian low-pass filtering and High-boost filtering, either independently or in combination, to improve the system&#x2019;s recognition capabilities.</p>
<sec id="s3_3_1">
<label>3.3.1</label>
<title>Gaussian low-pass filter</title>
<p>GLPF allows transmitting signals with lower frequency, thereby helping to reduce noise and blurring regions in the image (<xref ref-type="bibr" rid="B18">Gonzalez and Woods, 2018</xref>). It smooths the image by averaging nearby pixels within a local region, reducing the disparity between pixel values. The blurring effect becomes stronger as the smoothing mask becomes larger. The GLPF generates blurring instance regions to assess the model&#x2019;s ability to segment leaves under these conditions. Equation (2) specifies a GLPF:</p>
<disp-formula>
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>G</mml:mi>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>&#x3c0;</mml:mi>
<mml:msup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>y</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:msup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>x</italic> is the distance from the center on the horizontal axis, <italic>y</italic> is the distance from the center on the vertical axis, and &#x3c3; is the standard deviation of the Gaussian distribution.</p>
</sec>
<sec id="s3_3_2">
<label>3.3.2</label>
<title>High-boost filter</title>
<p>HBF emphasizes high-frequency image details without eliminating low-frequency components. It sharpens the image and enhances edges (<xref ref-type="bibr" rid="B18">Gonzalez and Woods, 2018</xref>). Multiplying the original image by an amplification factor <italic>A</italic> yields the definition of an HBF. The value of <italic>A</italic> determines the nature of the HBF, where higher values lead to brighter backgrounds, resulting in noise enhancement and image sharpening. Equation (3) defines the HBF:</p>
<disp-formula>
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="false">)</mml:mo>
<mml:mi>f</mml:mi>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</disp-formula>
<p>where <italic>A</italic> represents the amplification factor, and <italic>f<sub>hp</sub>
</italic> is a high-pass filter. We applied the HBF locally to leaf instances to improve their regions&#x2019; sharpness, facilitating leaf boundary detection, especially in cases with occlusion. We experimented with different kernel sizes to find the optimal value for our approach.</p>
<p>We devised two scenarios for applying the refinement mechanism:</p>
<list list-type="simple">
<list-item>
<p>&#x2212; Scenario 1: We aimed to determine whether applying the refinement mechanism enhances the robustness of features in the training dataset, as shown in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5B</bold>
</xref>.</p>
</list-item>
<list-item>
<p>&#x2212; Scenario 2: We applied the refinement mechanism to the test data to assess whether the features from the training dataset effectively handle changes in the test data, as shown in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5B</bold>
</xref>.</p>
</list-item>
</list>
<p>We evaluated the system&#x2019;s response to these changes by applying the local refinement filter with different kernel sizes. <xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5C, D</bold>
</xref> illustrate example images after applying the GLPF and HBF, respectively. In Section 4, we present the qualitative and quantitative results of our approach. Additional specific illustrations of the applied local refinement mechanism can be found in <xref ref-type="fig" rid="f10">
<bold>Figures&#xa0;A1</bold>
</xref> and <xref ref-type="fig" rid="f11">
<bold>A2</bold>
</xref> of the Appendix. These figures showcase how the mechanism is implemented on both the training and test datasets.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Application of the local refinement mechanism either on the training dataset <bold>(A)</bold> or the test dataset <bold>(B)</bold>. The impact of the filters with different kernel sizes on the images is demonstrated in the examples presented in <bold>(C, D)</bold> for the Gaussian low-pass and High-boost filters, respectively. [See <xref ref-type="fig" rid="f10">
<bold>Figures&#xa0;A1</bold>
</xref> and <xref ref-type="fig" rid="f11">
<bold>A2</bold>
</xref> in the Appendix for more detailed representations of the schemes in <bold>(A, B)</bold>].</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1211075-g005.tif"/>
</fig>
<p>To avoid overfitting, data augmentation techniques were employed to increase the number of images in the training dataset in the two aforementioned cases. From this point onwards, we will use the abbreviation (ATD) to refer to the augmented training dataset. We used both online and offline data augmentation, including intensity and geometric transformations. Specifically, online data augmentation was executed during training, applying operations such as horizontal flip, Gaussian blur, brightness and contrast enhancement, and pixel loss. Offline data augmentation, performed as a separate process applied to the entire dataset before training, generated more images using techniques such as brightness and contrast enhancement, pixel dropout, horizontal flipping, rotation, and random combinations of all of them.</p>
</sec>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Evaluation metrics</title>
<p>We evaluated the performance of the proposed model using the Intersection Over Union (IoU) thresholding operation and the mean Average Precision (mAP) metric (<xref ref-type="bibr" rid="B10">Everingham et&#xa0;al., 2009</xref>). The standard MS COCO metrics were used for instance segmentation and bounding box detection. The mAP is calculated by computing the AP for each class and then averaging across all classes, taking into account the trade-off between precision and recall, and considering false positives (FPs) and false negatives (FNs). Equation (4) presents the formula for the mAP calculation.</p>
<disp-formula>
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:mtext mathvariant="bold-italic">mAP</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mtext mathvariant="bold-italic">N</mml:mtext>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mtext mathvariant="bold-italic">i</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mtext mathvariant="bold-italic">N</mml:mtext>
</mml:munderover>
<mml:mrow>
<mml:mtext mathvariant="bold-italic">A</mml:mtext>
<mml:msub>
<mml:mtext mathvariant="bold-italic">P</mml:mtext>
<mml:mrow><mml:mtext mathvariant="bold-italic">i</mml:mtext></mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mstyle>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Our primary focus in this evaluation was on the system&#x2019;s ability to accurately identify leaf instances and potentially predict more leaf samples than those available in the training dataset. We present the results of our experiments in the following section to support our claims.</p>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Experimental results</title>
<p>In this section, we provide the implementation details and present both quantitative and qualitative experimental results on the tomato plants dataset. These evaluations demonstrate the performance of our applied strategy in real-field scenarios.</p>
<sec id="s4_1">
<label>4.1</label>
<title>Implementation details</title>
<p>For our implementation, we fine-tuned the model end to end using a pre-trained model on the MS-COCO dataset. To train the network, we utilized Stochastic Gradient Descent (SGD) along with the Adam optimizer, setting the learning rate to 0.000125, momentum to 0.9, and weight decay to 1e-4. After training the model for 50 epochs, we obtained the final instance segmentation weights. The training process was conducted on a computer equipped with 4 GPUs Titan RTX.</p>
<p>The original images had a size of 4,032 &#xd7; 3,024 pixels, and we resized the input images to 1,333 &#xd7; 1,000. For implementation, we used the PyTorch framework, where the input tensor size was (6, 3, 1,333, 1,000), which corresponds to the batch size, number of channels, width, and height, respectively. The first layer of the network used a 7 &#xd7; 7 kernel size with a stride of 2. In the following convolutional layers, the kernel size was predominantly 3 &#xd7; 3, and the stride was either 1 or 2, depending on the layer. In the Feature Pyramid Network (FPN), 1 &#xd7; 1 and 3 &#xd7; 3 convolutional layers were used. ReLU was applied after each convolutional layer to introduce non-linearity into the model. In the final stage of Mask R-CNN, a sigmoid activation function was used in the mask branch. The training curves of the model are presented in <xref ref-type="fig" rid="f12">
<bold>Figure&#xa0;A3</bold>
</xref> in the Appendix.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Backbone feature extractor</title>
<p>We initiated our experiments by comparing the performance of different backbone architectures, namely, ResNet-18, ResNet-34, ResNet-50, and ResNet-101, to determine the most suitable one for our specific application. For this comparison, we directly trained the model using the original images without applying the local refinement mechanism on the leaf instances. The results of this evaluation are presented in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>. Among the tested networks, ResNet-50 demonstrated the highest performance in segmenting instance leaves, achieving an IoU &gt; 0.5 of 91.6%. Our findings indicated that Mask R-CNN benefited significantly from deeper networks, particularly ResNet-50. As a result, we selected this architecture as the baseline backbone to conduct further experiments.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Backbone architecture.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Model</th>
<th valign="middle" colspan="3" align="center">Segmentation</th>
<th valign="top" colspan="3" align="center">Bounding box detection</th>
</tr>
<tr>
<th valign="middle" align="center">AP<sub>50</sub>
</th>
<th valign="middle" align="center">AP<sub>75</sub>
</th>
<th valign="middle" align="center">AP<sub>50&#x2013;95</sub>
</th>
<th valign="middle" align="center">AP<sub>50</sub>
</th>
<th valign="middle" align="center">AP<sub>75</sub>
</th>
<th valign="middle" align="center">AP<sub>50&#x2013;95</sub>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">ResNet-18</td>
<td valign="middle" align="center">73.0</td>
<td valign="middle" align="center">37.2</td>
<td valign="middle" align="center">38.6</td>
<td valign="top" align="center">72.6</td>
<td valign="top" align="center">29.9</td>
<td valign="top" align="center">35.6</td>
</tr>
<tr>
<td valign="middle" align="center">ResNet-34</td>
<td valign="middle" align="center">82.1</td>
<td valign="middle" align="center">51.6</td>
<td valign="middle" align="center">48.1</td>
<td valign="top" align="center">81.4</td>
<td valign="top" align="center">48.8</td>
<td valign="top" align="center">46.7</td>
</tr>
<tr>
<td valign="middle" align="center">ResNet-50</td>
<td valign="middle" align="center">91.6</td>
<td valign="middle" align="center">83.4</td>
<td valign="middle" align="center">74.5</td>
<td valign="top" align="center">91.3</td>
<td valign="top" align="center">81.4</td>
<td valign="top" align="center">71.6</td>
</tr>
<tr>
<td valign="middle" align="center">ResNet-101</td>
<td valign="middle" align="center">90.2</td>
<td valign="middle" align="center">75.6</td>
<td valign="middle" align="center">67.2</td>
<td valign="top" align="center">89.0</td>
<td valign="top" align="center">69.2</td>
<td valign="top" align="center">60.9</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Refinement mechanism applied to the training dataset</title>
<p>In this experiment, we focused on evaluating the first scenario presented in Section 3.3 and illustrated in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5A</bold>
</xref>. The goal was to assess the impact of the refinement mechanism when applied to the local leaf instances of the training dataset, with the intention of emulating the presence of blurry leaves in the data. By introducing blurriness, we aimed to generate the necessary features that would allow the model to perform well on the original test dataset, which contains instances of leaves with clearer visual appearance.</p>
<p>To achieve this, we utilized a GLPF in two different configurations:</p>
<sec id="s4_3_1">
<label>4.3.1</label>
<title>Refinement mechanism applied to the augmented training dataset</title>
<p>In this configuration, the model was trained on the augmented training dataset, which included instances of leaves with varying levels of blurriness introduced through the GLPF. The objective here was to assess the model&#x2019;s ability to generalize effectively on the test data, which comprises images of original uncorrupted leaves. <xref ref-type="fig" rid="f10">
<bold>Figure&#xa0;A1 A</bold>
</xref> in the Appendix illustrates the implemented strategy for this scenario.</p>
</sec>
<sec id="s4_3_2">
<label>4.3.2</label>
<title>Refinement mechanism applied to the augmented training dataset and combined with the original samples</title>
<p>In this case, we combined the blurred dataset with the original augmented dataset. The purpose was to provide the model with more detailed features of the target areas, and the refinement mechanism acted as a type of data augmentation technique. However, for our specific task, we aimed to examine its impact as part of a partially corrupted dataset. <xref ref-type="fig" rid="f10">
<bold>Figure&#xa0;A1 B</bold>
</xref> in the Appendix shows the strategy implemented for this configuration.</p>
<p>To comprehensively evaluate the model&#x2019;s performance under different settings, we conducted a thorough analysis involving the number of predicted masks corresponding to leaves and the AP on the test dataset. This evaluation was carried out by applying various kernel sizes for the GLPF, which introduced multiple levels of blurring in the training data. To ensure the reliability of our findings, we conducted three rounds of model training and calculated the standard deviation.</p>
<p>The results presented in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref> unveiled two prominent trends: In the first scenario, where the refinement mechanism was applied solely to the ATD, we observed a slight reduction in AP. However, an interesting phenomenon occurred; the model seemed to learn to associate the noise generated by applying the GLPF. Consequently, while the AP decreased slightly, the number of detected masks increased. This intriguing observation suggests that the model acquired enhanced capabilities to handle such blurred data during training, thereby becoming more robust against such changes.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Results of the refinement mechanism applied to the training dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Model</th>
<th valign="middle" rowspan="2" align="center">Kernel size</th>
<th valign="middle" rowspan="2" align="center">Predicted masks</th>
<th valign="middle" colspan="3" align="center">Segmentation</th>
<th valign="top" colspan="3" align="center">Bounding box detection</th>
</tr>
<tr>
<th valign="middle" align="center">AP<sub>50</sub>
</th>
<th valign="middle" align="center">AP<sub>75</sub>
</th>
<th valign="middle" align="center">AP<sub>50&#x2013;95</sub>
</th>
<th valign="middle" align="center">AP<sub>50</sub>
</th>
<th valign="middle" align="center">AP<sub>75</sub>
</th>
<th valign="middle" align="center">AP<sub>50&#x2013;95</sub>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">Baseline</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="top" align="center">889 &#xb1; 11</td>
<td valign="middle" align="center">91.5 &#xb1; 0.4</td>
<td valign="middle" align="center">83.5 &#xb1; 0.5</td>
<td valign="middle" align="center">74.5 &#xb1; 0.1</td>
<td valign="top" align="center">91.2 &#xb1; 0.3</td>
<td valign="top" align="center">80.9 &#xb1; 1.4</td>
<td valign="top" align="center">71.4 &#xb1; 0.7</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">GLPF applied to the ATD</td>
<td valign="middle" align="center">5</td>
<td valign="top" align="center">897 &#xb1; 3</td>
<td valign="middle" align="center">91.9 &#xb1; 0.1</td>
<td valign="middle" align="center">83.3 &#xb1; 1.8</td>
<td valign="middle" align="center">75.0 &#xb1; 0.3</td>
<td valign="top" align="center">91.3 &#xb1; 0.8</td>
<td valign="top" align="center">80.4 &#xb1; 1.1</td>
<td valign="top" align="center">71.4 &#xb1; 0.3</td>
</tr>
<tr>
<td valign="middle" align="center">7</td>
<td valign="top" align="center">909 &#xb1; 4</td>
<td valign="middle" align="center">91.3 &#xb1; 0.2</td>
<td valign="middle" align="center">83.3 &#xb1; 1.0</td>
<td valign="middle" align="center">74.6 &#xb1; 0.3</td>
<td valign="top" align="center">91.2 &#xb1; 0.2</td>
<td valign="top" align="center">80.5 &#xb1; 0.3</td>
<td valign="top" align="center">71.1 &#xb1; 0.4</td>
</tr>
<tr>
<td valign="middle" align="center">9</td>
<td valign="top" align="center">915 &#xb1; 4</td>
<td valign="middle" align="center">91.5 &#xb1; 0.1</td>
<td valign="middle" align="center">83.3 &#xb1; 0.5</td>
<td valign="middle" align="center">74.6 &#xb1; 0.2</td>
<td valign="top" align="center">91.1 &#xb1; 0.2</td>
<td valign="top" align="center">80.3 &#xb1; 1.2</td>
<td valign="top" align="center">71.1 &#xb1; 0.1</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">GLPF applied to the ATD + OI</td>
<td valign="middle" align="center">5</td>
<td valign="top" align="center">863 &#xb1; 4</td>
<td valign="middle" align="center">89.7 &#xb1; 0.4</td>
<td valign="middle" align="center">82.2 &#xb1; 0.8</td>
<td valign="middle" align="center">74.4 &#xb1; 0.1</td>
<td valign="top" align="center">88.9 &#xb1; 0.6</td>
<td valign="top" align="center">79.3 &#xb1; 1.0</td>
<td valign="top" align="center">70.9 &#xb1; 0.2</td>
</tr>
<tr>
<td valign="middle" align="center">7</td>
<td valign="top" align="center">865 &#xb1; 7</td>
<td valign="middle" align="center">89.7 &#xb1; 0.3</td>
<td valign="middle" align="center">82.2 &#xb1; 1.1</td>
<td valign="middle" align="center">74.2 &#xb1; 0.2</td>
<td valign="top" align="center">89.4 &#xb1; 0.2</td>
<td valign="top" align="center">79.2 &#xb1; 0.6</td>
<td valign="top" align="center">70.8 &#xb1; 0.1</td>
</tr>
<tr>
<td valign="middle" align="center">9</td>
<td valign="middle" align="center">870 &#xb1; 2</td>
<td valign="middle" align="center">90.1 &#xb1; 0.8</td>
<td valign="middle" align="center">82.2 &#xb1; 1.0</td>
<td valign="middle" align="center">74.3 &#xb1; 0.6</td>
<td valign="top" align="center">89.2 &#xb1; 0.3</td>
<td valign="top" align="center">79.6 &#xb1; 0.2</td>
<td valign="top" align="center">71.2 &#xb1; 0.9</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>ATD, Augmented training dataset.</p>
</fn>
<fn>
<p>OI, Original images.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>In contrast, the second scenario, where original data were combined with the ATD, revealed a different outcome. Here, the performance of the model decreased, accompanied by a decline in the number of predicted masks. This decline can be attributed to the model&#x2019;s primary focus on recognizing clear data. Consequently, when confronted with blurred data, the model became frequently confused, leading to a drop in performance. As the kernel size for the GLPF increased and blurring became more severe, this confusion further exacerbated the model&#x2019;s inability to accurately segment leaves.</p>
<p>These findings strongly indicate that the blurring data introduced by the GLPF, when applied to the training dataset, significantly contributed to making the model robust against blurring effects in the data. Consequently, this adaptation played a vital role in improving the model&#x2019;s ability to accurately segment leaves. <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref> provided some qualitative examples of the model&#x2019;s performance, further highlighting the challenges and limitations posed by introducing blurriness in the training dataset.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Example qualitative results on the tomato plant dataset. <bold>(A)</bold> Original images. <bold>(B)</bold> Ground truth (actual annotations). <bold>(C)</bold> Predicted results on the original images. <bold>(D)</bold> Predicted results using Gaussian low-pass filter on the training dataset. <bold>(E)</bold> Predicted results using the High-boost filter on the test dataset. The visual comparison highlights how different approaches, such as applying filters to the training or test datasets, influence the model&#x2019;s predictions.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1211075-g006.tif"/>
</fig>
<p>This study showcased the significance of the refinement mechanism, particularly when applied to the ATD, in enhancing the model&#x2019;s robustness against blurriness in the data, leading to improved leaf instance segmentation performance. However, caution is required when combining original and blurred data during training, as it may adversely affect the model&#x2019;s ability to handle blurriness. These insights have practical implications for real-world applications.</p>
</sec>
</sec>
<sec id="s4_4">
<label>4.4</label>
<title>Refinement mechanism applied to the test data</title>
<p>In the previous experiment, we applied the refinement mechanism to the training data, which resulted in a decline in performance. To address this challenge, we conducted two additional experiments, focusing on the test dataset to explore alternative solutions. These experiments correspond to the second scenario outlined in Section 3.3 and <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5B</bold>
</xref>, and their outcomes are summarized in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Results of the refinement mechanism applied as postprocessing.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Method</th>
<th valign="middle" rowspan="2" align="center">Kernel size</th>
<th valign="middle" rowspan="2" align="center">Predicted masks</th>
<th valign="middle" colspan="3" align="center">Segmentation</th>
<th valign="top" colspan="3" align="center">Bounding box detection</th>
</tr>
<tr>
<th valign="middle" align="center">AP<sub>50</sub>
</th>
<th valign="middle" align="center">AP<sub>75</sub>
</th>
<th valign="middle" align="center">AP<sub>50&#x2013;95</sub>
</th>
<th valign="middle" align="center">AP<sub>50</sub>
</th>
<th valign="middle" align="center">AP<sub>75</sub>
</th>
<th valign="middle" align="center">AP<sub>50&#x2013;95</sub>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">Baseline</td>
<td valign="middle" align="center">
<bold>-</bold>
</td>
<td valign="top" align="center">889 &#xb1; 11</td>
<td valign="top" align="center">91.5 &#xb1; 0.4</td>
<td valign="top" align="center">83.5 &#xb1; 0.5</td>
<td valign="top" align="center">74.5 &#xb1; 0.1</td>
<td valign="top" align="center">91.2 &#xb1; 0.3</td>
<td valign="top" align="center">80.9 &#xb1; 1.4</td>
<td valign="top" align="center">71.4 &#xb1; 0.7</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">GLPF applied to the test dataset</td>
<td valign="middle" align="center">5</td>
<td valign="top" align="center">869 &#xb1; 15</td>
<td valign="top" align="center">91.2 &#xb1; 1.1</td>
<td valign="top" align="center">82.6 &#xb1; 1.3</td>
<td valign="top" align="center">73.9 &#xb1; 0.5</td>
<td valign="top" align="center">90.4 &#xb1; 0.5</td>
<td valign="top" align="center">79.5 &#xb1; 0.4</td>
<td valign="top" align="center">70.9 &#xb1; 0.4</td>
</tr>
<tr>
<td valign="middle" align="center">7</td>
<td valign="top" align="center">840 &#xb1; 38</td>
<td valign="top" align="center">90.4 &#xb1; 1.1</td>
<td valign="top" align="center">81.6 &#xb1; 0.8</td>
<td valign="top" align="center">72.7 &#xb1; 0.4</td>
<td valign="top" align="center">89.7 &#xb1; 1.2</td>
<td valign="top" align="center">75.8 &#xb1; 1.5</td>
<td valign="top" align="center">72.8 &#xb1; 0.4</td>
</tr>
<tr>
<td valign="middle" align="center">9</td>
<td valign="top" align="center">811 &#xb1; 48</td>
<td valign="top" align="center">89.2 &#xb1; 1.0</td>
<td valign="top" align="center">81.1 &#xb1; 1.6</td>
<td valign="top" align="center">71.4 &#xb1; 0.5</td>
<td valign="top" align="center">88.7 &#xb1; 2.0</td>
<td valign="top" align="center">77.8 &#xb1; 1.0</td>
<td valign="top" align="center">68.5 &#xb1; 0.9</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">HBF applied to the test dataset</td>
<td valign="middle" align="center">5</td>
<td valign="top" align="center">901 &#xb1; 4</td>
<td valign="top" align="center">92.0 &#xb1; 0.8</td>
<td valign="top" align="center">83.9 &#xb1; 0.7</td>
<td valign="top" align="center">75.1 &#xb1; 0.3</td>
<td valign="top" align="center">91.8 &#xb1; 0.5</td>
<td valign="top" align="center">81.6 &#xb1; 0.7</td>
<td valign="top" align="center">72.0 &#xb1; 0.3</td>
</tr>
<tr>
<td valign="middle" align="center">7</td>
<td valign="top" align="center">909 &#xb1; 13</td>
<td valign="top" align="center">92.3 &#xb1; 0.8</td>
<td valign="top" align="center">83.8&#xb1;1.2</td>
<td valign="top" align="center">75.3&#xb1;0.6</td>
<td valign="top" align="center">91.9&#xb1;1.2</td>
<td valign="top" align="center">82.1&#xb1;2.6</td>
<td valign="top" align="center">72.9&#xb1;2.5</td>
</tr>
<tr>
<td valign="middle" align="center">9</td>
<td valign="middle" align="center">918 &#xb1; 2</td>
<td valign="top" align="center">92.7 &#xb1; 0.9</td>
<td valign="top" align="center">84.5&#xb1;1.3</td>
<td valign="top" align="center">75.9&#xb1;1.1</td>
<td valign="top" align="center">92.6&#xb1;1.5</td>
<td valign="top" align="center">82.8&#xb1;3.3</td>
<td valign="top" align="center">73.5&#xb1;3.8</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In this experiment, we employed the refinement mechanism in two different configurations:</p>
<sec id="s4_4_1">
<label>4.4.1</label>
<title>Refinement mechanism with GLPF applied to the test dataset</title>
<p>The objective of this experiment was to assess how the presence of instance blurriness in the test data influences the model&#x2019;s predictions. As revealed by the results in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>, increasing the kernel size of the GLPF had an adverse effect on both AP and the number of predicted masks. Larger kernel sizes caused the RoIs to become more blurred, resulting in a challenging situation for the model to accurately detect the presence of leaves. The leaves tended to merge with the background, leading to a reduction in overall performance.</p>
</sec>
<sec id="s4_4_2">
<label>4.4.2</label>
<title>Refinement mechanism with HBF applied to the test dataset</title>
<p>In this case, we sought to determine whether applying HBF to the test data, utilizing the refined instances, could enhance the prediction of leaf samples (see <xref ref-type="fig" rid="f11">
<bold>Figure&#xa0;A2 A</bold>
</xref> in the Appendix for the implemented strategy). As indicated in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>, by locally applying HBF, the system predicted more leaves, a favorable outcome for downstream processing to obtain phenotypic data. Notably, the AP also improved for both segmentation and bounding box detection, signifying an overall enhancement in performance compared with the baseline.</p>
<p>The results of these experiments demonstrate the advantageous impact of the refinement mechanism, particularly when using HBF. The HBF approach enabled the model to capture more intricate information, resulting in an increased number of correctly predicted leaf instances. While the application of GLPF had a detrimental impact due to increased blurriness, the usage of HBF significantly improved the prediction of leaf instances, contributing to a more effective and precise segmentation.</p>
<p>
<xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6E</bold>
</xref> provides an example of a qualitative result, showcasing the visual impact of the strategy on the model&#x2019;s predictions. This illustration further supports the effectiveness of using the refinement mechanism with HBF in improving leaf instance segmentation in the tomato plant dataset.</p>
</sec>
</sec>
<sec id="s4_5">
<label>4.5</label>
<title>Effects of the implemented strategies</title>
<sec id="s4_5_1">
<label>4.5.1</label>
<title>Effect of the refined data by HBF</title>
<p>To gain further insights into the contribution and impact of the refinement mechanism, we conducted an in-depth analysis using both GLPF and HBF on the test dataset. First, we applied a GLPF to the test dataset, generating fuzzy instances, and then consecutively applied an HBF to the same areas. For this analysis, we utilized the weights of the model trained with the original augmented images to make predictions on the test data. (See <xref ref-type="fig" rid="f11">
<bold>Figure&#xa0;A2 B</bold>
</xref> in the Appendix for the implemented strategy).</p>
<p>
<xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7A</bold>
</xref> illustrates the changes in the predicted leaf instances based on the size of the HBF core, taking into account the accepted level of blur given by the GLPF. It becomes evident that the model started to benefit from an HBF kernel size greater than 7 &#xd7; 7 while being constrained by a GLPF kernel size of 3 &#xd7; 3 or 5 &#xd7; 5. Furthermore, a trade-off between blurriness and refinement was observed. Larger HBF kernel sizes, such as 15 &#xd7; 15, exhibited better performance, generating more accurately segmented leaves than those present in the original test data. Additionally, we computed the average change rate (ave) for the GLPF kernel sizes, and it became apparent that the model was generally influenced by more significant levels of blurriness provided by the GLPF.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Effects of the implemented refinement strategy on the predicted leaf instances. <bold>(A)</bold> Effect of HBF and GLPF kernel sizes: A kernel size of 15 &#xd7; 15 positively influenced the model&#x2019;s performance, resulting in more segmented regions compared with the original test dataset. The &#x201c;ave&#x201d; value represents the average change rate across all kernel sizes. <bold>(B)</bold> Effect of GLPF kernel sizes: The level of blurriness had a negative impact on the number of predicted samples. Larger kernel sizes resulted in reduced presence of predicted leaves. <bold>(C)</bold> Improved segmentation of leaves through HBF on GLPF-filtered instances: HBF significantly enhanced the segmentation of leaves, based on the ground-truth labels in the test data, even when blurriness was present in the GLPF-filtered samples.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1211075-g007.tif"/>
</fig>
</sec>
<sec id="s4_5_2">
<label>4.5.2</label>
<title>Effect of the blurred data by GLPF</title>
<p>The effect of the blurred data by the GLPF is depicted in <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7B</bold>
</xref>, showing the corresponding impact of applying GLPF on the instances of the test data. We used the results obtained with different kernel sizes to measure the changes in predicted leaf instances. Consistent with the findings in Section 4.4, it was observed that the level of blur introduced by the GLPF, based on its kernel size, negatively affected the number of predicted masks. As a result, larger values of kernel size led to a reduction in the presence of predicted leaves.</p>
</sec>
<sec id="s4_5_3">
<label>4.5.3</label>
<title>Effect of the refinement mechanism on the prediction of ground-truth labels</title>
<p>
<xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7C</bold>
</xref> complements the aforementioned analysis by showing the performance gain of the predicted instances compared with the ground truth of the test data. The application of HBF substantially improved the predictions regardless of the presence of blur samples. The performance enhancement was found to be dependent on the size of the kernel. Specifically, a 15 &#xd7; 15 kernel size positively influenced the final results, effectively overcoming the issues caused by GLPF blurring effects.</p>
<p>To visually illustrate the effects of the refinement mechanism on the test data with GLPF and HBF, we present qualitative examples in <xref ref-type="fig" rid="f8">
<bold>Figures&#xa0;8</bold>
</xref> and <xref ref-type="fig" rid="f9">
<bold>9</bold>
</xref>. The figures showcase two cases: one with multiple leaves (<xref ref-type="fig" rid="f8">
<bold>Figure&#xa0;8</bold>
</xref>) and the other with few leaves (<xref ref-type="fig" rid="f9">
<bold>Figure&#xa0;9</bold>
</xref>). Notably, the use of GLPF and HBF resulted in contrasting performance. While larger kernel sizes of the GLPF negatively impacted the prediction of the ground truth, the larger kernel sizes of the HBF proved beneficial by increasing the number of correctly predicted samples without compromising performance. The HBF effectively enhanced the clarity of RoIs and counteracted the blurring effects of GLPF. Consequently, the model segmented more leaves when the HBF was applied. However, it is important to note that this outcome was highly dependent on the size of the kernel used by the HBF filter.</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Example results of applying <bold>(A)</bold> GLPF and <bold>(B)</bold> HBF on the test data using an image with multiple leaves. As the GLPF kernel size increased, the prediction performance declined. However, with HBF, the system benefited from larger kernel sizes, resulting in the generation of more accurately segmented leaf instances.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1211075-g008.tif"/>
</fig>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Example results of applying <bold>(A)</bold> GLPF and <bold>(B)</bold> HBF on the test data using an image with few leaves. As the GLPF kernel size increased, the prediction performance declined. However, with HBF, the system benefited from larger kernel sizes, resulting in the generation of more accurately segmented leaf instances.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1211075-g009.tif"/>
</fig>
</sec>
</sec>
<sec id="s4_6">
<label>4.6</label>
<title>Comparison with other state-of-the-art architectures</title>
<p>In order to thoroughly assess the effectiveness of our refinement mechanism, we conducted comparative experiments using the HBF on the test data alongside other state-of-the-art methods such as PointRend (<xref ref-type="bibr" rid="B24">Kirillov et&#xa0;al., 2020</xref>), Mask Scoring R-CNN (<xref ref-type="bibr" rid="B22">Huang et&#xa0;al., 2019</xref>), CARAFE (<xref ref-type="bibr" rid="B40">Wang et&#xa0;al., 2020</xref>), Hybrid Task Cascade (HTC) (<xref ref-type="bibr" rid="B40">Wang et&#xa0;al., 2020</xref>), Cascade R-CNN (<xref ref-type="bibr" rid="B6">Cai and Vasconcelos, 2018</xref>), and Mask R-CNN (<xref ref-type="bibr" rid="B19">He et&#xa0;al., 2017</xref>). To ensure fair comparisons, all models were based on the Albumentations transformation method, with (w) and without (w/o) the inclusion of our refinement strategy (<xref ref-type="bibr" rid="B5">Buslaev et&#xa0;al., 2020</xref>).</p>
<p>The experimental results, presented in <xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref>, clearly demonstrate that the proposed refinement strategy significantly improved the performance of all implemented models. Regarding segmentation metrics, Mask R-CNN with the refinement strategy achieved the highest performance with an AP of 92.7% when IoU &gt; 0.5. The HTC model also exhibited comparable capabilities with an AP<sub>50</sub> score of 92.1% when using our strategy. Notably, the Cascade R-CNN model exhibited the highest improvement of 3.2% after incorporating our refinement mechanism.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Comparison with other state-of-the-art methods.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Model</th>
<th valign="top" colspan="2" align="center">Refinement</th>
<th valign="middle" colspan="3" align="center">Segmentation</th>
<th valign="middle" colspan="3" align="center">Bounding box detection</th>
</tr>
<tr>
<th valign="top" align="center">w/o</th>
<th valign="top" align="center">w</th>
<th valign="middle" align="center">AP<sub>50</sub>
</th>
<th valign="middle" align="center">AP<sub>75</sub>
</th>
<th valign="middle" align="center">AP<sub>50&#x2013;95</sub>
</th>
<th valign="middle" align="center">AP<sub>50</sub>
</th>
<th valign="middle" align="center">AP<sub>75</sub>
</th>
<th valign="middle" align="center">AP<sub>50&#x2013;95</sub>
</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="2" align="center">PointRend</td>
<td valign="top" align="center">&#x2713;</td>
<td valign="top" align="center"/>
<td valign="middle" align="center">86.9</td>
<td valign="middle" align="center">82.0</td>
<td valign="middle" align="center">76.4</td>
<td valign="middle" align="center">85.8</td>
<td valign="middle" align="center">80.6</td>
<td valign="middle" align="center">74.1</td>
</tr>
<tr>
<td valign="top" align="center"/>
<td valign="top" align="center">&#x2713;</td>
<td valign="middle" align="center">88.8</td>
<td valign="middle" align="center">85.0</td>
<td valign="middle" align="center">78.4</td>
<td valign="middle" align="center">88.6</td>
<td valign="middle" align="center">82.7</td>
<td valign="middle" align="center">76.1</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">Mask Scoring R-CNN</td>
<td valign="top" align="center">&#x2713;</td>
<td valign="top" align="center"/>
<td valign="middle" align="center">86.3</td>
<td valign="middle" align="center">79.4</td>
<td valign="middle" align="center">72.4</td>
<td valign="middle" align="center">69.0</td>
<td valign="middle" align="center">86.5</td>
<td valign="middle" align="center">76.9</td>
</tr>
<tr>
<td valign="top" align="center"/>
<td valign="top" align="center">&#x2713;</td>
<td valign="middle" align="center">87.4</td>
<td valign="middle" align="center">80.5</td>
<td valign="middle" align="center">73.6</td>
<td valign="middle" align="center">87.7</td>
<td valign="middle" align="center">79.7</td>
<td valign="middle" align="center">70.8</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">CARAFE</td>
<td valign="top" align="center">&#x2713;</td>
<td valign="top" align="center"/>
<td valign="middle" align="center">89.2</td>
<td valign="middle" align="center">84.4</td>
<td valign="middle" align="center">76.8</td>
<td valign="middle" align="center">88.1</td>
<td valign="middle" align="center">82.3</td>
<td valign="middle" align="center">74.9</td>
</tr>
<tr>
<td valign="top" align="center"/>
<td valign="top" align="center">&#x2713;</td>
<td valign="middle" align="center">91.2</td>
<td valign="middle" align="center">85.7</td>
<td valign="middle" align="center">78.2</td>
<td valign="middle" align="center">90.0</td>
<td valign="middle" align="center">84.7</td>
<td valign="middle" align="center">76.2</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">Cascade R-CNN</td>
<td valign="top" align="center">&#x2713;</td>
<td valign="top" align="center"/>
<td valign="middle" align="center">86.6</td>
<td valign="middle" align="center">82.9</td>
<td valign="middle" align="center">75.9</td>
<td valign="middle" align="center">85.5</td>
<td valign="middle" align="center">80.0</td>
<td valign="middle" align="center">75.1</td>
</tr>
<tr>
<td valign="top" align="center"/>
<td valign="top" align="center">&#x2713;</td>
<td valign="middle" align="center">89.8</td>
<td valign="middle" align="center">86.1</td>
<td valign="middle" align="center">78.7</td>
<td valign="middle" align="center">89.3</td>
<td valign="middle" align="center">83.0</td>
<td valign="middle" align="center">78.0</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">Hybrid Task Cascade</td>
<td valign="top" align="center">&#x2713;</td>
<td valign="top" align="center"/>
<td valign="middle" align="center">91.8</td>
<td valign="middle" align="center">85.5</td>
<td valign="middle" align="center">77.9</td>
<td valign="middle" align="center">91.4</td>
<td valign="middle" align="center">81.9</td>
<td valign="middle" align="center">75.0</td>
</tr>
<tr>
<td valign="top" align="center"/>
<td valign="top" align="center">&#x2713;</td>
<td valign="middle" align="center">92.1</td>
<td valign="middle" align="center">86.0</td>
<td valign="middle" align="center">78.4</td>
<td valign="middle" align="center">91.7</td>
<td valign="middle" align="center">83.3</td>
<td valign="middle" align="center">75.7</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">Mask R-CNN</td>
<td valign="top" align="center">&#x2713;</td>
<td valign="top" align="center"/>
<td valign="middle" align="center">91.6</td>
<td valign="middle" align="center">83.4</td>
<td valign="middle" align="center">74.5</td>
<td valign="middle" align="center">91.3</td>
<td valign="middle" align="center">81.4</td>
<td valign="middle" align="center">71.6</td>
</tr>
<tr>
<td valign="top" align="center"/>
<td valign="top" align="center">&#x2713;</td>
<td valign="middle" align="center">92.7</td>
<td valign="middle" align="center">84.5</td>
<td valign="middle" align="center">75.9</td>
<td valign="middle" align="center">92.6</td>
<td valign="middle" align="center">82.8</td>
<td valign="middle" align="center">73.5</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In terms of bounding box detection, our improved Mask R-CNN achieved the top score with an AP<sub>50</sub> of 92.6%. Among the models, Mask Scoring R-CNN displayed the most substantial improvement in performance, with an AP<sub>50</sub> score of 87.7%, representing an increase of 18.7 percentage points. Overall, all models experienced performance gains through the application of our refinement strategy, demonstrating its effectiveness in enhancing leaf instance segmentation in cluttered background conditions.</p>
</sec>
</sec>
<sec id="s5" sec-type="conclusion">
<label>5</label>
<title>Conclusion</title>
<p>This paper introduced an approach for leaf instance segmentation based on deep learning. Specifically, this research represents a significant step forward in the domain of leaf instance segmentation, offering an innovative and effective approach to tackle the challenges associated with cluttered backgrounds and varying image quality. Through the integration of a local refinement mechanism, we have demonstrated improvements in the accuracy and robustness of leaf instance segmentation. Our proposed refinement mechanism, incorporating Gaussian low-pass and HBF, serves as a key driver behind the effectiveness of our approach. The ability to apply this mechanism either during training or on the test dataset highlights its versatility and adaptability to different scenarios. The refined feature representations within leaf instances enabled the model to better distinguish target leaves, even in the presence of blurriness and cluttered backgrounds. Our qualitative and quantitative experimental results performed on our tomato leaf dataset reinforced the reliability and accuracy of our system in data from real-world greenhouse scenarios. The ability to accurately segment target leaves despite challenging conditions, such as occlusion and overlapping, highlights the potential applications of our approach in plant phenotyping.</p>
</sec>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation. For details, contact <email xlink:href="mailto:afuentes@jbnu.ac.kr">afuentes@jbnu.ac.kr</email>.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>RM performed the experiments. AF collaborated on the framework design, and data acquisition, and wrote the manuscript. DP and SY advised on the system&#x2019;s design, analyzed the strategies, and supervised its development. SK, HK, and WL collaborated on the project and its implementation. All authors contributed to the article and approved the submitted version.</p>
</sec>
</body>
<back>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>This research was supported by Basic Science Research Program through the National Research Foundation of Korea (NRF) funded by the Ministry of Education (No. 2019R1A6A1A09031717); by Korea Institute of Planning and Evaluation for Technology in Food, Agriculture and Forestry (IPET) and Korea Smart Farm R&amp;D Foundation (KosFarm) through Smart Farm Innovation Technology Development Program, funded by Ministry of Agriculture, Food and Rural Affairs (MAFRA) and Ministry of Science and ICT (MSIT), Rural Development Administration (RDA) (1545027569); and in part by the Agricultural Science and Technology Development Cooperation Research Program (RS-2021-RD009890).</p>
</sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>Author WL is employed by Intelligent Robot Studio Co. Ltd.</p>
<p>The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Afonso</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Fonteijn</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Fiorentin</surname> <given-names>F. S.</given-names>
</name>
<name>
<surname>Lensink</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Mooij</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Faber</surname> <given-names>N.</given-names>
</name>
<etal/>
</person-group>. (<year>2020</year>). <article-title>Tomato fruit detection and counting in greenhouses using deep learning</article-title>. <source>Front. Plant Sci.</source> <volume>11</volume>, <elocation-id>1759</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/FPLS.2020.571299/BIBTEX</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Aich</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Stavness</surname> <given-names>I.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Leaf counting with deep convolutional and deconvolutional networks</article-title> <conf-name>Proceedings - 2017 IEEE International Conference on Computer Vision Workshops, ICCVW 2017</conf-name>. <fpage>2080</fpage>&#x2013;<lpage>2089</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ICCVW.2017.244</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Barbedo</surname> <given-names>J. G. A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Factors influencing the use of deep learning for plant disease recognition</article-title>. <source>Biosyst. Eng.</source> <volume>172</volume>, <fpage>84</fpage>&#x2013;<lpage>91</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/J.BIOSYSTEMSENG.2018.05.013</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Br</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Av</surname> <given-names>S. H.</given-names>
</name>
<name>
<surname>Ashok</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Diseased leaf segmentation from complex background using indices based histogram</article-title>,&#x201d; in <conf-name>Proceedings of the 6th International Conference on Communication and Electronics Systems, ICCES 2021</conf-name>. <fpage>1502</fpage>&#x2013;<lpage>1507</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ICCES51350.2021.9489112</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Buslaev</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Iglovikov</surname> <given-names>V. I.</given-names>
</name>
<name>
<surname>Khvedchenya</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Parinov</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Druzhinin</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Kalinin</surname> <given-names>A. A</given-names>
</name>
</person-group>. (<year>2020</year>). <article-title>Albumentations: fast and flexible image augmentations</article-title>. <source>Inf.</source> <volume>11</volume> (<issue>2</issue>), <elocation-id>125</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/INFO11020125</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Cai</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Vasconcelos</surname> <given-names>N.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Cascade R-CNN: delving into high quality object detection</article-title>,&#x201d; in <conf-name>2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>. <fpage>6154</fpage>&#x2013;<lpage>6162</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR.2018.00644</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Costa</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Schurr</surname> <given-names>U.</given-names>
</name>
<name>
<surname>Loreto</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Menesatti</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Carpentier</surname> <given-names>S</given-names>
</name>
</person-group>. (<year>2019</year>). <article-title>Plant phenotyping research trends, a science mapping approach</article-title>. <source>Front. Plant Sci.</source> <volume>9</volume>, <elocation-id>426195</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/FPLS.2018.01933/BIBTEX</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Das</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Das Choudhury</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Das</surname> <given-names>A. K.</given-names>
</name>
<name>
<surname>Samal</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Awada</surname> <given-names>T.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>EmergeNet: A novel deep-learning based ensemble segmentation model for emergence timing detection of coleoptile</article-title>. <source>Front. Plant Sci.</source> <volume>14</volume>, <elocation-id>1084778</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/FPLS.2023.1084778/BIBTEX</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dong</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Fuentes</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Yoon</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>M. H.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Data-centric annotation analysis for plant disease detection: Strategy, consistency, and performance</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>, <elocation-id>1037655</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/FPLS.2022.1037655/BIBTEX</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Everingham</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Van Gool</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Williams</surname> <given-names>C. K. I.</given-names>
</name>
<name>
<surname>Winn</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zisserman</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2009</year>). <article-title>The pascal visual object classes (VOC) challenge</article-title>. <source>Int. J. Comput. Vision</source> <volume>88</volume> (<issue>2</issue>), <fpage>303</fpage>&#x2013;<lpage>338</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/S11263-009-0275-4</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fan</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Tjahjadi</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Das Choudhury</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ye</surname> <given-names>Q</given-names>
</name>
</person-group>. (<year>2022</year>). <article-title>A segmentation-guided deep learning framework for leaf counting</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>, <elocation-id>844522</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/FPLS.2022.844522/BIBTEX</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Farjon</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Itzhaky</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Khoroshevsky</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Bar-Hillel</surname> <given-names>A</given-names>
</name>
</person-group>. (<year>2021</year>). <article-title>Leaf counting: fusing network components for improved accuracy</article-title>. <source>Front. Plant Sci.</source> <volume>12</volume>, <elocation-id>1063</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/FPLS.2021.575751/BIBTEX</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fuentes</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Yoon</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Park</surname> <given-names>D</given-names>
</name>
</person-group>. (<year>2017</year>). <article-title>A robust deep-learning-based detector for real-time tomato plant diseases and pests recognition</article-title>. <source>Sensors</source> <volume>17</volume> (<issue>9</issue>), <elocation-id>2022</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s17092022</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fuentes</surname> <given-names>A. F.</given-names>
</name>
<name>
<surname>Yoon</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Park</surname> <given-names>D. S</given-names>
</name>
</person-group>. (<year>2018</year>). <article-title>High-performance deep neural network-based tomato plant diseases and pests diagnosis system with refinement filter bank</article-title>. <source>Front. Plant Sci.</source> <volume>9</volume>, <elocation-id>1162</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/FPLS.2018.01162/BIBTEX</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fuentes</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Yoon</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>M. H.</given-names>
</name>
<name>
<surname>Park</surname> <given-names>D. S.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Improving accuracy of tomato plant disease diagnosis based on deep learning with explicit control of hidden classes</article-title>. <source>Front. Plant Sci.</source> <volume>12</volume>, <elocation-id>2938</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/FPLS.2021.682230/BIBTEX</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fuentes</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Yoon</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Park</surname> <given-names>D. S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Deep learning-based phenotyping system with glocal description of plant anomalies and symptoms</article-title>. <source>Front. Plant Sci.</source> <volume>10</volume>, <elocation-id>460700</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/FPLS.2019.01321/BIBTEX</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Geelen</surname> <given-names>P. A. M.</given-names>
</name>
<name>
<surname>Voogt</surname> <given-names>J. O.</given-names>
</name>
<name>
<surname>van Weel</surname> <given-names>P. A.</given-names>
</name>
</person-group> (<year>2018</year>). <source>Plant Empowerment: The Basic Principles: how an Integrated Approach Based on Physics and Plant Physiology Leads to a Balanced Growing Method for Protected Crops Resulting in Healthy Resilient Plants, High Yield and Quality, Low Energy Costs and Economic</source> (<publisher-loc>The Netherlands</publisher-loc>: <publisher-name>LetsGrow.com</publisher-name>).</citation>
</ref>
<ref id="B18">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Gonzalez</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Woods</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2018</year>). <source>Digital Image Processing</source>. <edition>4th edn</edition> (<publisher-loc>New York</publisher-loc>: <publisher-name>Pearson</publisher-name>).</citation>
</ref>
<ref id="B19">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Gkioxari</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Dollar</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Mask R-CNN</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE International Conference on Computer Vision</conf-name>, <conf-date>2017</conf-date>. <fpage>2980</fpage>&#x2013;<lpage>2988</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ICCV.2017.322</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Heuvelink</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2005</year>). <source>Tomatoes</source>. <edition>13th edn</edition> (<publisher-loc>CABI</publisher-loc>: <publisher-name>CABI Publishing</publisher-name>). doi:&#xa0;<pub-id pub-id-type="doi">10.1079/9780851993966.0000</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hilty</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Muller</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Pantin</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Leuzinger</surname> <given-names>S</given-names>
</name>
</person-group>. (<year>2021</year>). <article-title>Plant growth: the what, the how, and the why</article-title>. <source>New Phytol.</source> <volume>232</volume> (<issue>1</issue>), <fpage>25</fpage>&#x2013;<lpage>41</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/NPH.17610</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Huang</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Gong</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
</person-group>. (<year>2019</year>). &#x201c;<article-title>Mask scoring R-CNN</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition</conf-name>, <conf-date>2019-June</conf-date>. <fpage>6402</fpage>&#x2013;<lpage>6411</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR.2019.00657</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>S.</given-names>
</name>
</person-group>. (<year>2020</year>). &#x201c;<article-title>A tomato leaf diseases classification method based on deep learning</article-title>,&#x201d; in <conf-name>Proceedings of the 32nd Chinese Control and Decision Conference, CCDC 2020</conf-name>. <fpage>1446</fpage>&#x2013;<lpage>1450</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CCDC49329.2020.9164457</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Kirillov</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>He</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Girshick</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>PointRend: image segmentation as rendering</article-title>,&#x201d; in <conf-name>2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>. <fpage>9796</fpage>&#x2013;<lpage>9805</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR42600.2020.00982</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kolhar</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Jagtap</surname> <given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Plant trait estimation and classification studies in plant phenotyping using machine vision &#x2013; A review</article-title>. <source>Inf. Process. Agric.</source> <volume>10</volume>, <fpage>114</fpage>&#x2013;<lpage>135</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/J.INPA.2021.02.006</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Kuznichov</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Zvirin</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Honen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Kimmel</surname> <given-names>R.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Data augmentation for leaf segmentation and counting tasks in rosette plants</article-title>,&#x201d; in <conf-name>IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops</conf-name>, <conf-date>2019-June</conf-date>. <fpage>2580</fpage>&#x2013;<lpage>2589</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPRW.2019.00314</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>C. T.</given-names>
</name>
<name>
<surname>Adams</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Kouzani</surname> <given-names>A. Z.</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>He</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Self-supervised leaf segmentation under complex lighting conditions</article-title>. <source>Pattern Recognit.</source> <volume>135</volume>, <elocation-id>109021</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/J.PATCOG.2022.109021</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Plant diseases and pests detection based on deep learning: a review</article-title>. <source>Plant Methods</source> <volume>17</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>18</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/S13007-021-00722-9/TABLES/4</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mohanty</surname> <given-names>S. P.</given-names>
</name>
<name>
<surname>Hughes</surname> <given-names>D. P.</given-names>
</name>
<name>
<surname>Salath&#xe9;</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Using deep learning for image-based plant disease detection</article-title>. <source>Front. Plant Sci.</source> <volume>7</volume>, <fpage>1</fpage>&#x2013;<lpage>10</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2016.01419</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Okyere</surname> <given-names>F. G.</given-names>
</name>
<name>
<surname>Cudjoe</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Sadeghi-Tehran</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Virlet</surname> <given-names>N.</given-names>
</name>
<name>
<surname>Riche</surname> <given-names>A. B.</given-names>
</name>
<name>
<surname>Castle</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Machine learning methods for automatic segmentation of images of field- and glasshouse-based plants for high-throughput phenotyping</article-title>. <source>Plants</source> <volume>12</volume> (<issue>10</issue>), <fpage>2035</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/PLANTS12102035/S1</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Pape</surname> <given-names>J.-M.</given-names>
</name>
<name>
<surname>Klukas</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Utilizing machine learning approaches to improve the prediction of leaf counts and individual leaf segmentation of rosette plant images</article-title>. in <conf-name>Proceedings of the Computer Vision Problems in Plant Phenotyping (CVPPP)</conf-name> <volume>3</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5244/C.29.CVPPP.3</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pieruschka</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Schurr</surname> <given-names>U.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Plant phenotyping: Past, present, and future</article-title>. <source>Plant Phenomics</source> <volume>2019</volume>, <fpage>7507131</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.34133/2019/7507131</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Praveen Kumar</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Domnic</surname> <given-names>S.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Image based leaf segmentation and counting in rosette plants</article-title>. <source>Inf. Process. Agric.</source> <volume>6</volume> (<issue>2</issue>), <fpage>233</fpage>&#x2013;<lpage>246</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/J.INPA.2018.09.005</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rzanny</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Seeland</surname> <given-names>M.</given-names>
</name>
<name>
<surname>W&#xe4;ldchen</surname> <given-names>J.</given-names>
</name>
<name>
<surname>M&#xe4;der</surname> <given-names>P.</given-names>
</name>
</person-group>. (<year>2017</year>). <article-title>Acquiring and preprocessing leaf images for automated plant identification: Understanding the tradeoff between effort and information gain</article-title>. <source>Plant Methods</source> <volume>13</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>11</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/S13007-017-0245-8/FIGURES/8</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Scharr</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Minervini</surname> <given-names>M.</given-names>
</name>
<name>
<surname>French</surname> <given-names>A. P.</given-names>
</name>
<name>
<surname>Klukas</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Kramer</surname> <given-names>D. M.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
<etal/>
</person-group>. (<year>2015</year>). <article-title>Leaf segmentation in plant phenotyping: a collation study</article-title>. <source>Mach. Vision Appl.</source> <volume>27</volume> (<issue>4</issue>), <fpage>585</fpage>&#x2013;<lpage>606</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/S00138-015-0737-3</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Scharr</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Pridmore</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Tsaftaris</surname> <given-names>S. A.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Computer vision problems in plant phenotyping, CVPPP 2017: introduction to the CVPPP 2017 workshop papers</article-title>,&#x201d; in <conf-name>Proceedings - 2017 IEEE International Conference on Computer Vision Workshops, ICCVW 2017</conf-name>, <conf-date>2018-January</conf-date>. <fpage>2020</fpage>&#x2013;<lpage>2021</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ICCVW.2017.236</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Singh</surname> <given-names>A. K.</given-names>
</name>
<name>
<surname>Ganapathysubramanian</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Sarkar</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Singh</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Deep learning for plant stress phenotyping: trends and future perspectives</article-title>. <source>Trends Plant Sci.</source> <volume>23</volume> (<issue>10</issue>), <fpage>883</fpage>&#x2013;<lpage>898</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/J.TPLANTS.2018.07.004</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tian</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zeng</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Evans</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Segmentation of tomato leaf images based on adaptive clustering number of K-means algorithm</article-title>. <source>Comput. Electron. Agric.</source> <volume>165</volume>, <elocation-id>104962</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/J.COMPAG.2019.104962</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Walter</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Liebisch</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Hund</surname> <given-names>A.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Plant phenotyping: From bean weighing to image analysis</article-title>. <source>Plant Methods</source> <volume>11</volume> (<issue>1</issue>), <fpage>1</fpage>&#x2013;<lpage>11</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/S13007-015-0056-8/FIGURES/3</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Xu</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Loy</surname> <given-names>C. C.</given-names>
</name>
<name>
<surname>Lin</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>CARAFE: content-aware ReAssembly of FEatures</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF international conference on computer vision</conf-name>. <fpage>3007</fpage>&#x2013;<lpage>3016</lpage>.</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiong</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Shu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A review of plant phenotypic image recognition technology based on deep learning</article-title>. <source>Electronics</source> <volume>10</volume> (<issue>1</issue>), <elocation-id>81</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/ELECTRONICS10010081</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Yoon</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Fuentes</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Park</surname> <given-names>D. S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Style-consistent image translation: a novel data augmentation paradigm to improve plant disease recognition</article-title>. <source>Front. Plant Sci.</source> <volume>12</volume>, <elocation-id>773142</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/FPLS.2021.773142/BIBTEX</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Kim</surname> <given-names>H.</given-names>
</name>
<name>
<surname>Yang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Fuentes</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Meng</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yoon</surname> <given-names>S.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <source>Embrace Limited and Imperfect Training Datasets: Opportunities and Challenges in Plant Disease Recognition Using Deep Learning</source>. Available at: <uri xlink:href="https://arxiv.org/abs/2305.11533v2">https://arxiv.org/abs/2305.11533v2</uri> (Accessed <access-date>31 July 2023</access-date>).</citation>
</ref>
<ref id="B44">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Zhong</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>F.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Leaf segmentation and classification with a complicated background using deep learning</article-title>. <source>Agronomy</source> <volume>10</volume> (<issue>11</issue>), <elocation-id>1721</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/AGRONOMY10111721</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Kong</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhai</surname> <given-names>Z.</given-names>
</name>
</person-group>. (<year>2018</year>). <article-title>Automatic image segmentation method for cotton leaves with disease under natural environment</article-title>. <source>J. Integr. Agric.</source> <volume>17</volume> (<issue>8</issue>), <fpage>1800</fpage>&#x2013;<lpage>1814</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S2095-3119(18)61915-X</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Modified U-Net for plant diseased leaf image segmentation</article-title>. <source>Comput. Electron. Agric.</source> <volume>204</volume>, <elocation-id>107511</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/J.COMPAG.2022.107511</pub-id>
</citation>
</ref>
</ref-list>
<app-group>
<app>
<title>Appendix</title>
<fig id="f10" position="float">
<label>Figure&#xa0;A1</label>
<caption>
<p>Local refinement mechanism (GLPF) applied to the augmented training dataset. <bold>(A)</bold> Applying the mechanism to the original images. <bold>(B)</bold> Combining the blurred dataset with the original images. The number inside the parenthesis shows the number of images.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1211075-g010.tif"/>
</fig>
<fig id="f11" position="float">
<label>Figure&#xa0;A2</label>
<caption>
<p>Local refinement mechanism applied to the test dataset. <bold>(A)</bold> HBF. <bold>(B)</bold> GLPF followed by HBF. The number inside the parenthesis shows the number of images.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1211075-g011.tif"/>
</fig>
<fig id="f12" position="float">
<label>Figure&#xa0;A3</label>
<caption>
<p>Training curves of the model.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-14-1211075-g012.tif"/>
</fig>
</app>
</app-group>
</back>
</article>