<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2025.1598414</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Cluster segmentation and stereo vision-based apple localization algorithm for robotic harvesting</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Wang</surname><given-names>Jianxia</given-names></name>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Sun</surname><given-names>Wenbing</given-names></name>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3007792/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
</contrib>
</contrib-group>
<aff id="aff1"><institution>College of Cyber Security, Tarim University</institution>, <city>Alar</city>,&#xa0;<country country="CN">China</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Wenbing Sun, <email xlink:href="mailto:qoug265@163.com">qoug265@163.com</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2025-11-27">
<day>27</day>
<month>11</month>
<year>2025</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1598414</elocation-id>
<history>
<date date-type="received">
<day>24</day>
<month>03</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>04</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>20</day>
<month>10</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Wang and Sun.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Wang and Sun</copyright-holder>
<license>
<ali:license_ref start_date="2025-11-27">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Automated apple harvesting is hindered by clustered fruits, varying illumination, and inconsistent depth perception in complex orchard environments. While deep learning models such as Faster R-CNN and YOLO provide accurate 2D detection, they require large annotated datasets and high computational resources, and often lack the precise 3D localisation required for robotic picking.</p>
</sec>
<sec>
<title>Methods</title>
<p>This study proposes an enhanced K-Means clustering segmentation algorithm integrated with a stereo-vision system for accurate 3D apple localisation. Multi-feature fusion combining colour, morphology, and texture descriptors was applied to improve segmentation robustness. A block-matching stereo model was used to compute disparity and derive 3D coordinates. The method was evaluated against Faster R-CNN, YOLOv7, Mask R-CNN, SSD, DBSCAN, MISA, and HCA using metrics including Recognition Accuracy (RA), mean Average Precision (mAP), Mean Coordinate Deviation (MCD), Correct Recognition Rate (CRR), Frames Per Second (FPS), and depth-localisation error.</p>
</sec>
<sec>
<title>Results</title>
<p>The proposed method achieved &gt;91% detection accuracy and &lt;1% localisation error across challenging orchard conditions. Compared with Faster R-CNN, it maintained higher RA and lower MCD under high fruit overlap and variable lighting. Depth estimation achieved errors between 0.4%&#x2013;0.97% at 800&#x2013;1100 mm distances, confirming high spatial accuracy. The proposed model exceeded YOLOv7, SSD, FCN, and Mask R-CNN in F1-score, mAP, and FPS during complex lighting, occlusion, wind disturbance, and dense fruit distributions.</p>
</sec>
<sec>
<title>Discussion and Conclusion</title>
<p>The clustering-based stereo-vision framework provides stable 3D localisation and robust segmentation without large training datasets or high-performance hardware. Its low computational demand and strong performance under diverse orchard conditions make it suitable for real-time robotic harvesting. Future work will focus on large-scale orchard deployment, parallel optimisation, and adaptation to additional fruit types.</p>
</sec>
</abstract>
<kwd-group>
<kwd>apple detection</kwd>
<kwd>stereo vision system</kwd>
<kwd>orchard robotics/robotic harvesting</kwd>
<kwd>clustering-based segmentation</kwd>
<kwd>3D localization</kwd>
<kwd>precision agriculture</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declare financial support was received for the research and/or publication of this article. This research was supported by the projects &#x201c;Machine Learning-Based Vision System for Automatic Apple Harvesting&#x201d; (No. TDZKSS202137) and &#x201c;Medical Fabric Intelligent Management System Based on the Internet&#x201d; (No. TDZKSS202135).</funding-statement>
</funding-group>
<counts>
<fig-count count="11"/>
<table-count count="1"/>
<equation-count count="5"/>
<ref-count count="56"/>
<page-count count="13"/>
<word-count count="4840"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Sustainable and Intelligent Phytoprotection</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>The apple is one of the most popular fruit crops, ranking second in global fruit production. Harvesting apples remains a crucial yet demanding operation because it requires substantial labor and time (<xref ref-type="bibr" rid="B34">Qu et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B19">Jia et&#xa0;al., 2020</xref>). Traditional harvesting methods rely primarily on manual workforces, resulting in increased expenses, workforce shortages, and inconsistent quality and efficiency. Researchers have extensively investigated automated fruit detection and harvesting technologies that utilize machine vision and clustering-based segmentation to boost efficiency and precision (<xref ref-type="bibr" rid="B44">Tu et&#xa0;al., 2010</xref>; <xref ref-type="bibr" rid="B19">Jia et&#xa0;al., 2020</xref>).</p>
<p>In recent years, deep learning techniques such as YOLO, SSD, Faster R-CNN, and Mask R-CNN have been widely applied in fruit detection and recognition (<xref ref-type="bibr" rid="B32">Onishi et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B3">Biffi et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B19">Jia et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B55">Zhang et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B50">Xiao et&#xa0;al., 2023</xref>, <xref ref-type="bibr" rid="B50">2023</xref>). These systems fall into two categories: single-stage models (e.g., YOLO, SSD), which directly predict object locations and classes for faster processing, and two-stage models (e.g., Faster R-CNN, Mask R-CNN), which first propose candidate regions to improve classification and bounding accuracy (<xref ref-type="bibr" rid="B43">Tianjing and Mhamed, 2024</xref>; <xref ref-type="bibr" rid="B39">Shi et&#xa0;al., 2025</xref>) (<xref ref-type="bibr" rid="B26">Likas et&#xa0;al., 2003</xref>; <xref ref-type="bibr" rid="B47">Wang et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B29">Mhamed et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B43">Tianjing and Mhamed, 2024</xref>). Recent studies have demonstrated the potential of UAV-based phenotyping and machine learning approaches for monitoring crop traits and yield in tomato and quinoa, highlighting the growing role of computer vision in precision agriculture (<xref ref-type="bibr" rid="B23">Johansen et&#xa0;al., 2019</xref>, <xref ref-type="bibr" rid="B22">2020</xref>; <xref ref-type="bibr" rid="B21">Jiang et&#xa0;al., 2022a</xref>). Deep learning enhances fruit detection by extracting key colour, shape, and texture features for segmentation and recognition. However, accuracy in orchards is hindered by variable lighting, foliage cover, and clustered fruit. 
Moreover, reliance on large datasets, high computational demands, and long training times limits their practical use in apple harvesting (<xref ref-type="bibr" rid="B47">Wang et&#xa0;al., 2022</xref>). Furthermore, they often produce only 2D bounding boxes, lacking the precise depth information needed for robotic harvesting. These constraints limit their suitability for real-time field deployment.</p>
<p>Beyond fruit detection, deep learning has advanced applications in remote sensing, radar imaging, and ecological monitoring (<xref ref-type="bibr" rid="B11">Guan et&#xa0;al., 2025</xref>). Recent studies on PolSAR ship detection (<xref ref-type="bibr" rid="B7">Gao et&#xa0;al., 2023a</xref>), scattering-aware networks, few-shot SAR classification (<xref ref-type="bibr" rid="B9">Gao et&#xa0;al., 2023b</xref>, <xref ref-type="bibr" rid="B8">2024</xref>), and multi-source data fusion highlights its versatility in complex detection tasks (<xref ref-type="bibr" rid="B38">Shen et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B54">Zhang et&#xa0;al., 2024</xref>). These cross-domain advances reinforce the relevance of developing efficient and adaptable methods for automated fruit detection and localization.</p>
<p>An alternative to deep learning is clustering-based segmentation. K-Means clustering is an unsupervised learning method that groups pixels by feature similarity, enabling effective fruit segmentation under complex orchard conditions (<xref ref-type="bibr" rid="B26">Likas et&#xa0;al., 2003</xref>; <xref ref-type="bibr" rid="B30">Na et&#xa0;al., 2010</xref>). K-Means delivers rapid and robust segmentation, standing out from methods such as Fuzzy C-Means and DBSCAN, which require more computation and struggle with noise (<xref ref-type="bibr" rid="B40">Song et&#xa0;al., 2013</xref>; <xref ref-type="bibr" rid="B17">Jamel and Akay, 2019</xref>; <xref ref-type="bibr" rid="B16">Ikotun et&#xa0;al., 2023</xref>). Previous studies have applied K-Means for apple recognition (<xref ref-type="bibr" rid="B48">Wang Dandan et&#xa0;al., 2015</xref>), while some researchers utilized integrated extremum methods for fruit positioning (<xref ref-type="bibr" rid="B19">Jia et&#xa0;al., 2020</xref>). Recent studies further refined segmentation with fuzzy C-means (<xref ref-type="bibr" rid="B37">Sarbaini et&#xa0;al., 2022</xref>), CNN-based semantic segmentation (<xref ref-type="bibr" rid="B35">Ramadhani et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B47">Wang et&#xa0;al., 2022</xref>), and monocular vision approaches (<xref ref-type="bibr" rid="B56">Zubair et&#xa0;al., 2024</xref>). However, the challenge of achieving robust performance in real orchard conditions with limited data remains (<xref ref-type="bibr" rid="B53">Yang et&#xa0;al., 2012</xref>).</p>
<p>This study presents an enhanced K-Means clustering segmentation algorithm combined with multi-feature fusion (colour, morphology, and texture) and stereo vision for accurate 3D localization. The approach is designed to reduce misclassification and provide depth information critical for robotic harvesting. Unlike deep learning methods, the proposed system emphasizes computational efficiency, real-time applicability, and reduced training data requirements, making it well suited to practical orchard deployment. The method is comprehensively evaluated against state-of-the-art models, including Faster R-CNN, YOLOv7, and Mask R-CNN, and demonstrates superior accuracy, reduced coordinate deviation, and stable performance across different camera angles.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<p>The experimental setup consists of a four-arm parallel picking robot equipped with a high-precision vision system and a 3D stereo camera (1920 &#xd7; 1080 pixels; Model: Hikvision MV-DL2125-04H-R) for apple detection and localization. The 3D camera was mounted at the front end of the robotic arm. Computational processing was performed on a high-performance computer running an Intel i7&#x2013;12700 processor, ensuring efficient execution of clustering, segmentation, and localization tasks. Apple images were collected from a commercial orchard with diverse lighting conditions (morning, noon, evening), varying shading levels, and different apple clustering patterns to ensure a representative dataset. A dataset comprising 4,200 sample images of Aksu apples, a variety cultivated in Aksu Prefecture, Xinjiang, China, was collected. The dataset includes 2,200 images of red apples against green foliage and 2,000 images of green apples against green foliage. Each apple within the images was manually annotated using a circle-fitting method to ensure precise localization and segmentation. The dataset was split into an 8:2 ratio, with 80% used for training and 20% for testing. This choice ensured sufficient data for training while maintaining an independent set for performance evaluation. As the proposed method is based on clustering and does not require iterative hyperparameter optimization, no separate validation set was used. A similar adjustment of dataset splitting has been discussed in previous studies with small datasets (<xref ref-type="bibr" rid="B1">Ashtiani et&#xa0;al., 2021</xref>). Each image was manually annotated using LabelImg software, and apples were labelled based on their position, size, and occlusion level. To improve the model&#x2019;s robustness, data augmentation was applied. 
Random rotation (&#xb1; 15&#xb0;), brightness variation (&#xb1; 20%), and Gaussian noise were introduced to simulate real-world orchard variability caused by lighting changes, fruit occlusion, and viewing angle differences. This process reduced the risk of overfitting and enabled better generalization to unseen samples. Similar to findings in postharvest imaging studies (<xref ref-type="bibr" rid="B18">Javanmardi and Ashtiani, 2025</xref>), such augmentation strategies enhance dataset diversity and improve the reliability of classification models.</p>
<p>In the next section, the equations describing standard image preprocessing operations, clustering formulations, stereo-vision disparity and depth estimation, and the evaluation metrics are based on established methods documented in (<xref ref-type="bibr" rid="B13">Hartigan and Wong, 1979</xref>; <xref ref-type="bibr" rid="B14">Hartley and Zisserman, 2003</xref>; <xref ref-type="bibr" rid="B10">Gonzales and Woods, 2018</xref>). The enhanced K-means clustering and stereo vision localization method was implemented using standard Python and OpenCV libraries, with all parameters reported in this study. The dataset cannot be made publicly available due to restrictions, but a representative subset or implementation details are available from the corresponding author upon reasonable request.</p>
<sec id="s2_1">
<label>2.1</label>
<title>Optimization of apple image segmentation using enhanced K-Means</title>
<p>Combining morphological processing, feature optimization, and colour space analysis, a modified K-Means clustering method was constructed. Enhanced colour sensitivity was achieved by converting RGB to HSI, using the H component for exceptional target-background difference. Images were filtered using Gaussian and median filtering techniques to reduce noise (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Equation 2</bold></xref>) and then transformed to greyscale to ensure consistency under varying illumination conditions (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Equation 1</bold></xref>).</p>
<p>Then, we extracted the HSI colour space that is highly sensitive to apple colour for segmentation purposes using <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>. The RGB colour space illustrated variations in colour intensity and brightness, whereas the HSI space replicated human visual perception abilities. As <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref> shows, the RGB to HSI conversion turned unit square data into a bicone. A 3D camera captured apple image features and stored them as RGB grayscale values, ensuring enhanced consistency for segmentation under variable lighting conditions.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Conversion method from RGB to HSI color space.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1598414-g001.tif">
<alt-text content-type="machine-generated">Diagram depicting a system for apple detection. It involves an apple detection target linked to &#x201c;Color and depth characteristics,&#x201d; which connects to a 3D camera. The 3D camera feeds into a &#x201c;Visual identity system,&#x201d; connected to &#x201c;Graph neural network model,&#x201d; which ultimately operates a robotic arm.</alt-text>
</graphic></fig>
<disp-formula id="eq1"><label>(1)</label>
<mml:math display="block" id="M1"><mml:mrow><mml:mi>H</mml:mi><mml:mo>=</mml:mo><mml:mi>arctan</mml:mi><mml:mfenced><mml:mrow><mml:mfrac><mml:mrow><mml:msqrt><mml:mn>3</mml:mn></mml:msqrt><mml:mo stretchy="false">(</mml:mo><mml:mi>G</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mi>B</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mi>R</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mi>G</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mi>B</mml:mi></mml:mrow></mml:mfrac></mml:mrow></mml:mfenced></mml:mrow></mml:math>
</disp-formula>
<p>Where <italic>H</italic> indicates component values.</p>
<p>The H component proved useful for separating apples from the background. The conventional K-Means method did, however, show errors, including mis-segmentation in challenging environments. To improve accuracy and robustness, the algorithm was enhanced through an adaptive selection of the initial clustering centers (<xref ref-type="disp-formula" rid="eq2">Equations 2</xref>, <xref ref-type="disp-formula" rid="eq3">3</xref>). The updated clustering method minimized intra-cluster variance (<xref ref-type="disp-formula" rid="eq5">Equation 5</xref>).</p>
<disp-formula id="eq2"><label>(2)</label>
<mml:math display="block" id="M2"><mml:mrow><mml:msub><mml:mi>C</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>g</mml:mi><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>P</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msub><mml:mstyle displaystyle="true"><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi>N</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:munder></mml:mstyle><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mo>&#x2225;</mml:mo><mml:mi>H</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mi>H</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2225;</mml:mo></mml:mrow></mml:mfrac></mml:mrow></mml:math>
</disp-formula>
<p>Where <italic>C<sub>k</sub></italic> denotes the initial center of the k class; <italic>P<sub>(i)</sub></italic> denotes the set of points; <italic>N<sub>(i)</sub></italic> denotes the set of domain points; <italic>H</italic> (<italic>i</italic>) and <italic>H</italic> (<italic>j</italic>) represent the feature vectors or attribute values of pixels <italic>i</italic> and <italic>j</italic>.</p>
<disp-formula id="eq3"><label>(3)</label>
<mml:math display="block" id="M3"><mml:mrow><mml:mi>D</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>x</mml:mi><mml:mn>0</mml:mn></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mn>0</mml:mn></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:mstyle displaystyle="true"><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>m</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>n</mml:mi></mml:munderover></mml:mstyle><mml:msub><mml:mi>w</mml:mi><mml:mi>m</mml:mi></mml:msub><mml:mo>&#xb7;</mml:mo><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>F</mml:mi><mml:mi>m</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>x</mml:mi><mml:mn>0</mml:mn></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>F</mml:mi><mml:mi>m</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>y</mml:mi><mml:mn>0</mml:mn></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:msqrt></mml:mrow></mml:math>
</disp-formula>
<p>Where <italic>D</italic> (<italic>x</italic><sup>0</sup>, <italic>y</italic><sup>0</sup>) is the Euclidean distance between the pixel points <italic>x</italic><sup>0</sup> and <italic>y</italic><sup>0</sup>, and <italic>w<sub>m</sub></italic> denotes the feature weights; <italic>n</italic> denotes the total dimension of the feature space; <italic>F<sub>m</sub></italic> (<italic>x</italic><sup>0</sup>) and <italic>F<sub>m</sub></italic> (<italic>y</italic><sup>0</sup>) represent the pixel intensities in pixels <italic>x</italic><sup>0</sup> and <italic>y</italic><sup>0</sup> in the m<sup>th</sup> dimension, respectively.</p>
<p>The segmentation results underwent morphological processing, eliminating small noise elements and restoring target edges (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Equation 3</bold></xref>). Boundary extraction utilized erosion to isolate object edges, as shown in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>. Connected region calculation was performed using <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Equation 4</bold></xref> to obtain complete target information.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Morphological boundary extraction through erosion and subtraction. Small artifacts are removed, and clean object edges are restored for clustering.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1598414-g002.tif">
<alt-text content-type="machine-generated">Diagram comparing RGB and Hexacone color models. The left shows an RGB cube with axes labeled red, green, and blue, depicting grayscale and colors like cyan and magenta. The right shows a hexacone with primary and secondary colors arranged in ovals, indicating hue with an arrow.</alt-text>
</graphic></fig>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Multi-feature model for apple recognition and 3D positioning</title>
<p>Following segmentation and clustering, apple centroids were precisely recognized by integrating colour, morphology, and texture features. Stereo vision technology and 3D camera calibration principles were used to map apples from 2D image coordinates to 3D spatial coordinates, providing accurate positional data for the harvesting robot. <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref> displays the calibration principle for the stereo vision system and the 3D camera. The stereo vision system and 3D camera underwent calibration to synchronize the vision coordinate system with the robot coordinate system, which enabled precise target recognition and localization.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Schematic of the robotic apple detection system integrating a 3D camera, a visual identity module, and a graph neural network for precise recognition.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1598414-g003.tif">
<alt-text content-type="machine-generated">Flowchart depicting a process for optimizing features from apple images and depth information. It starts with input and preprocessing, followed by multi-feature extraction and fusion. Subsequent stages include feature weight assignment, multi-feature fusion strategy, and feature standardization. The process concludes with outputting optimized fused features for localization and spatial representation.</alt-text>
</graphic></fig>
<p>Single-feature detection showed high vulnerability to environmental conditions, including lighting and noise levels. Therefore, a multi-feature fusion approach was employed to enhance detection robustness and accuracy. Composite feature values determined target areas based on colour, texture, and morphology weights (<xref ref-type="disp-formula" rid="eq4">Equation 4</xref>).</p>
<disp-formula id="eq4"><label>(4)</label>
<mml:math display="block" id="M4"><mml:mrow><mml:mi>T</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:msub><mml:mi>&#x3b1;</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mi>H</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x3b1;</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mtext>GLCM</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x3b1;</mml:mi><mml:mn>3</mml:mn></mml:msub><mml:mtext>Shape</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>Where <italic>T</italic> (<italic>x</italic>, <italic>y</italic>) is the composite feature value, which is used to determine whether the pixel point belongs to the target area or not; <italic>&#x3b1;</italic><sub>1</sub>, <italic>&#x3b1;</italic><sub>2</sub> and <italic>&#x3b1;</italic><sub>3</sub> are the weight coefficients, corresponding to the weights of colour, texture and morphological features, respectively. The values of <italic>&#x3b1;</italic><sub>1</sub>, <italic>&#x3b1;</italic><sub>2</sub>, and <italic>&#x3b1;</italic><sub>3</sub> were empirically tuned using the training dataset, selecting the combination that achieved the best segmentation and detection performance under varying orchard conditions. <italic>H</italic> (<italic>x</italic>, <italic>y</italic>) indicates a colour feature; GLCM (<italic>x</italic>, <italic>y</italic>) denotes the grayscale covariance matrix, which is used to extract texture features; Shape (<italic>x</italic>, <italic>y</italic>) represents morphological features.</p>
<p><xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref> illustrates the multi-feature fusion approach for apple image analysis, which involves analyzing multiple pose features from apples and extracting essential features after bias removal to enhance centroid recognition and localization. We calculated the center of mass using the weighted average of pixel coordinates within the region, as described in <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Equation 5</bold></xref>. Internal and external camera parameters were calibrated using <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Equation 6</bold></xref>.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Algorithm pipeline showing preprocessing, multi-feature extraction, feature weighting, fusion, and 3D localization outputs, with results illustrated in <bold>(a)</bold> the proposed algorithm and <bold>(b)</bold> the MISA method.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1598414-g004.tif">
<alt-text content-type="machine-generated">Two line graphs depict the proportion in percentage versus angle degree with vertical form for four apple trees, labeled one through four. Graph (a) illustrates the proposed algorithm, while graph (b) shows the MISA algorithm. Both graphs display a peak at forty-five degrees followed by a decline, leveling out after ninety degrees, with similar patterns for each tree. Different line styles represent each tree for comparison.</alt-text>
</graphic></fig>
<p>The block-matching algorithm extracted parallax values to solve positional discrepancies between left and right camera images (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Equation 7</bold></xref>). Depth information was then calculated using parallax values and triangulation principles (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Equation 8</bold></xref>). Real-world coordinates were derived by mapping the center of mass and depth information to the camera&#x2019;s coordinate system (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Equation 9</bold></xref>).</p>
<p>The problem of environmental occlusion was solved by applying morphological techniques combined with depth interpolation methods (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Equation 10</bold></xref>). Localization accuracy was further enhanced by adjusting camera parameters and refining feature fusion weights based on localization error (<xref ref-type="disp-formula" rid="eq5">Equation 5</xref>).</p>
<p>Three-dimensional localization accuracy was tested by taking depth measurements at six points on apple corners at distances ranging from 800 mm to 1100 mm. The difference between real and calculated depth values was assessed, while morphological and depth interpolation techniques minimized errors (<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Equation 10</bold></xref>).</p>
<disp-formula id="eq5"><label>(5)</label>
<mml:math display="block" id="M5"><mml:mrow><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mtext>real</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mtext>calc</mml:mtext></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>Y</mml:mi><mml:mrow><mml:mtext>real</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>Y</mml:mi><mml:mrow><mml:mtext>calc</mml:mtext></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mtext>real</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mtext>calc</mml:mtext></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:msqrt></mml:mrow></mml:math>
</disp-formula>
<p>Where <italic>E</italic> represents positioning error and (<italic>X</italic><sub>real</sub>, <italic>Y</italic><sub>real</sub>, <italic>Z</italic><sub>real</sub>) are the actual coordinates and (<italic>X</italic><sub>calc</sub>, <italic>Y</italic><sub>calc</sub>, <italic>Z</italic><sub>calc</sub>) are the calculated coordinates.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Benchmark comparisons and performance evaluation</title>
<p>Benchmarking the proposed model against several state-of-the-art methods allowed for a comprehensive performance evaluation. The selected benchmarks include widely recognized and validated techniques in fruit detection and segmentation research. Faster Region-Based Convolutional Neural Network (Faster R-CNN), You Only Look Once version 7 (YOLOv7), and Masked Region-Based Convolutional Neural Network (Mask R-CNN) are leading deep learning models known for their high detection accuracy. Density-Based Spatial Clustering of Applications with Noise (DBSCAN), Mean-Shift Image Segmentation Algorithm (MISA), and Superpixel Segmentation Algorithm (SSA) are commonly used clustering and segmentation methods designed to handle spatial variation and noise. These methods were chosen to ensure a balanced comparison between deep learning and clustering-based approaches.</p>
<p>The segmentation performance was compared using Mean Coordinate Deviation (MCD) and Correct Recognition Rate (CRR) as evaluation metrics. For object detection and spatial localization, the proposed model was evaluated against YOLOv7, Single Shot MultiBox Detector (SSD), Fully Convolutional Networks (FCN), and Mask R-CNN under four real-world conditions: complex illumination, fruit occlusion, dynamic oscillation, and dense target distribution. Performance was measured using Recognition Accuracy (RA), mean Average Precision (mAP), and Frames Per Second (FPS). Additionally, the model&#x2019;s stability was assessed across different camera angles (0&#xb0;, 15&#xb0;, 30&#xb0;, and 45&#xb0;) by comparing it with the Hierarchical Clustering Algorithm (HCA) and Region Growing Segmentation Algorithm (RGSA) using the standard deviation of recognition accuracy.</p>
<p>The proposed model was comprehensively evaluated using RA for detection accuracy, MCD for spatial precision, CRR for segmentation accuracy, F1-score for detection reliability, mAP for overall detection performance, FPS for real-time efficiency, and standard deviation for stability under varying conditions. These metrics collectively demonstrate the model&#x2019;s accuracy, robustness, and practical efficiency for automated apple detection.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<p>The proposed clustering-based segmentation and 3D localization algorithm demonstrated consistent superiority in detection precision and spatial localization under diverse orchard conditions. <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref> illustrates the variation in RA and MCD under different lighting and occlusion levels. The proposed method maintained an average accuracy above 91%, while Faster R-CNN exhibited a pronounced decline when fruit overlaps exceeded 40%. In contrast, our algorithm achieved lower MCD values (&#x2264; 0.3%), indicating more stable spatial localization across both daytime and nighttime datasets (<xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>). Moreover, the consistently reduced MCD values throughout all collection distances indicate better localization accuracy of the proposed algorithm (<xref ref-type="fig" rid="f6"><bold>Figures&#xa0;6A, B</bold></xref>). <xref ref-type="fig" rid="f6"><bold>Figures&#xa0;6C, D</bold></xref> demonstrate that the proposed method consistently maintains a CRR above 90%, outperforming DBSCAN across varying overlap rates. The depth estimation accuracy of the stereo vision system was evaluated by comparing it with YOLOv7 and SSD across four different scenarios: complex lighting conditions, fruit occlusion, dynamic oscillation conditions, and dense target distributions. Across all four tested scenarios, the suggested model showed better recall and precision than YOLOv7 and SSD (<xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7</bold></xref>).</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Detection accuracy (RA) and mean coordinate deviation (MCD) of the proposed clustering algorithm and Faster R-CNN under different overlap rates, illustrated for <bold>(a)</bold> MCD during the day, <bold>(b)</bold> MCD during the night, <bold>(c)</bold> RA during the day, and <bold>(d)</bold> RA during the night.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1598414-g005.tif">
<alt-text content-type="machine-generated">Four line graphs comparing two algorithms, Faster R-CNN and a proposed algorithm, based on MCD and RA metrics. Graph (a) shows MCD increases with overlap rate during the day. Graph (b) shows a similar trend at night. Graph (c) displays stable RA during the day, while graph (d) shows RA decreases slightly at night.</alt-text>
</graphic></fig>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Comparison between the proposed algorithm and DBSCAN across different collection distances (900&#x2013;1700 mm), shown for <bold>(a)</bold> MCD under 40 images, <bold>(b)</bold> MCD under 45 images, <bold>(c)</bold> CRR under 40 images, and <bold>(d)</bold> CRR under 45 images.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1598414-g006.tif">
<alt-text content-type="machine-generated">Four charts compare DBSCAN and a proposed algorithm. Top left: Bar chart of MCD% under 40 images, showing MCD decreasing as collection distance increases. Top right: Similar bar chart under 45 images. Bottom left: Line chart of CRR% under 40 images, showing higher performance for the proposed algorithm across overlap rates. Bottom right: Similar CRR comparison under 45 images. Each chart highlights consistent performance trends for both algorithms.</alt-text>
</graphic></fig>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Precision&#x2013;Recall comparison of YOLOv7, SSD, and the proposed model under different field conditions, including <bold>(a)</bold> complex lighting, <bold>(b)</bold> apple occlusion, <bold>(c)</bold> dynamic oscillation, and <bold>(d)</bold> multi-target dense distribution environments.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1598414-g007.tif">
<alt-text content-type="machine-generated">Four precision-recall curves comparing YOLOv7, SSD, and a proposed model. Panel (a) displays a complex lighting environment, panel (b) an apple occlusion environment, panel (c) a dynamic oscillation environment, and panel (d) a multi-target dense distribution environment. The proposed model consistently shows superior performance, maintaining high precision across varying recall levels.</alt-text>
</graphic></fig>
<p>Depth estimation accuracy was further validated, achieving a maximum localization error of 0.97% across 800&#x2013;1100 mm collection distances (<xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8</bold></xref>). Errors ranged from 0.4&#x2013;0.65% at 800 mm and 0.4&#x2013;0.5% at 1000 mm, with only slight increases to 0.73&#x2013;0.79% at 1100 mm. All deviations remained below 1%, confirming high-precision depth estimation suitable for robotic harvesting applications. As shown in <xref ref-type="fig" rid="f9"><bold>Figures&#xa0;9A, B</bold></xref>, the proposed algorithm outperformed MISA in detecting apple orientations on four trees at 0&#xb0;, 45&#xb0;, 90&#xb0;, and 180&#xb0;. It achieved the highest detection rate (&gt; 40%) at 45&#xb0;, while no apples were detected at 180&#xb0;, where MISA showed greater variation and overlap, indicating reduced stability. Results for multiple algorithms at the 45&#xb0; orientation are summarized in <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>. The proposed method achieved the highest recognition accuracy (93%), correctly identifying 39 apples, followed by the CNN model (88%). The template-matching (TM) approach had the lowest accuracy (70%, 28 apples correctly identified).</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Effective focal-length standard deviation of the stereo vision system under different numbers of images per sheet, evaluated for <bold>(a)</bold> camera angle 0&#xb0;, <bold>(b)</bold> camera angle 15&#xb0;, <bold>(c)</bold> camera angle 30&#xb0;, and <bold>(d)</bold> camera angle 45&#xb0;, comparing the proposed model with HCA and RGSA.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1598414-g008.tif">
<alt-text content-type="machine-generated">Four line graphs compare the effective focal length standard deviation per pixel against the number of images per sheet, at different camera angles. Each graph includes plots for HCA, RGSA, and a proposed model. The y-axis represents the standard deviation, and the x-axis represents the number of images. (a) shows a camera angle of 0 degrees, (b) 15 degrees, (c) 30 degrees, and (d) 45 degrees. Across all angles, the proposed model consistently shows lower deviations compared to HCA and RGSA as the number of images increases.</alt-text>
</graphic></fig>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Comparison of apple detection performance among FCN, Mask R-CNN, and the proposed model under different field conditions, including <bold>(a)</bold> changes in lighting, <bold>(b)</bold> dense fruit distribution, <bold>(c)</bold> wind disturbance, and <bold>(d)</bold> mixed fruit types, evaluated using F1-score, mAP, and FPS.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1598414-g009.tif">
<alt-text content-type="machine-generated">Bar charts comparing the performance of three models: FCN, Mask R-CNN, and a proposed model across four scenarios: changes in lighting, dense distribution of fruits, wind disturbance, and mixed fruit types. Each chart displays F1-score, mAP, and FPS. The proposed model generally shows higher performance in F1-score and mAP across all scenarios.</alt-text>
</graphic></fig>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Comparative performance of various algorithms in apple posture recognition.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Identification methods</th>
<th valign="middle" align="center">Recognition accuracy (%)</th>
<th valign="middle" align="center">Apples correctly identified (count)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Template Matching (TM)</td>
<td valign="middle" align="center">70</td>
<td valign="middle" align="center">28</td>
</tr>
<tr>
<td valign="middle" align="center">Support Vector Machine</td>
<td valign="middle" align="center">75</td>
<td valign="middle" align="center">30</td>
</tr>
<tr>
<td valign="middle" align="center">Bayesian Classification</td>
<td valign="middle" align="center">78</td>
<td valign="middle" align="center">31</td>
</tr>
<tr>
<td valign="middle" align="center">Convolutional Neural Network (CNN)</td>
<td valign="middle" align="center">88</td>
<td valign="middle" align="center">36</td>
</tr>
<tr>
<td valign="middle" align="center">Decision Tree</td>
<td valign="middle" align="center">84</td>
<td valign="middle" align="center">34</td>
</tr>
<tr>
<td valign="middle" align="center">Proposed Method</td>
<td valign="middle" align="center">93</td>
<td valign="middle" align="center">39</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In four real-world orchard scenarios, the proposed model was compared with FCN and Mask R-CNN (<xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10</bold></xref>). It consistently outperformed both, achieving an F1-score of 92% under varied illumination (<xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10A</bold></xref>) and an mAP of 91% for densely clustered fruits (<xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10B</bold></xref>). Under wind disturbance (<xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10C</bold></xref>), it maintained the highest frame rate (FPS), demonstrating strong real-time efficiency. Across multi-fruit orchard conditions (<xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10D</bold></xref>), the model again achieved the highest mAP, confirming its robustness and adaptability. <xref ref-type="fig" rid="f11"><bold>Figure&#xa0;11</bold></xref> shows that the proposed model maintained the lowest standard deviation across all camera angles (0&#xb0;&#x2013;45&#xb0;), stabilizing after about 25 images. Even at 45&#xb0;, where deviation slightly increased for all models, it remained the most stable, confirming reliable performance under varying camera orientations.</p>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>Measuring distance and relative error of the proposed stereo-vision depth estimation system across different collection distances, evaluated at <bold>(a)</bold> 800 mm, <bold>(b)</bold> 900 mm, <bold>(c)</bold> 1000 mm, and <bold>(d)</bold> 1100 mm, based on measurements from six corner points in the calibration board.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1598414-g010.tif">
<alt-text content-type="machine-generated">Four line graphs showing measuring distances and relative errors for collection distances of 800mm, 900mm, 1000mm, and 1100mm. Each graph has two lines: a solid line for measuring distance and a dashed line for relative error. The x-axis is labeled corner number, ranging from one to six. Each graph title specifies the collection distance.</alt-text>
</graphic></fig>
<fig id="f11" position="float">
<label>Figure&#xa0;11</label>
<caption>
<p>Model stability across camera angles (0&#xb0;&#x2013;45&#xb0;) compared with HCA and RGSA.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-16-1598414-g011.tif">
<alt-text content-type="machine-generated">A flowchart showing the process of image boundary detection. The original image A is a large green grid. Corrosive elements B is a smaller green grid. Applying B to A results in image C, which has a smaller green area. Subtracting C from A creates the boundary image highlighted in red.</alt-text>
</graphic></fig>
<p>The proposed clustering-based stereo-vision approach achieved &gt; 91% detection accuracy, &lt; 1% localization error, and stable performance under varying lighting and camera angles, all with a modest dataset. These results demonstrate its suitability for real-time, low-cost robotic harvesting, offering reliable detection and positioning without extensive training or high computational demand&#x2014;an effective solution for autonomous orchard operations in precision agriculture.</p>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>Accurate segmentation is crucial for precise apple detection in challenging orchard environments (<xref ref-type="bibr" rid="B24">Kang and Chen, 2020</xref>). The improved MCD and RA values indicate that multi-feature fusion with adaptive K-means clustering increases robustness to lighting changes and occlusion. Deep-learning models such as Faster R-CNN often lose accuracy under these conditions (<xref ref-type="bibr" rid="B2">Bargoti and Underwood, 2017</xref>; <xref ref-type="bibr" rid="B6">Fu et&#xa0;al., 2020</xref>). In contrast, the proposed unsupervised approach remains stable with fewer samples. Compared with DBSCAN, it achieved higher stability and accuracy across distances and image counts (<xref ref-type="bibr" rid="B27">Limwattanapibool and Arch&#x2010;int, 2017</xref>; <xref ref-type="bibr" rid="B13">Hartigan and Wong, 1979</xref>). These results confirm strong generalization and real-time potential for orchard use.</p>
<p>The success of robotic apple picking depends heavily on precise 3D localization. Our results are consistent with earlier research, where YOLO-based algorithms struggle to make real-time changes in challenging agricultural settings (<xref ref-type="bibr" rid="B20">Jiang et&#xa0;al., 2022b</xref>). This is consistent with other studies where YOLO-based models struggle in complex environments (<xref ref-type="bibr" rid="B4">Bresilla et&#xa0;al., 2019</xref>; <xref ref-type="bibr" rid="B33">Parvathi and Selvi, 2021</xref>). Consistent with previous studies, YOLOv7 demonstrated better accuracy and recognition speed than SSD (<xref ref-type="bibr" rid="B46">Wang and Chen, 2024</xref>). In contrast, a previous study showed that YOLOv7 achieved exceptional detection rates of <italic>Camellia oleifera</italic> fruit in orchards with 95.74% mAP, 93.67% F1 score, 94.21% precision, 93.13% recall and a detection time of 0.025 seconds (<xref ref-type="bibr" rid="B49">Wu et&#xa0;al., 2022</xref>). Recent research on brinjal detection using deep learning models has demonstrated the effectiveness of a lightweight YOLO architecture and edge-based computing frameworks for real-time harvesting applications (<xref ref-type="bibr" rid="B31">Nahiduzzaman et&#xa0;al., 2025</xref>; <xref ref-type="bibr" rid="B42">Tamilarasi et&#xa0;al., 2025</xref>). These approaches, while achieving high precision and recall, still depend on large, annotated datasets and relatively intensive computational resources. In contrast, our clustering-based multi-feature method achieves stable performance with fewer training samples and reduced hardware requirements, underscoring its suitability for orchard conditions. Our results are consistent with previous studies, indicating that while SSD performs well in controlled environments, it may struggle in more complex scenarios than YOLOv7. For example, Xu et&#xa0;al. 
reported lower SSD performance in typical agricultural environments where occlusions and cluttered backgrounds are very common (<xref ref-type="bibr" rid="B52">Xu et&#xa0;al., 2024</xref>). In contrast, Deng et&#xa0;al. (<xref ref-type="bibr" rid="B5">Deng et&#xa0;al., 2024</xref>) found that YOLOv7, when enhanced with attention mechanisms, consistently outperformed SSD in citrus detection under different orchard conditions. Apple posture detection is critical in establishing the best picking strategies (<xref ref-type="bibr" rid="B28">Liu et&#xa0;al., 2024</xref>). The observed stable detection suggests that our method effectively addresses occlusion and angle-related distortions, a common challenge in fruit detection (<xref ref-type="bibr" rid="B36">Safari et&#xa0;al., 2024</xref>).</p>
<p>The proposed method showed stable performance relative to MISA and achieved higher accuracy than CNN, TM, and other traditional classifiers, reflecting improved feature extraction and classification capability. Similar challenges in illumination and feature consistency were also noted in earlier work (<xref ref-type="bibr" rid="B41">Sun et&#xa0;al., 2021</xref>). Consistent results under varying field conditions confirm that the model can maintain real-time reliability in orchard operations. Previous studies using FCN reported fruit-counting accuracies of 0.91&#x2013;0.95 and yield accuracies up to 0.98 (<xref ref-type="bibr" rid="B12">H&#xe4;ni et&#xa0;al., 2020</xref>), while Faster R-CNN achieved an F1-score of 0.89 and 91% mAP. In contrast, our model achieved higher mAP, F1-score, and frame rate, demonstrating superior detection in dense, multi-fruit environments. Real-world comparison with FCN and Mask R-CNN confirmed the proposed model&#x2019;s superior accuracy and processing efficiency for dense, multi-fruit environments (<xref ref-type="bibr" rid="B45">Wan and Goudos, 2020</xref>; <xref ref-type="bibr" rid="B15">He et&#xa0;al., 2017</xref>). Mask R-CNN performed less well in our study than in previous reports, in which its precision rate reached 97.31% and its recall rate reached 95.70% (<xref ref-type="bibr" rid="B19">Jia et&#xa0;al., 2020</xref>). These outcomes highlight its stability and real-time applicability under orchard conditions. Unlike deep-learning models that rely on large annotated datasets, the algorithm maintained strong performance with limited training images, reflecting better adaptability and lower data dependence (<xref ref-type="bibr" rid="B25">Koirala et&#xa0;al., 2019</xref>). Bargoti and Underwood found that 729 training images were necessary to stabilize AP for apple detection, but almond and mango models needed more data (<xref ref-type="bibr" rid="B2">Bargoti and Underwood, 2017</xref>). 
This study also demonstrated that data augmentation enabled better apple detection using only 100 images compared to 300 images without augmentation. Similarly, 93% of apples were accurately detected in 50 images despite uneven lighting conditions in a previous study (<xref ref-type="bibr" rid="B51">Xu and Lv, 2018</xref>). Compared to deep learning models like Faster R-CNN and YOLOv7, the proposed method requires less computational power and no extensive training, making it suitable for real-time applications on standard hardware. While sequential processing may limit scalability in large-scale deployments, this can be optimized with parallel computing. The pipeline&#x2019;s reliance on generalizable features such as colour, texture, and morphology also makes it adaptable to other fruits or crops with minor adjustments. However, large-scale field validation and integration with robotic harvesting systems are still required to confirm performance under real operating conditions, which will be addressed in future development.</p>
<p>In conclusion, this study presents a clustering-based stereo vision algorithm that combines K-means segmentation and multi-feature fusion for accurate apple detection and 3D localization in orchard environments. The method offers high accuracy, strong generalization, and real-time feasibility with minimal training data and computational demand&#x2014;key advantages over deep-learning approaches. While sequential processing and limited field scale remain constraints, these can be addressed through parallel computing and large-scale robotic trials. Future work should focus on optimizing real-time performance and extending the framework to other fruit crops and intelligent harvesting systems.</p>
</sec>
</body>
<back>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary Material</bold></xref>. Further inquiries can be directed to the corresponding author.</p></sec>
<sec id="s6" sec-type="author-contributions">
<title>Author contributions</title>
<p>JW: Funding acquisition, Visualization, Software, Conceptualization, Resources, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing, Project administration, Validation, Supervision. WS: Formal Analysis, Data curation, Visualization, Investigation, Writing &#x2013; review &amp; editing, Software.</p></sec>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s9" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<sec id="s11" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpls.2025.1598414/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpls.2025.1598414/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ashtiani</surname> <given-names>S.-H. M.</given-names></name>
<name><surname>Javanmardi</surname> <given-names>S.</given-names></name>
<name><surname>Jahanbanifard</surname> <given-names>M.</given-names></name>
<name><surname>Martynenko</surname> <given-names>A.</given-names></name>
<name><surname>Verbeek</surname> <given-names>F. J.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Detection of mulberry ripeness stages using deep learning models</article-title>. <source>IEEE Access</source> <volume>9</volume>, <fpage>100380</fpage>&#x2013;<lpage>100394</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2021.3096550</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Bargoti</surname> <given-names>S.</given-names></name>
<name><surname>Underwood</surname> <given-names>J.</given-names></name>
</person-group> (<year>2017</year>). &#x201c;
<article-title>Deep fruit detection in orchards</article-title>,&#x201d; in <conf-name>IEEE International Conference on Robotics and Automation (ICRA): IEEE)</conf-name>. New York (USA): Institute of Electrical and Electronics Engineers (IEEE). <fpage>3626</fpage>&#x2013;<lpage>3633</lpage>.
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Biffi</surname> <given-names>L. J.</given-names></name>
<name><surname>Mitishita</surname> <given-names>E.</given-names></name>
<name><surname>Liesenberg</surname> <given-names>V.</given-names></name>
<name><surname>Santos</surname> <given-names>A.</given-names></name>
<name><surname>Goncalves</surname> <given-names>D. N.</given-names></name>
<name><surname>Estrabis</surname> <given-names>N. V.</given-names></name>
<etal/>
</person-group>. (<year>2020</year>). 
<article-title>ATSS deep learning-based approach to detect apple fruits</article-title>. <source>Remote Sens.</source> <volume>13</volume>, <elocation-id>54</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs13010054</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bresilla</surname> <given-names>K.</given-names></name>
<name><surname>Perulli</surname> <given-names>G. D.</given-names></name>
<name><surname>Boini</surname> <given-names>A.</given-names></name>
<name><surname>Morandi</surname> <given-names>B.</given-names></name>
<name><surname>Corelli Grappadelli</surname> <given-names>L.</given-names></name>
<name><surname>Manfrini</surname> <given-names>L.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Single-shot convolution neural networks for real-time fruit detection within the tree</article-title>. <source>Front. Plant Sci.</source> <volume>10</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2019.00611</pub-id>, PMID: <pub-id pub-id-type="pmid">31178875</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Deng</surname> <given-names>F.</given-names></name>
<name><surname>Chen</surname> <given-names>J.</given-names></name>
<name><surname>Fu</surname> <given-names>L.</given-names></name>
<name><surname>Zhong</surname> <given-names>J.</given-names></name>
<name><surname>Qiao</surname> <given-names>W.</given-names></name>
<name><surname>Luo</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Real-time citrus variety detection in orchards based on complex scenarios of improved YOLOv7</article-title>. <source>Front. Plant Sci.</source> <volume>15</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2024.1381694</pub-id>, PMID: <pub-id pub-id-type="pmid">39011299</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Fu</surname> <given-names>L.</given-names></name>
<name><surname>Majeed</surname> <given-names>Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
<name><surname>Karkee</surname> <given-names>M.</given-names></name>
<name><surname>Zhang</surname> <given-names>Q.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Faster R&#x2013;CNN&#x2013;based apple detection in dense-foliage fruiting-wall trees using RGB and depth features for robotic harvesting</article-title>. <source>Biosyst. Eng.</source> <volume>197</volume>, <fpage>245</fpage>&#x2013;<lpage>256</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.biosystemseng.2020.07.007</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gao</surname> <given-names>G.</given-names></name>
<name><surname>Bai</surname> <given-names>Q.</given-names></name>
<name><surname>Zhang</surname> <given-names>C.</given-names></name>
<name><surname>Zhang</surname> <given-names>L.</given-names></name>
<name><surname>Yao</surname> <given-names>L.</given-names></name>
</person-group> (<year>2023</year>a). 
<article-title>Dualistic cascade convolutional neural network dedicated to fully PolSAR image ship detection</article-title>. <source>ISPRS J. Photogrammetry Remote Sens.</source> <volume>202</volume>, <fpage>663</fpage>&#x2013;<lpage>681</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.isprsjprs.2023.07.006</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gao</surname> <given-names>G.</given-names></name>
<name><surname>Wang</surname> <given-names>M.</given-names></name>
<name><surname>Zhou</surname> <given-names>P.</given-names></name>
<name><surname>Yao</surname> <given-names>L.</given-names></name>
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
<name><surname>Li</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>A multi-branch embedding network with bi-classifier for few-shot ship classification of SAR images</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>63</volume>, <elocation-id>5201515</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2024.3500034</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gao</surname> <given-names>G.</given-names></name>
<name><surname>Zhang</surname> <given-names>C.</given-names></name>
<name><surname>Zhang</surname> <given-names>L.</given-names></name>
<name><surname>Duan</surname> <given-names>D.</given-names></name>
</person-group> (<year>2023</year>b). 
<article-title>Scattering characteristic-aware fully polarized SAR ship detection network based on a four-component decomposition model</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>61</volume>, <fpage>1</fpage>&#x2013;<lpage>22</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2023.3336300</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Gonzales</surname> <given-names>R. C.</given-names></name>
<name><surname>Woods</surname> <given-names>R. E.</given-names></name>
</person-group> (<year>2018</year>). <source>Digital image processing 4th edition.</source> (<publisher-loc>New York</publisher-loc>: 
<publisher-name>Pearson</publisher-name>).
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Guan</surname> <given-names>Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
<name><surname>Gao</surname> <given-names>G.</given-names></name>
<name><surname>Cao</surname> <given-names>C.</given-names></name>
<name><surname>Li</surname> <given-names>Z.</given-names></name>
<name><surname>Fu</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>A new indicator for assessing fishing ecological pressure using multi-source data: A case study of the South China Sea</article-title>. <source>Ecol. Indic.</source> <volume>170</volume>, <elocation-id>113096</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ecolind.2025.113096</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>H&#xe4;ni</surname> <given-names>N.</given-names></name>
<name><surname>Roy</surname> <given-names>P.</given-names></name>
<name><surname>Isler</surname> <given-names>V.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>A comparative study of fruit detection and counting methods for yield mapping in apple orchards</article-title>. <source>J. Field Robotics</source> <volume>37</volume>, <fpage>263</fpage>&#x2013;<lpage>282</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/rob.21902</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hartigan</surname> <given-names>J. A.</given-names></name>
<name><surname>Wong</surname> <given-names>M. A.</given-names></name>
</person-group> (<year>1979</year>). 
<article-title>Algorithm AS 136: A k-means clustering algorithm</article-title>. <source>J. R. Stat. Society Ser. C (Applied Statistics)</source> <volume>28</volume>, <fpage>100</fpage>&#x2013;<lpage>108</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2307/2346830</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Hartley</surname> <given-names>R.</given-names></name>
<name><surname>Zisserman</surname> <given-names>A.</given-names></name>
</person-group> (<year>2003</year>). <source>Multiple view geometry in computer vision.</source> (<publisher-loc>Cambridge, UK</publisher-loc>: 
<publisher-name>Cambridge University Press</publisher-name>).
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>He</surname> <given-names>K.</given-names></name>
<name><surname>Gkioxari</surname> <given-names>G.</given-names></name>
<name><surname>Doll&#xe1;r</surname> <given-names>P.</given-names></name>
<name><surname>Girshick</surname> <given-names>R.</given-names></name>
</person-group> (<year>2017</year>). &#x201c;
<article-title>Mask R-CNN</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE international conference on computer vision</conf-name>. New York, USA: Institute of Electrical and Electronics Engineers (IEEE). <fpage>2961</fpage>&#x2013;<lpage>2969</lpage>.
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ikotun</surname> <given-names>A. M.</given-names></name>
<name><surname>Ezugwu</surname> <given-names>A. E.</given-names></name>
<name><surname>Abualigah</surname> <given-names>L.</given-names></name>
<name><surname>Abuhaija</surname> <given-names>B.</given-names></name>
<name><surname>Heming</surname> <given-names>J.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>K-means clustering algorithms: A comprehensive review, variants analysis, and advances in the era of big data</article-title>. <source>Inf. Sci.</source> <volume>622</volume>, <fpage>178</fpage>&#x2013;<lpage>210</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ins.2022.11.139</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jamel</surname> <given-names>A.</given-names></name>
<name><surname>Akay</surname> <given-names>B.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>A survey and systematic categorization of parallel K-Means and Fuzzy-C-Means algorithms</article-title>. <source>Comput. Syst. Sci. Eng.</source> <volume>34</volume>, <fpage>259</fpage>&#x2013;<lpage>281</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.32604/csse.2019.34.259</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Javanmardi</surname> <given-names>S.</given-names></name>
<name><surname>Ashtiani</surname> <given-names>S.-H. M.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>AI-driven deep learning framework for shelf life prediction of edible mushrooms</article-title>. <source>Postharvest Biol. Technol.</source> <volume>222</volume>, <elocation-id>113396</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.postharvbio.2025.113396</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jia</surname> <given-names>W.</given-names></name>
<name><surname>Tian</surname> <given-names>Y.</given-names></name>
<name><surname>Luo</surname> <given-names>R.</given-names></name>
<name><surname>Zhang</surname> <given-names>Z.</given-names></name>
<name><surname>Lian</surname> <given-names>J.</given-names></name>
<name><surname>Zheng</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Detection and segmentation of overlapped fruits based on optimized mask R-CNN application in apple harvesting robot</article-title>. <source>Comput. Electron. Agric.</source> <volume>172</volume>, <elocation-id>105380</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2020.105380</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jiang</surname> <given-names>P.</given-names></name>
<name><surname>Ergu</surname> <given-names>D.</given-names></name>
<name><surname>Liu</surname> <given-names>F.</given-names></name>
<name><surname>Cai</surname> <given-names>Y.</given-names></name>
<name><surname>Ma</surname> <given-names>B.</given-names></name>
</person-group> (<year>2022</year>b). 
<article-title>A Review of Yolo algorithm developments</article-title>. <source>Proc. Comput. Sci.</source> <volume>199</volume>, <fpage>1066</fpage>&#x2013;<lpage>1073</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.procs.2022.01.135</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jiang</surname> <given-names>J.</given-names></name>
<name><surname>Johansen</surname> <given-names>K.</given-names></name>
<name><surname>Stanschewski</surname> <given-names>C. S.</given-names></name>
<name><surname>Wellman</surname> <given-names>G.</given-names></name>
<name><surname>Mousa</surname> <given-names>M. A.</given-names></name>
<name><surname>Fiene</surname> <given-names>G. M.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>a). 
<article-title>Phenotyping a diversity panel of quinoa using UAV-retrieved leaf area index, SPAD-based chlorophyll and a random forest approach</article-title>. <source>Precis. Agric.</source> <volume>23</volume>, <fpage>961</fpage>&#x2013;<lpage>983</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11119-021-09870-3</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Johansen</surname> <given-names>K.</given-names></name>
<name><surname>Morton</surname> <given-names>M. J.</given-names></name>
<name><surname>Malbeteau</surname> <given-names>Y.</given-names></name>
<name><surname>Aragon</surname> <given-names>B.</given-names></name>
<name><surname>Al-Mashharawi</surname> <given-names>S.</given-names></name>
<name><surname>Ziliani</surname> <given-names>M. G.</given-names></name>
<etal/>
</person-group>. (<year>2020</year>). 
<article-title>Predicting biomass and yield in a tomato phenotyping experiment using UAV imagery and random forest</article-title>. <source>Front. Artif. Intell.</source> <volume>3</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/frai.2020.00028</pub-id>, PMID: <pub-id pub-id-type="pmid">33733147</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Johansen</surname> <given-names>K.</given-names></name>
<name><surname>Morton</surname> <given-names>M.</given-names></name>
<name><surname>Malbeteau</surname> <given-names>Y.</given-names></name>
<name><surname>Aragon Solorio</surname> <given-names>B. J. L.</given-names></name>
<name><surname>Almashharawi</surname> <given-names>S.</given-names></name>
<name><surname>Ziliani</surname> <given-names>M.</given-names></name>
<etal/>
</person-group>. (<year>2019</year>). 
<article-title>Predicting biomass and yield at harvest of salt-stressed tomato plants using UAV imagery</article-title>. <source>Int. Arch. Photogrammetry Remote Sens. Spatial Inf. Sci. - ISPRS Arch.</source> <volume>XLII-2/W13</volume>, <fpage>407</fpage>&#x2013;<lpage>411</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5194/isprs-archives-XLII-2-W13-407-2019</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kang</surname> <given-names>H.</given-names></name>
<name><surname>Chen</surname> <given-names>C.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Fruit detection, segmentation and 3D visualisation of environments in apple orchards</article-title>. <source>Comput. Electron. Agric.</source> <volume>171</volume>, <elocation-id>105302</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2020.105302</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Koirala</surname> <given-names>A.</given-names></name>
<name><surname>Walsh</surname> <given-names>K. B.</given-names></name>
<name><surname>Wang</surname> <given-names>Z.</given-names></name>
<name><surname>Mccarthy</surname> <given-names>C.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Deep learning&#x2013;method overview and review of use for fruit detection and yield estimation</article-title>. <source>Comput. Electron. Agric.</source> <volume>162</volume>, <fpage>219</fpage>&#x2013;<lpage>234</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2019.04.017</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Likas</surname> <given-names>A.</given-names></name>
<name><surname>Vlassis</surname> <given-names>N.</given-names></name>
<name><surname>Verbeek</surname> <given-names>J. J.</given-names></name>
</person-group> (<year>2003</year>). 
<article-title>The global k-means clustering algorithm</article-title>. <source>Pattern Recognition</source> <volume>36</volume>, <fpage>451</fpage>&#x2013;<lpage>461</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S0031-3203(02)00060-2</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Limwattanapibool</surname> <given-names>O.</given-names></name>
<name><surname>Arch-Int</surname> <given-names>S.</given-names></name>
</person-group> (<year>2017</year>). 
<article-title>Determination of the appropriate parameters for K-means clustering using selection of region clusters based on density DBSCAN (SRCD-DBSCAN)</article-title>. <source>Expert Syst.</source> <volume>34</volume>, <fpage>e12204</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/exsy.12204</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>S.</given-names></name>
<name><surname>Xue</surname> <given-names>J.</given-names></name>
<name><surname>Zhang</surname> <given-names>T.</given-names></name>
<name><surname>Lv</surname> <given-names>P.</given-names></name>
<name><surname>Qin</surname> <given-names>H.</given-names></name>
<name><surname>Zhao</surname> <given-names>T.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Research progress and prospect of key technologies of fruit target recognition for robotic fruit picking</article-title>. <source>Front. Plant Sci.</source> <volume>15</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2024.1423338</pub-id>, PMID: <pub-id pub-id-type="pmid">39711588</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mhamed</surname> <given-names>M.</given-names></name>
<name><surname>Zhang</surname> <given-names>Z.</given-names></name>
<name><surname>Yu</surname> <given-names>J.</given-names></name>
<name><surname>Li</surname> <given-names>Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>M.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Advances in apple&#x2019;s automated orchard equipment: A comprehensive research</article-title>. <source>Comput. Electron. Agric.</source> <volume>221</volume>, <elocation-id>108926</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2024.108926</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Na</surname> <given-names>S.</given-names></name>
<name><surname>Xumin</surname> <given-names>L.</given-names></name>
<name><surname>Yong</surname> <given-names>G.</given-names></name>
</person-group> (<year>2010</year>). &#x201c;
<article-title>Research on k-means clustering algorithm: An improved k-means clustering algorithm</article-title>,&#x201d; in <conf-name>Third International Symposium on Intelligent Information Technology and Security Informatics, IITSI</conf-name>. New York, USA: Institute of Electrical and Electronics Engineers (IEEE). <fpage>63</fpage>&#x2013;<lpage>67</lpage>.
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Nahiduzzaman</surname> <given-names>M.</given-names></name>
<name><surname>Sarmun</surname> <given-names>R.</given-names></name>
<name><surname>Khandakar</surname> <given-names>A.</given-names></name>
<name><surname>Faisal</surname> <given-names>M.</given-names></name>
<name><surname>Islam</surname> <given-names>M. S.</given-names></name>
<name><surname>Alam</surname> <given-names>M. K.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Deep learning-based real-time detection and classification of tomato ripeness stages using YOLOv8 on raspberry Pi</article-title>. <source>Eng. Res. Express</source> <volume>7</volume>, <fpage>015219</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1088/2631-8695/ada720</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Onishi</surname> <given-names>Y.</given-names></name>
<name><surname>Yoshida</surname> <given-names>T.</given-names></name>
<name><surname>Kurita</surname> <given-names>H.</given-names></name>
<name><surname>Fukao</surname> <given-names>T.</given-names></name>
<name><surname>Arihara</surname> <given-names>H.</given-names></name>
<name><surname>Iwai</surname> <given-names>A.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>An automated fruit harvesting robot by using deep learning</article-title>. <source>ROBOMECH J.</source> <volume>6</volume>, <fpage>1</fpage>&#x2013;<lpage>8</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s40648-019-0141-2</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Parvathi</surname> <given-names>S.</given-names></name>
<name><surname>Selvi</surname> <given-names>S. T.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Detection of maturity stages of coconuts in complex background using Faster R-CNN model</article-title>. <source>Biosyst. Eng.</source> <volume>202</volume>, <fpage>119</fpage>&#x2013;<lpage>132</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.biosystemseng.2020.12.002</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Qu</surname> <given-names>W.</given-names></name>
<name><surname>Shang</surname> <given-names>W.</given-names></name>
<name><surname>Shao</surname> <given-names>Y.</given-names></name>
<name><surname>Wang</surname> <given-names>D.</given-names></name>
<name><surname>Yu</surname> <given-names>X.</given-names></name>
<name><surname>Song</surname> <given-names>H.</given-names></name>
</person-group> (<year>2015</year>). 
<article-title>Segmentation of foreground apple targets by fusing visual attention mechanism and growth rules of seed points</article-title>. <source>Spanish J. Agric. Res.</source> <volume>13</volume>, <fpage>e0214</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5424/sjar/2015133-7047</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ramadhani</surname> <given-names>S.</given-names></name>
<name><surname>Azzahra</surname> <given-names>D.</given-names></name>
<name><surname>Tomi</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Comparison of K-Means and K-Medoids algorithms in text mining based on Davies Bouldin Index testing for classification of student&#x2019;s thesis</article-title>. <source>Jurnal Teknologi Informasi dan Komunikasi</source> <volume>13</volume>, <fpage>24</fpage>&#x2013;<lpage>33</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.31849/digitalzone.v13i1.9292</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Safari</surname> <given-names>Y.</given-names></name>
<name><surname>Nakatumba-Nabende</surname> <given-names>J.</given-names></name>
<name><surname>Nakasi</surname> <given-names>R.</given-names></name>
<name><surname>Nakibuule</surname> <given-names>R.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>A Review on automated detection and assessment of fruit damage using machine learning</article-title>. <source>IEEE Access</source> <volume>12</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2024.3362230</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sarbaini</surname> <given-names>S.</given-names></name>
<name><surname>Saputri</surname> <given-names>W.</given-names></name>
<name><surname>Muttakin</surname> <given-names>F.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Cluster analysis menggunakan algoritma fuzzy K-means Untuk Tingkat Pengangguran Di Provinsi Riau</article-title>. <source>Jurnal Teknologi Dan Manajemen Industri Terapan</source> <volume>1</volume>, <fpage>78</fpage>&#x2013;<lpage>84</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.55826/tmit.v1iII.30</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Shen</surname> <given-names>B.</given-names></name>
<name><surname>Liu</surname> <given-names>T.</given-names></name>
<name><surname>Gao</surname> <given-names>G.</given-names></name>
<name><surname>Chen</surname> <given-names>H.</given-names></name>
<name><surname>Yang</surname> <given-names>J.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>A low-cost polarimetric radar system based on mechanical rotation and its signal processing</article-title>. <source>IEEE Trans. Aerospace Electronic Syst.</source> <volume>61</volume>, <fpage>4744</fpage>&#x2013;<lpage>4765</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TAES.2024.3507776</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Shi</surname> <given-names>X.</given-names></name>
<name><surname>Wang</surname> <given-names>S.</given-names></name>
<name><surname>Zhang</surname> <given-names>B.</given-names></name>
<name><surname>Ding</surname> <given-names>X.</given-names></name>
<name><surname>Qi</surname> <given-names>P.</given-names></name>
<name><surname>Qu</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Advances in object detection and localization techniques for fruit harvesting robots</article-title>. <source>Agronomy</source> <volume>15</volume>, <elocation-id>145</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy15010145</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Song</surname> <given-names>H.</given-names></name>
<name><surname>Zhang</surname> <given-names>C.</given-names></name>
<name><surname>Pan</surname> <given-names>J.</given-names></name>
<name><surname>Yin</surname> <given-names>X.</given-names></name>
<name><surname>Zhuang</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2013</year>). 
<article-title>Segmentation and reconstruction of overlapped apple images based on convex hull</article-title>. <source>Trans. Chin. Soc. Agric. Eng.</source> <volume>29</volume>, <fpage>163</fpage>&#x2013;<lpage>168</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3969/j.issn.1002-6819.2012.22.025</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sun</surname> <given-names>S.</given-names></name>
<name><surname>Li</surname> <given-names>C.</given-names></name>
<name><surname>Chee</surname> <given-names>P. W.</given-names></name>
<name><surname>Paterson</surname> <given-names>A. H.</given-names></name>
<name><surname>Meng</surname> <given-names>C.</given-names></name>
<name><surname>Zhang</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>High resolution 3D terrestrial LiDAR for cotton plant main stalk and node detection</article-title>. <source>Comput. Electron. Agric.</source> <volume>187</volume>, <elocation-id>106276</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2021.106276</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Tamilarasi</surname> <given-names>T.</given-names></name>
<name><surname>Muthulakshmi</surname> <given-names>P.</given-names></name>
<name><surname>Ashtiani</surname> <given-names>S.-H. M.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Smart edge computing framework for real-time brinjal harvest decision optimization</article-title>. <source>AgriEngineering</source> <volume>7</volume>, <elocation-id>196</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriengineering7060196</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Tianjing</surname> <given-names>Y.</given-names></name>
<name><surname>Mhamed</surname> <given-names>M.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Developments in automated harvesting equipment for the apple in the orchard</article-title>. <source>Smart Agric. Technol.</source> <volume>9</volume>, <elocation-id>100491</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.atech.2024.100491</pub-id>
</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Tu</surname> <given-names>J.</given-names></name>
<name><surname>Liu</surname> <given-names>C.</given-names></name>
<name><surname>Li</surname> <given-names>Y.</given-names></name>
<name><surname>Zhou</surname> <given-names>J.</given-names></name>
<name><surname>Yuan</surname> <given-names>J.</given-names></name>
</person-group> (<year>2010</year>). 
<article-title>Apple recognition method based on illumination invariant graph</article-title>. <source>Trans. Chin. Soc. Agric. Eng.</source> <volume>26</volume>, <fpage>26</fpage>&#x2013;<lpage>31</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3969/j.issn.1002-6819.2014.24.020</pub-id>
</mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wan</surname> <given-names>S.</given-names></name>
<name><surname>Goudos</surname> <given-names>S.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Faster R-CNN for multi-class fruit detection using a robotic vision system</article-title>. <source>Comput. Networks</source> <volume>168</volume>, <elocation-id>107036</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.comnet.2019.107036</pub-id>
</mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>H.</given-names></name>
<name><surname>Chen</surname> <given-names>X.</given-names></name>
</person-group> (<year>2024</year>). &#x201c;
<article-title>Object detection of classroom students based on improved YOLOv7</article-title>,&#x201d; in <conf-name>Third International Symposium on Computer Applications and Information Systems (ISCAIS 2024)</conf-name>. Bellingham, Washington, USA: SPIE - The International Society for Optics and Photonics. <fpage>484</fpage>&#x2013;<lpage>489</lpage>.
</mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>C.</given-names></name>
<name><surname>Liu</surname> <given-names>S.</given-names></name>
<name><surname>Wang</surname> <given-names>Y.</given-names></name>
<name><surname>Xiong</surname> <given-names>J.</given-names></name>
<name><surname>Zhang</surname> <given-names>Z.</given-names></name>
<name><surname>Zhao</surname> <given-names>B.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>Application of convolutional neural network-based detection methods in fresh fruit production: a comprehensive review</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.868745</pub-id>, PMID: <pub-id pub-id-type="pmid">35651761</pub-id>
</mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>D.</given-names></name>
<name><surname>Xu</surname> <given-names>Y.</given-names></name>
<name><surname>Song</surname> <given-names>H.</given-names></name>
<name><surname>He</surname> <given-names>D.</given-names></name>
<name><surname>Zhang</surname> <given-names>H.</given-names></name>
</person-group> (<year>2015</year>). 
<article-title>Fusion of K-means and Ncut algorithm to realize segmentation and reconstruction of two overlapped apples without blocking by branches and leaves</article-title>. <source>Trans. Chin. Soc. Agric. Eng.</source> <volume>31</volume>, <fpage>227</fpage>&#x2013;<lpage>234</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.11975/j.issn.1002-6819.2015.10.030</pub-id>
</mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wu</surname> <given-names>D.</given-names></name>
<name><surname>Jiang</surname> <given-names>S.</given-names></name>
<name><surname>Zhao</surname> <given-names>E.</given-names></name>
<name><surname>Liu</surname> <given-names>Y.</given-names></name>
<name><surname>Zhu</surname> <given-names>H.</given-names></name>
<name><surname>Wang</surname> <given-names>W.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>Detection of <italic>Camellia oleifera</italic> fruit in complex scenes by using YOLOv7 and data augmentation</article-title>. <source>Appl. Sci.</source> <volume>12</volume>, <elocation-id>11318</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/app122211318</pub-id>
</mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Xiao</surname> <given-names>F.</given-names></name>
<name><surname>Wang</surname> <given-names>H.</given-names></name>
<name><surname>Xu</surname> <given-names>Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>R.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Fruit detection and recognition based on deep learning for automatic harvesting: An overview and review</article-title>. <source>Agronomy</source> <volume>13</volume>, <elocation-id>1625</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy13061625</pub-id>
</mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Xu</surname> <given-names>L.</given-names></name>
<name><surname>Lv</surname> <given-names>J.</given-names></name>
</person-group> (<year>2018</year>). 
<article-title>Recognition method for apple fruit based on SUSAN and PCNN</article-title>. <source>Multimedia Tools Appl.</source> <volume>77</volume>, <fpage>7205</fpage>&#x2013;<lpage>7219</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11042-017-4629-6</pub-id>
</mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Xu</surname> <given-names>D.</given-names></name>
<name><surname>Ren</surname> <given-names>R.</given-names></name>
<name><surname>Zhao</surname> <given-names>H.</given-names></name>
<name><surname>Zhang</surname> <given-names>S.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Intelligent detection of muskmelon ripeness in greenhouse environment based on YOLO-RFEW</article-title>. <source>Agronomy</source> <volume>14</volume>, <elocation-id>1091</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy14061091</pub-id>
</mixed-citation>
</ref>
<ref id="B53">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Yang</surname> <given-names>H.</given-names></name>
<name><surname>Lauren</surname> <given-names>C.</given-names></name>
<name><surname>Nebojsa</surname> <given-names>D.</given-names></name>
<name><surname>Erik</surname> <given-names>W.</given-names></name>
<name><surname>Predrag</surname> <given-names>B.</given-names></name>
</person-group> (<year>2012</year>). &#x201c;
<article-title>Performance analysis of EM-MPM and K-means clustering in 3D ultrasound breast image segmentation</article-title>,&#x201d; in <conf-name>IEEE International Conference on Electro/Information Technology</conf-name>, <conf-loc>Indianapolis, IN, USA</conf-loc> (
<publisher-name>IEEE</publisher-name>). New York (USA): Institute of Electrical and Electronics Engineers (IEEE).
</mixed-citation>
</ref>
<ref id="B54">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
<name><surname>Gao</surname> <given-names>G.</given-names></name>
<name><surname>Chen</surname> <given-names>S.-W.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Polarimetric autocorrelation matrix: A new tool for joint characterizing of target polarization and Doppler scattering mechanism</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>62</volume>, <fpage>65</fpage>&#x2013;<lpage>75</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2024.3398632</pub-id>
</mixed-citation>
</ref>
<ref id="B55">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>Z.</given-names></name>
<name><surname>Igathinathane</surname> <given-names>C.</given-names></name>
<name><surname>Li</surname> <given-names>J.</given-names></name>
<name><surname>Cen</surname> <given-names>H.</given-names></name>
<name><surname>Lu</surname> <given-names>Y.</given-names></name>
<name><surname>Flores</surname> <given-names>P.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Technology progress in mechanical harvest of fresh market apples</article-title>. <source>Comput. Electron. Agric.</source> <volume>175</volume>, <elocation-id>105606</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2020.105606</pub-id>
</mixed-citation>
</ref>
<ref id="B56">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zubair</surname> <given-names>M.</given-names></name>
<name><surname>Iqbal</surname> <given-names>M. A.</given-names></name>
<name><surname>Shil</surname> <given-names>A.</given-names></name>
<name><surname>Chowdhury</surname> <given-names>M.</given-names></name>
<name><surname>Moni</surname> <given-names>M. A.</given-names></name>
<name><surname>Sarker</surname> <given-names>I. H.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>An improved K-means clustering algorithm towards an efficient data-driven modeling</article-title>. <source>Ann. Data Sci.</source> <volume>11</volume>, <fpage>1525</fpage>&#x2013;<lpage>1544</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s40745-022-00428-2</pub-id>, PMID: <pub-id pub-id-type="pmid">40479183</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1107972">Ning Yang</ext-link>, Jiangsu University, China</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1278541">Seyed-Hassan Miraei Ashtiani</ext-link>, Dalhousie University, Canada</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1584721">Zhenguo Zhang</ext-link>, Xinjiang Agricultural University, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1741145">Xi Zhang</ext-link>, Ministry of Natural Resources, China</p></fn>
</fn-group>
</back>
</article>