<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2026.1761249</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>PTV2-Fr: a point cloud segmentation network for phenotypic trait extraction and gibberellin effect analysis in sorghum seedlings</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Li</surname><given-names>Junyi</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3301974/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Shao</surname><given-names>Yunqi</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Tian</surname><given-names>Luxu</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname><given-names>Ziyi</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Guo</surname><given-names>Yurong</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhong</surname><given-names>Zhibo</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Bai</surname><given-names>Ruxiao</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Yang</surname><given-names>Peng</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Pan</surname><given-names>Feng</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Fu</surname><given-names>Xiuqing</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2059924/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>College of Smart Agriculture (College of Artificial Intelligence), Nanjing Agricultural University</institution>, <city>Nanjing</city>,&#xa0;<country country="CN">China</country></aff>
<aff id="aff2"><label>2</label><institution>College of Engineering, Nanjing Agricultural University</institution>, <city>Nanjing</city>,&#xa0;<country country="CN">China</country></aff>
<aff id="aff3"><label>3</label><institution>Institute of Farmland Water Conservancy and Soil-Fertilizer, Xinjiang Academy of Agricultural Reclamation Science</institution>, <city>Shihezi</city>, <state>Xinjiang</state>,&#xa0;<country country="CN">China</country></aff>
<aff id="aff4"><label>4</label><institution>Institute of Mechanical Equipment, Xinjiang Academy of Agricultural Reclamation Science</institution>, <city>Shihezi</city>, <state>Xinjiang</state>,&#xa0;<country country="CN">China</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Xiuqing Fu, <email xlink:href="mailto:fuxiuqing@njau.edu.cn">fuxiuqing@njau.edu.cn</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-19">
<day>19</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>17</volume>
<elocation-id>1761249</elocation-id>
<history>
<date date-type="received">
<day>05</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>02</day>
<month>02</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>16</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Li, Shao, Tian, Zhang, Guo, Zhong, Bai, Yang, Pan and Fu.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Li, Shao, Tian, Zhang, Guo, Zhong, Bai, Yang, Pan and Fu</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-19">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Sorghum is a globally important crop. Under the breeding goals of high yield and stress resistance, the precise selection of elite germplasm is crucial. Phenotypic parameters such as plant height and leaf area at the seedling stage are core indicators for evaluating growth vitality. However, traditional manual measurement is inefficient and error-prone, making it difficult to meet the needs of high-throughput research. To address this, this study proposes an improved model (PTV2-Fr) based on Point Transformer V2 (PTV2), which combines 3D point cloud technology to realize the automatic extraction of sorghum seedling phenotypic parameters and explores the regulatory effects of different gibberellin (GA<sub>3</sub>) concentrations. In this study, videos of sorghum seedlings were collected using the relevant system of Nanjing Agricultural University, and reconstructed into .ply format 3D point cloud files via the open-source software Colmap. The core optimizations of the PTV2-Fr model are as follows: Firstly, it proposes a Multi-Radius Dual-Coordinate Attention (MRDCA) mechanism to address the problems of leaf overlap and uneven point cloud density, thereby enhancing feature discrimination ability; Secondly, it introduces a Point-Graph Invariant Feature Refinement (PG-InvFR) module to improve the sensitivity of the segmentation head to local geometric details; Thirdly, it constructs a composite loss function (EL Loss) combining class-weighted cross-entropy loss and Lov&#xe1;sz loss to alleviate class imbalance and boost segmentation accuracy. We selected 50 valid datasets from 112 video groups, annotated into three categories: Stem, Leaf, and Pot. The results show that PTV2-Fr outperforms PTV2 by 2.5% in accuracy, with significant improvements in Recall and mean F1-score (mF1). Ablation experiments confirm the positive effects of MRDCA, PG-InvFR, and EL Loss. 
Furthermore, PTV2-Fr demonstrates good robustness in analyzing GA<sub>3</sub> concentrations, revealing that 50&#x2013;100 mg/L GA<sub>3</sub> concentrations promote seedling growth, while concentrations exceeding 200 mg/L inhibit growth. The PTV2-Fr model provides an efficient solution for the automatic determination of sorghum seedling phenotypes, and the revealed GA<sub>3</sub> regulatory mechanism can offer theoretical references for high-quality seedling cultivation and hormone management.</p>
</abstract>
<kwd-group>
<kwd>gibberellin treatment</kwd>
<kwd>phenotypic trait extraction</kwd>
<kwd>point cloud segmentation</kwd>
<kwd>PTV2-Fr model</kwd>
<kwd>sorghum seedlings</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by the Guiding Science and Technology Plan of the Xinjiang Production and Construction Corps (Grant number 2024ZD001), Alar Financial Science and Technology Plan Project of the First Division (Grant number 2024 NY02), Yazhou Bay Seed Lab in Hainan Province (Grant number B21HJ1005) and Jiangsu Province Seed Industry Revitalization Unveiled Project (Grant number JBGS(2021)007).</funding-statement>
</funding-group>
<counts>
<fig-count count="10"/>
<table-count count="9"/>
<equation-count count="25"/>
<ref-count count="44"/>
<page-count count="23"/>
<word-count count="12371"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Sustainable and Intelligent Phytoprotection</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Sorghum is an annual diploid species. As one of the world&#x2019;s top five food crops, ranking alongside Zea mays, Oryza sativa, Triticum aestivum, and Hordeum vulgare, it serves as a staple food for over 500 million people globally. It is also an important grain and forage dual-purpose crop in arid and semi-arid regions of China and the world, possessing a C4 photosynthetic pathway (<xref ref-type="bibr" rid="B28">Paterson et&#xa0;al., 2009</xref>), high water use efficiency (<xref ref-type="bibr" rid="B18">Hossain et&#xa0;al., 2022</xref>), and strong drought tolerance (<xref ref-type="bibr" rid="B26">Mwamahonje et&#xa0;al., 2024</xref>). It plays a prominent role in ensuring grain and forage supply and developing the bioeconomy in climate-vulnerable regions. To fully exert this core value, breeding elite varieties with strong stress resistance and high productivity is the key path. Phenotypic parameters such as leaf area and stem length are crucial indicators in breeding programs to evaluate the growth vitality of sorghum seedlings and predict variety potential. Traditional phenotypic measurements of seedling traits such as plant height and leaf area often rely on manual determination or low-throughput 2D measurements, which are limited by low efficiency, high labor intensity, significant subjective bias, and insufficient repeatability and traceability (<xref ref-type="bibr" rid="B41">Zhou et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B38">Xiang et&#xa0;al., 2021</xref>). These limitations make it difficult to meet the needs of large-population verification and high-frequency measurements under multiple treatments. Moreover, the manual measurement process may cause certain damage to plants. 
For example, measuring stem height with a tape measure may lead to stem bending or even breaking, while leaf area measurement mostly requires detaching the leaves (<xref ref-type="bibr" rid="B11">Gao et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B36">Wang et&#xa0;al., 2025</xref>). Therefore, there is an urgent need to develop a rapid, accurate, and automated method for the segmentation, extraction, and calculation of sorghum seedling traits.</p>
<p>In the past, computer vision based on 2D images was one of the methods for measuring phenotypic traits of sorghum seedlings (<xref ref-type="bibr" rid="B20">Jingwen and Hong, 2012</xref>; <xref ref-type="bibr" rid="B34">Tu et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B21">Koyama, 2023</xref>). However, as a monocotyledon, sorghum is commonly cultivated with multiple plants per pot, which leads to frequent leaf occlusion between different plants. For 2D computer vision, it is difficult to accurately segment each leaf, resulting in a decrease in segmentation accuracy (<xref ref-type="bibr" rid="B35">Vayssade et&#xa0;al., 2022</xref>).</p>
<p>With advances in sensing and computer technologies, high-throughput plant phenotyping (HTPP) has evolved from 2D images to 3D reconstruction and point cloud analysis such as multi-view imaging and lidar (<xref ref-type="bibr" rid="B27">Nguyen et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B13">Golbach et&#xa0;al., 2016</xref>). Compared with traditional image processing and shallow machine learning, deep learning methods are more promising in terms of robustness to complex backgrounds, end-to-end feature learning, and cross-scene generalization ability, and have become the mainstream approach for seedling phenotypic measurement (<xref ref-type="bibr" rid="B15">Harandi et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B24">Merto&#x11f;lu et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B44">Zhu et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B33">Song et&#xa0;al., 2025</xref>). In particular, the PointNet algorithm has stood out among a series of vision models due to its ability to directly process sparse and irregular point sets while balancing local and multi-scale geometric features, and serves as a core object detection model in robotics, autonomous driving, and video surveillance (<xref ref-type="bibr" rid="B17">Heiwolt et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B3">Boogaard et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B22">Li et&#xa0;al., 2022</xref>). In the agricultural field, scholars worldwide have specifically optimized the PointNet model to address complex problems in agricultural production. For example, Qiaomei Deng et&#xa0;al (<xref ref-type="bibr" rid="B7">Deng et&#xa0;al., 2024</xref>) developed the CPHNet model that can effectively extract stem features of pumpkin seedlings with different shapes, achieving a mean Intersection over Union (mIoU) of 90.4%, mean Precision (mP) of 93.1%, mean Recall (mR) of 95.6%, and mean F1-score (mF1) of 94.4%. Miao, T. 
et&#xa0;al (<xref ref-type="bibr" rid="B25">Miao et&#xa0;al., 2021</xref>) proposed an algorithm for automatically segmenting maize seedling point clouds to separate young maize branches and leaves, with the algorithm&#x2019;s mP, mR, mF1, and mean Overall Accuracy (mOA) reaching 0.944, 0.956, 0.950, and 0.953, respectively. Jiacheng Shen et&#xa0;al (<xref ref-type="bibr" rid="B32">Shen et&#xa0;al., 2024</xref>) lightweighted PointNet++ to segment organ point clouds of cotton seedlings and extract phenotypic traits, with the algorithm&#x2019;s mP reaching 96.67%. Zhou, Y. et&#xa0;al (<xref ref-type="bibr" rid="B42">Zhou et&#xa0;al., 2025</xref>) proposed a non-destructive automatic extraction method for phenotypic traits of Phoebe zhennan seedlings based on 3D point clouds, realizing the extraction of stem and leaf phenotypic parameters through stem-leaf segmentation; the measurement accuracies of stem length, stem diameter, leaf length, leaf width, and leaf area reached 97.7%, 93.2%, 96.4%, 88.02%, and 85.84%, respectively. Liu, Z. et&#xa0;al (<xref ref-type="bibr" rid="B23">Liu et&#xa0;al., 2025</xref>) constructed a high-precision organ segmentation network for pumpkin seedling point clouds&#x2014;FACNet&#x2014;which achieved 95.06% mIoU, 96.87% mP, 98.02% mR, and 97.44% mF1 on the pumpkin seedling point cloud segmentation dataset. The SN-MGGE network proposed by Yonglong Zhang et&#xa0;al (<xref ref-type="bibr" rid="B39">Zhang et&#xa0;al., 2024</xref>) achieved mIoU and OA values of 94.90% and 97.43% on the cucumber seedling dataset.</p>
<p>Recent advances in plant 3D point cloud analysis have shown a growing interest in deep learning&#x2013;based segmentation methods tailored for plant phenotyping. A 2025 comprehensive review highlights progress in machine-learning approaches for plant point cloud segmentation, including supervised and unsupervised strategies, and evaluates traditional and deep neural network&#x2013;based segmentation pipelines such as projection-, voxel-, and point-based models, emphasizing the challenges of data quality, scale variation, and annotation scarcity in plant point clouds (<xref ref-type="bibr" rid="B33">Song et&#xa0;al., 2025</xref>). In the context of semantic organ segmentation, recent studies have proposed multi-head hierarchical attention networks and attention-enhanced models that outperform classical feature-based methods, enabling more accurate discrimination of leaves, stems, and other plant organs in diverse environmental conditions (<xref ref-type="bibr" rid="B19">Jin et&#xa0;al., 2025</xref>).</p>
<p>In addition to methodological developments, there is active work on improving data resources and task generalization. New annotated datasets provide fine-grained organ-level labels across multiple species, supporting broader benchmarking and evaluation of segmentation models (<xref ref-type="bibr" rid="B12">Gilson et&#xa0;al., 2025</xref>). Approaches that aim to retain full resolution without extensive down-sampling, such as species-agnostic sub-sampling strategies, have shown promising results across different sensor modalities and plant types (<xref ref-type="bibr" rid="B10">Galba et&#xa0;al., 2025</xref>). Emerging research also explores unsupervised and self-supervised learning to reduce reliance on dense annotations, as well as frameworks for scalable organ segmentation that combine data, algorithmic, and computing perspectives to bridge gaps in practical applications (<xref ref-type="bibr" rid="B8">Du et&#xa0;al., 2025</xref>).</p>
<p>In summary, traditional seedling vitality testing cannot meet the needs of agricultural automation due to its shortcomings. Therefore, the future trend is to develop lightweight deep learning network models to rapidly and accurately measure the impacts of different abiotic stresses on seed germination.</p>
<p>Our study proposes a modified model (PTV2-Fr) based on PTV2, which can realize rapid and accurate semantic segmentation of leaves and stems of sorghum seedlings. From the segmentation results, stem length, stem diameter, and leaf area are extracted, providing necessary data support for evaluating the growth status of sorghum seedlings. To summarize, our main contributions are as follows:</p>
<p>1. Dataset construction: A manually annotated sorghum seedling point cloud dataset was established, consisting of 50 pots with 15&#x2013;25 sorghum seedlings per pot.</p>
<p>2. PTV2-Fr model: A novel point cloud semantic segmentation model (PTV2-Fr) is proposed. Specifically, the model replaces the original grouped vector attention module with MRDCA to enhance the ability to distinguish small organs and cross-scale structures of seedlings; introduces the PG-InvFR module between the decoder output and the segmentation head, which dynamically adapts weights for different spatial positions to further refine local point cloud features; and adopts the composite loss function EL Loss at the loss function level to alleviate class imbalance and directly improve the segmentation accuracy of IoU, boundaries, and small objects.</p>
<p>3. Provides a method for measuring stem length, stem diameter, and leaf area using point cloud data.</p>
<p>4. Sorghum seedling growth experiment: We conducted sorghum seedling growth experiments under different GA<sub>3</sub> concentration conditions to analyze the effects of hormone treatment on germination potential and early growth patterns. Numerous existing studies have indicated that GA<sub>3</sub> within an appropriate range can promote germination and seedling elongation, but excessive concentrations or specific stress scenarios may lead to a dose-response phenomenon of &#x201c;low-concentration promotion and high-concentration inhibition&#x201d;. For sorghum and related materials such as sweet sorghum, the common effective concentration range is tens to hundreds of mg&#xb7;L<sup>-1</sup> or around 100 &#x3bc;M, with the optimal value varying by genotype and environment. Our experiment set up concentration gradients within this empirical range to characterize the temporal evolution trajectories of key seedling traits under different treatments.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Experimental equipment and experimental design</title>
<p>The Crop Growth and Cultivation System constructed by us consists of a cultivation room environment control module and a rail-based image acquisition module. The overall structure of the Three-View Imaging System is built with aluminum profiles, and the specific configurations are shown in <xref ref-type="table" rid="T1"><bold>Tables&#xa0;1</bold></xref>, <xref ref-type="table" rid="T2"><bold>2</bold></xref>. The experimental system process from crop cultivation to image acquisition and growth monitoring is shown in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Crop growth and cultivation system configurations.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Module name</th>
<th valign="middle" align="center">Component name</th>
<th valign="middle" align="center">Functional purpose</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="9" align="center">Cultivation Room Environmental Control Module</td>
<td valign="middle" align="center">Cultivation Box</td>
<td valign="middle" align="center">Overall bearing of cultivation, environmental control, and image-acquisition related components. Provides a closed cultivation space for crop growth.</td>
</tr>
<tr>
<td valign="middle" align="center">Touch Screen</td>
<td valign="middle" align="center">Installed on the top of the cultivation box for system operation and parameter setting.</td>
</tr>
<tr>
<td valign="middle" align="center">Control Buttons</td>
<td valign="middle" align="center">Integrated on the upper right of the cultivation box, responsible for power control, LED light source switching, and manual temperature adjustment.</td>
</tr>
<tr>
<td valign="middle" align="center">Perforated Partition</td>
<td valign="middle" align="center">Installed inside the cultivation box to divide the box into upper and lower layers for space-partitioned utilization.</td>
</tr>
<tr>
<td valign="middle" align="center">Acrylic Culture Tray</td>
<td valign="middle" align="center">A total of 6, placed in the upper layer of the cultivation box to carry crops for cultivation.</td>
</tr>
<tr>
<td valign="middle" align="center">Deionized Water Storage Area</td>
<td valign="middle" align="center">Located in the lower layer of the cultivation box to store deionized water required for experiments.</td>
</tr>
<tr>
<td valign="middle" align="center">Embedded PTC Hot Air Circulation System</td>
<td valign="middle" align="center">Installed on the side of the cultivation box. Operates to increase the temperature when the temperature is lower than the preset value and stops when it exceeds the upper limit to maintain temperature stability.</td>
</tr>
<tr>
<td valign="middle" align="center">Tp-100 Thermocouple</td>
<td valign="middle" align="center">Installed on the side of the cultivation box to monitor the chamber temperature in real-time.</td>
</tr>
<tr>
<td valign="middle" align="center">LED Light Source</td>
<td valign="middle" align="center">Installed on the side of the cultivation box to supplement lighting for crop growth.</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">Orbital Image Acquisition Module</td>
<td valign="middle" align="center">HIV VISION RGB Industrial Camera</td>
<td valign="middle" align="center">Used for image acquisition of crop growth status.</td>
</tr>
<tr>
<td valign="middle" align="center">Telecentric Lens</td>
<td valign="middle" align="center">Used in conjunction with the industrial camera to optimize the image acquisition effect.</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Three-view imaging system configurations.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Module name</th>
<th valign="middle" align="center">Component name</th>
<th valign="middle" align="center">Functional purpose</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Overall Frame and Support Structure</td>
<td valign="middle" align="center">Aluminum Profile Frame</td>
<td valign="middle" align="center">Builds the overall support structure of the system, providing a stable installation foundation and an effective acquisition space.</td>
</tr>
<tr>
<td valign="middle" align="center">Imaging and Sensing Device</td>
<td valign="middle" align="center">Color Camera (MV-CS200-10GC)</td>
<td valign="middle" align="center">Core imaging component for capturing high-resolution RGB images of sorghum seedlings (5472 &#xd7; 3648 pixels) for multi-view 3D reconstruction.</td>
</tr>
<tr>
<td valign="middle" align="center">Motion-Drive Component</td>
<td valign="middle" align="center">Ball Screw Module</td>
<td valign="middle" align="center">Adjusts the spatial position of the camera to meet multi-angle imaging requirements.</td>
</tr>
<tr>
<td valign="middle" align="center">Acquisition Platform</td>
<td valign="middle" align="center">Electric Laser Rotary Table</td>
<td valign="middle" align="center">Carries sorghum seedlings and rotates in conjunction with the camera to achieve multi-angle non-destructive image acquisition.</td>
</tr>
<tr>
<td valign="middle" align="center">Control Device</td>
<td valign="middle" align="center">Customized Control Panel</td>
<td valign="middle" align="center">Centralized control of the camera and rotary table operation to ensure accurate multi-angle (front view, side view, top view, axonometric view) acquisition.</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p><bold>(A)</bold> Experimental scenario; <bold>(B)</bold> Cultivating sorghum seedlings; <bold>(C)</bold> Three-view imaging.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1761249-g001.tif">
<alt-text content-type="machine-generated">Laboratory scene with researchers operating automated plant phenotyping equipment, including climate-controlled growth chambers, computer workstations, a three-view imaging system, and examples of system controls and plant growth data visualizations.</alt-text>
</graphic></fig>
<p>The structural framework proposed in this paper for acquiring phenotypic information of sorghum seedlings consists of six distinct components, as shown in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>. Firstly, Part (A) is the growth and cultivation of sorghum seedlings. Secondly, in Part (B), the cultivation of sorghum seedlings primarily involves setting appropriate parameters using the crop growth cultivation system for cultivation. Thirdly, in Part (C), the acquisition of sorghum point cloud data mainly involves video shooting and 3D reconstruction using the three-view imaging system. Fourthly, in Part (D), the point clouds corresponding to stems, leaves, and pots are annotated. Fifthly, in Part (E), PTV2-Fr is used for semantic segmentation of stems, leaves, and pots. Sixthly, in Part (F), the DBSCAN algorithm is utilized to perform instance segmentation on the sorghum point cloud data after semantic segmentation. Finally, in Part (G), four key phenotypic traits are extracted from the results: stem length, stem diameter, leaf length and leaf width.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p><bold>(A)</bold> Seed pretreatment; <bold>(B)</bold> Crop growth cultivation system; <bold>(C)</bold> Three-view imaging system; <bold>(D)</bold> Data annotation; <bold>(E)</bold> The PTV2-Fr overview; <bold>(F)</bold> Instance segmentation; <bold>(G)</bold> Phenotypic trait extraction.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1761249-g002.tif">
<alt-text content-type="machine-generated">A multi-panel scientific diagram outlines the workflow for sorghum seedling growth and automatic trait extraction using point cloud analysis. Panel A shows seeds undergoing soaking, triggering, and cultivation steps. Panel B provides experimental parameters such as seed count, incubation time, temperature, and number of boxes. Panel C illustrates a three-view imaging system setup for data collection. Panel D compares original and labeled point clouds of seedlings. Panel E details the semantic segmentation network architecture. Panel F distinguishes semantic and instance segmentation results. Panel G visualizes extracted leaf and stem traits, including leaf size and stem height.</alt-text>
</graphic></fig>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Seedling phenotypic image acquisition and dataset construction</title>
<p>We focused on the sorghum seedling stage, sowing 25 Nuoyou No. 1 sorghum seeds in a 5&#xd7;5 grid in each acrylic culture tray (25 cm in length, 25 cm in width, and 5 cm in height). To ensure the efficiency of subsequent training, the data were collected from a total of 112 video groups of sorghum seedlings before the germination of the third leaf. After manual screening, 50 valid video groups were selected for further processing. These video groups were reconstructed into 3D point clouds using Colmap, based on two perspectives (front view and axonometric view). We generated .ply files in Colmap using two perspectives (axonometric view and front view), and manually annotated these 50 files with CloudCompare to produce 50 high-quality point cloud datasets. Point-level semantic annotation was performed manually using CloudCompare. Annotators labeled points into three classes (leaf, stem, and pot) by interactively selecting regions in 3D space from multiple viewpoints. To improve annotation consistency, a unified annotation guideline was established prior to labeling, particularly for stem&#x2013;leaf junctions and densely occluded regions. For ambiguous boundary regions where leaves partially overlapped with stems, labels were assigned based on the dominant anatomical structure in the local neighborhood rather than isolated points. All annotated samples were visually inspected after labeling, and inconsistent annotations were corrected through cross-checking by a second annotator. Ambiguous boundary points were labeled conservatively to minimize noise propagation in model training. To enrich data diversity and improve training accuracy, sorghum seedling leaves were annotated as &#x201c;leaf&#x201d;, stems as &#x201c;stem&#x201d;, and acrylic culture trays as &#x201c;pot&#x201d;. 
To reduce computational complexity, all point clouds of acrylic culture trays were downsampled to 4096 points using Furthest Point Sampling (FPS), which helped retain key features while simplifying the data. This sampling resolution was chosen as a trade-off between preserving fine-grained geometric details of seedling organs and maintaining computational efficiency for transformer-based networks. Preliminary experiments showed that using fewer points led to noticeable degradation in stem&#x2013;leaf boundary segmentation, whereas higher point counts provided marginal performance gains at a substantially increased computational cost. Importantly, the pot (i.e., the physical cultivation unit) is defined as the fundamental unit for dataset partitioning. Although each pot was scanned at multiple time points, all point clouds originating from the same pot (i.e., all time points and derived point blocks) were assigned to the same data subset to avoid temporal data leakage. The dataset was therefore partitioned at the pot level into training, validation, and test sets with an approximate ratio of 7:2:1 (pots), respectively. This pot-level grouping ensures that the model is evaluated on entirely unseen physical plants.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>PTV2-Fr sorghum seedling semantic segmentation design</title>
<p>This paper introduces PTV2-Fr, a point cloud semantic segmentation network built on the enhanced PTV2 backbone, specifically designed for the task of distinguishing plant organs in sorghum seedling point clouds. The overall architecture of PTV2-Fr is shown in (<xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3A</bold></xref>).</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p><bold>(A)</bold> The PTV2-Fr architecture; <bold>(B)</bold> The PG-InvFR architecture; <bold>(C)</bold> The MRDCA architecture.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1761249-g003.tif">
<alt-text content-type="machine-generated">Three labeled diagrams represent neural network architectures for processing human pose or coordinate data. Panel A features a detailed schematic with modules for pose embedding, multiple hierarchical attention and upsampling blocks, and final multi-layer perceptron output for pose estimation. Panel B outlines a pipeline starting with K nearest neighbors, incorporating geometry cues, a kernel generator, uncertainty estimation, point involution, and feature fusion for output. Panel C depicts a linear alignment followed by multi-scale KNN grouping, coordinate attention, aggregation, feature concatenation, and final convolution, batch normalization, and activation. Each panel uses arrows to show data flow and module sequence.</alt-text>
</graphic></fig>
<p>In the feature extraction stage, the MRDCA module is proposed to enhance local geometry and coordinate awareness. This module first learns fine-grained, medium-scale, and coarse-scale patch feature representations through parallel multi-radius branches. Then, the outputs of each branch are fused by concatenation or summation and input into the improved Dual-Coordinate Attention mechanism, which integrates relative positions, normalized absolute coordinates, and channel statistics (avg/max pooling). Adaptive weighting is achieved through learnable gating, significantly improving the model&#x2019;s ability to distinguish subtle structures, such as leaf edges and leaf-stem junctions.</p>
<p>In the decoding output stage, the model incorporates the PG-InvFR module, which iteratively refines neighborhood features before the segmentation head, effectively strengthening the boundary representation between leaves and stems. To improve the model&#x2019;s robustness, the training process includes point cloud data augmentation, to address the class imbalance issue and directly optimize the IoU metric, thereby improving the overall robustness and segmentation accuracy of the model.</p>
<sec id="s2_3_1">
<label>2.3.1</label>
<title>Point-graph involution feature refinement</title>
<p>In plant point cloud segmentation tasks, PTV2 lacks a feature refinement mechanism for &#x201c;category boundary gradient&#x201d; regions, leading to misclassification in fuzzy areas such as the leaf-stem junction, leaf edges, and flower pot edges. To address this issue, we draw upon the Flora-NET model proposed by Gupta et&#xa0;al. (<xref ref-type="bibr" rid="B14">Gupta and Tripathi, 2025</xref>). This model introduces involution into flower image classification and designs the Involution-Based Feature Refinement (Inv-FR) module. The model performs hierarchical feature refinement on image grids using &#x201c;spatially specific, channel-independent&#x201d; dynamic kernels, which enhances the expression of petal boundaries and fine-grained structures in complex backgrounds.</p>
<p>However, there are significant differences between point cloud data and images: point clouds have uneven density, and organ boundaries often exhibit continuous gradients, while the geometric anisotropy and semantic uncertainty of organs such as leaves, petioles, and stems are more pronounced. Directly transferring the Inv-FR module from image-based scenarios cannot fully adapt to point clouds. Therefore, an improved version of InvFR for point clouds, called PG-InvFR, is proposed. This version introduces geometric directional constraints, allowing the dynamic kernel to capture directional features in the point cloud neighborhood, especially in areas like leaf edges and organ transition zones. Additionally, the module incorporates boundary uncertainty modulation, enabling adaptive adjustment of kernel weights in ambiguous regions, which enhances robustness at category transition points. Furthermore, the original grid-based neighborhood is replaced with a k-NN-based point cloud neighborhood graph, ensuring stable feature refinement even with sparse and irregular sampling (<xref ref-type="bibr" rid="B29">Qi et&#xa0;al., 2017a</xref>).</p>
<p>Furthermore, beyond replacing the original grid-based neighborhood with a k-NN point cloud graph, PG-InvFR introduces point-cloud-specific refinements: the dynamic kernel generation function is additionally conditioned on directional geometric cues (e.g., distances and local orientation), and an uncertainty-aware modulation term scales the refinement strength in ambiguous boundary regions. These extensions make PG-InvFR a point-cloud-oriented refinement module rather than a simple graph-based adaptation of the original Inv-FR. Through these improvements, the PG-InvFR module not only inherits the efficient dynamic modeling advantages of involution in Flora-NET but also achieves more robust boundary segmentation performance tailored for plant point clouds. It supports sorghum seedling stem-leaf segmentation and parameter extraction. The architecture of PG-InvFR is shown in (<xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3B</bold></xref>).</p>
<p>Involution, unlike traditional convolutions, utilizes spatially varying kernels that are independent of the channel dimension. Specifically, the kernel <inline-formula>
<mml:math display="inline" id="im1"><mml:mrow><mml:msub><mml:mi>H</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is determined by the following dynamic function, as shown in <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>:</p>
<disp-formula id="eq1"><label>(1)</label>
<mml:math display="block" id="M1"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>H</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3c6;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>F</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mi>K</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>K</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Here, <inline-formula>
<mml:math display="inline" id="im2"><mml:mrow><mml:mo>&#xa0;</mml:mo><mml:msub><mml:mi>F</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> denotes the local feature at position <inline-formula>
<mml:math display="inline" id="im3"><mml:mrow><mml:mo>&#xa0;</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#xa0;</mml:mo></mml:mrow></mml:math></inline-formula>, and <inline-formula>
<mml:math display="inline" id="im4"><mml:mrow><mml:mtext>&#x3c6;</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mo>&#xb7;</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> is the dynamic kernel generation function.</p>
<p>The final output feature is obtained as shown in <xref ref-type="disp-formula" rid="eq2">Equation 2</xref>:</p>
<disp-formula id="eq2"><label>(2)</label>
<mml:math display="block" id="M2"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msubsup><mml:mi>F</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mtext>out</mml:mtext></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>n</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x2208;</mml:mo><mml:mtext>&#x394;</mml:mtext><mml:mi>K</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:msub><mml:mi>H</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>n</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#xb7;</mml:mo><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>+</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>m</mml:mi><mml:mo>,</mml:mo><mml:mi>n</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Where <inline-formula>
<mml:math display="inline" id="im5"><mml:mrow><mml:mo>&#xa0;</mml:mo><mml:mtext>&#x394;</mml:mtext><mml:mi>K</mml:mi><mml:mo>&#xa0;</mml:mo></mml:mrow></mml:math></inline-formula> is a 2D grid region, indicating the positions of the different kernels applied at various locations.</p>
<p>For the point cloud, we replace the traditional grid-based neighborhood with a radius-based (ball-query) point neighborhood, closely related to the k-NN (k-Nearest Neighbors) neighborhood, as shown in <xref ref-type="disp-formula" rid="eq3">Equation 3</xref>:</p>
<disp-formula id="eq3"><label>(3)</label>
<mml:math display="block" id="M3"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:mi>j</mml:mi><mml:mo>&#x2223;</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mo>&lt;</mml:mo><mml:mi>r</mml:mi><mml:mo>}</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Where <inline-formula>
<mml:math display="inline" id="im6"><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im7"><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> are the coordinates of the points <inline-formula>
<mml:math display="inline" id="im8"><mml:mi>i</mml:mi></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im9"><mml:mi>j</mml:mi></mml:math></inline-formula>, respectively, and <inline-formula>
<mml:math display="inline" id="im10"><mml:mrow><mml:mo>&#xa0;</mml:mo><mml:mi>r</mml:mi></mml:mrow></mml:math></inline-formula> defines the neighborhood radius.</p>
<p>In the point cloud neighborhood graph, normal vectors and principal curvature directions are introduced to modulate the kernel weight generation function, as shown in <xref ref-type="disp-formula" rid="eq4">Equation 4</xref>:</p>
<disp-formula id="eq4"><label>(4)</label>
<mml:math display="block" id="M4"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>H</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3c6;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>F</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mi>M</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>M</mml:mi><mml:mi>c</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mrow><mml:msub><mml:mi>F</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">]</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Where <inline-formula>
<mml:math display="inline" id="im11"><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is the distance information from point <inline-formula>
<mml:math display="inline" id="im12"><mml:mi>i</mml:mi></mml:math></inline-formula>, and <inline-formula>
<mml:math display="inline" id="im13"><mml:mrow><mml:msub><mml:mi>M</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im14"><mml:mrow><mml:msub><mml:mi>M</mml:mi><mml:mi>c</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> are transformation functions applied to the feature and distance information.</p>
<p>This enables the dynamic kernel to adapt to geometric differences in different directions.</p>
<p>Based on the class probability distribution entropy of the neighboring points, the uncertainty measure <inline-formula>
<mml:math display="inline" id="im15"><mml:mrow><mml:msub><mml:mi>U</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> for each point is defined as shown in <xref ref-type="disp-formula" rid="eq5">Equation 5</xref>:</p>
<disp-formula id="eq5"><label>(5)</label>
<mml:math display="block" id="M5"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>U</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mstyle displaystyle="true"><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mi>c</mml:mi></mml:munder></mml:mstyle><mml:msub><mml:mi>p</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mi>log</mml:mi><mml:msub><mml:mi>p</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Where <inline-formula>
<mml:math display="inline" id="im16"><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> represents the probability of the <inline-formula>
<mml:math display="inline" id="im17"><mml:mrow><mml:mo>&#xa0;</mml:mo><mml:mi>i</mml:mi></mml:mrow></mml:math></inline-formula>-th point belonging to class <inline-formula>
<mml:math display="inline" id="im18"><mml:mrow><mml:mo>&#xa0;</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:math></inline-formula>.</p>
<p>To enhance the adaptability of the kernel in uncertain regions, we introduce the uncertainty weight <inline-formula>
<mml:math display="inline" id="im19"><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mtext>&#x3b1;</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, which is derived by applying the sigmoid function to the entropy, as shown in <xref ref-type="disp-formula" rid="eq6">Equation 6</xref>:</p>
<disp-formula id="eq6"><label>(6)</label>
<mml:math display="block" id="M6"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mtext>&#x3b1;</mml:mtext><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3c3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>U</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Where <inline-formula>
<mml:math display="inline" id="im20"><mml:mrow><mml:mtext>&#x3c3;</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mo>&#xb7;</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> is the sigmoid function.</p>
<p>The final output feature <inline-formula>
<mml:math display="inline" id="im21"><mml:mrow><mml:msubsup><mml:mi>F</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mtext>out</mml:mtext></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula> is computed with a weighted sum, where the weight is determined by the uncertainty weight <inline-formula>
<mml:math display="inline" id="im22"><mml:mrow><mml:msub><mml:mi>&#x3b1;</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and the relationship matrix <inline-formula>
<mml:math display="inline" id="im23"><mml:mrow><mml:msub><mml:mi>H</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> between neighboring points, as shown in <xref ref-type="disp-formula" rid="eq7">Equation 7</xref>:</p>
<disp-formula id="eq7"><label>(7)</label>
<mml:math display="block" id="M7"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msubsup><mml:mi>F</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mtext>out</mml:mtext></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msub><mml:mtext>&#x3b1;</mml:mtext><mml:mi>i</mml:mi></mml:msub><mml:mstyle displaystyle="true"><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:munder></mml:mstyle><mml:msub><mml:mi>H</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#xb7;</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Here, <inline-formula>
<mml:math display="inline" id="im24"><mml:mrow><mml:msub><mml:mi>f</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is the feature of the <inline-formula>
<mml:math display="inline" id="im25"><mml:mrow><mml:mo>&#xa0;</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:math></inline-formula>-th point, and <inline-formula>
<mml:math display="inline" id="im26"><mml:mrow><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> denotes the neighborhood of the <inline-formula>
<mml:math display="inline" id="im27"><mml:mrow><mml:mo>&#xa0;</mml:mo><mml:mi>i</mml:mi></mml:mrow></mml:math></inline-formula>-th point.</p>
<p>Here, the original grid <inline-formula>
<mml:math display="inline" id="im28"><mml:mrow><mml:mtext>&#x394;</mml:mtext><mml:mi>K</mml:mi><mml:mo>&#xa0;</mml:mo></mml:mrow></mml:math></inline-formula> is replaced by a k-NN/ball neighborhood-based point cloud graph to ensure stable kernel behavior under sparse and irregular sampling conditions. The final output feature is as shown in <xref ref-type="disp-formula" rid="eq8">Equation 8</xref>:</p>
<disp-formula id="eq8"><label>(8)</label>
<mml:math display="block" id="M8"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mo>&#xa0;</mml:mo><mml:mo>&#xa0;</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mtext>out</mml:mtext></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:msub><mml:mtext>&#x3b1;</mml:mtext><mml:mi>i</mml:mi></mml:msub><mml:mstyle displaystyle="true"><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mi>N</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:munder></mml:mstyle><mml:msub><mml:mi>H</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#xb7;</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Thus, when the uncertainty is high, the network increases the adaptability of the dynamic kernel, thereby reducing boundary misclassification.</p>
</sec>
<sec id="s2_3_2">
<label>2.3.2</label>
<title>Multi-radius dual-coordinate attention</title>
<p>In sorghum seedling point cloud organ segmentation, challenges such as overlapping leaves, uneven point density, and significant organ scale differences arise. While the original attention mechanism of PTV2 performs excellently in general point cloud tasks, it faces structural limitations when directly applied to seedling point clouds (<xref ref-type="bibr" rid="B4">Chu et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B43">Zhou et&#xa0;al., 2024</xref>). Specifically, in scenes with uneven point density and prominent organ scale differences, this mechanism can confuse features from different leaves or the leaf-stem junction. Short-range neighborhoods struggle to capture global semantics across leaves, while long-range neighborhoods may overwhelm fine boundary information, resulting in blurred boundaries and misclassification. Furthermore, although the Relative Positional Encoding (RPE) in PTV2 provides positional information for attention, its single encoding form is often insufficient to express complex geometric differences in highly localized geometric changes, such as overlapping leaves and fine stems. This leads to an inability to fully highlight subtle but discriminative relative positional features.</p>
<p>To address these issues, we propose the MRDCA module to enhance local geometry and coordinate awareness. The architecture of MRDCA is shown in (<xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3C</bold></xref>). Compared with existing multi-scale or coordinate attention mechanisms used in point-based transformers, MRDCA introduces several design differences tailored to 3D plant point clouds. Most prior multi-scale attention modules rely on fixed receptive fields and stack multiple independent blocks, or simply concatenate features computed with different neighborhood sizes, while coordinate information is injected through a single positional encoding stream. In contrast, MRDCA realizes multi-radius modeling inside one unified module by constructing parallel local branches with different radii/neighbor sizes and dynamically aggregating them through learnable gates. Moreover, instead of mixing geometric cues in one branch, MRDCA explicitly separates relative geometric offsets, absolute 3D coordinates, and channel-wise feature statistics into distinct projections before fusion. This decoupled design allows the attention mechanism to emphasize subtle but discriminative positional cues such as leaf tips and leaf&#x2013;stem junctions, and to better handle highly non-uniform point densities that are characteristic of crop canopies.</p>
<p>In the patch embedding stage, parallel branches with different receptive fields (defined by different radii or the number of neighbors <inline-formula>
<mml:math display="inline" id="im29"><mml:mi>K</mml:mi></mml:math></inline-formula>) are constructed to capture geometric features at various scales. These features include local features for leaf edges and fine stems, as well as global features for inter-leaf relationships and context. The multi-scale information is then integrated into the subsequent Transformer module using strategies like concatenation or summation. The input point features are first dimensionally aligned as shown in <xref ref-type="disp-formula" rid="eq9">Equation 9</xref>:</p>
<disp-formula id="eq9"><label>(9)</label>
<mml:math display="block" id="M9"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msup><mml:mi>f</mml:mi><mml:mrow><mml:mtext>proj</mml:mtext></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mtext>ReLU</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>PointBN</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext>Linear</mml:mtext></mml:mrow><mml:mrow><mml:mtext>proj</mml:mtext></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>f</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>After alignment, <inline-formula>
<mml:math display="inline" id="im30"><mml:mrow><mml:msup><mml:mi>f</mml:mi><mml:mrow><mml:mtext>proj</mml:mtext></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> is divided into three branches (fine/medium/coarse, each with 16 channels), corresponding to short/medium/long receptive fields. Each branch constructs a KNN neighborhood with a different neighborhood size (<inline-formula>
<mml:math display="inline" id="im31"><mml:mrow><mml:mi>e</mml:mi><mml:mo>.</mml:mo><mml:mi>g</mml:mi><mml:mo>.</mml:mo><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mo>&#xa0;</mml:mo><mml:mi>K</mml:mi><mml:mn>1</mml:mn><mml:mo>&#xa0;</mml:mo><mml:mo>=</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mn>8</mml:mn><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mi>K</mml:mi><mml:mn>2</mml:mn><mml:mo>&#xa0;</mml:mo><mml:mo>=</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mn>16</mml:mn><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mi>K</mml:mi><mml:mn>3</mml:mn><mml:mo>&#xa0;</mml:mo><mml:mo>=</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mn>24</mml:mn><mml:mo>&#xa0;</mml:mo></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>) and performs feature embedding and aggregation within its respective neighborhood as shown in <xref ref-type="disp-formula" rid="eq10">Equation 10</xref>:</p>
<disp-formula id="eq10"><label>(10)</label>
<mml:math display="block" id="M10"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mtext>mr</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder><mml:mo>&#x2295;</mml:mo><mml:mrow><mml:mi>r</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mi>s</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mi>m</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mi>l</mml:mi></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:munder></mml:mstyle><mml:msub><mml:mrow><mml:mtext>Embed</mml:mtext></mml:mrow><mml:mi>r</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mi>f</mml:mi><mml:mrow><mml:mtext>proj</mml:mtext></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Where <inline-formula>
<mml:math display="inline" id="im32"><mml:mo>&#x2295;</mml:mo></mml:math></inline-formula> represents concatenation or summation across the channels.</p>
<p>Dual-Coordinate Attention (DCA) computes neighborhood attention by simultaneously utilizing fine-grained relative features <inline-formula>
<mml:math display="inline" id="im33"><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>rel</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, normalized absolute coordinate encoding <inline-formula>
<mml:math display="inline" id="im34"><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>abs</mml:mtext></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, and channel-level statistics (avg/max pooling). It adaptively assigns weights between these signals via learnable gates (pool_gate/gate). This enables the model to amplify small but discriminative relative position differences (e.g., subtle displacements at the leaf-stem junction) while using absolute positional information within instances to avoid mis-clustering leaves due to local similarities.</p>
<p>For point <inline-formula>
<mml:math display="inline" id="im35"><mml:mrow><mml:mo>&#xa0;</mml:mo><mml:mi>i</mml:mi><mml:mo>&#xa0;</mml:mo></mml:mrow></mml:math></inline-formula> in the branch and its neighbor <inline-formula>
<mml:math display="inline" id="im36"><mml:mrow><mml:mo>&#xa0;</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mi>j</mml:mi><mml:mo>&#xa0;</mml:mo></mml:mrow></mml:math></inline-formula>, we define as shown in <xref ref-type="disp-formula" rid="eq11">Equations 11</xref> and <xref ref-type="disp-formula" rid="eq12">12</xref>:</p>
<disp-formula id="eq11"><label>(11)</label>
<mml:math display="block" id="M11"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>q</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3c6;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>W</mml:mi><mml:mi>q</mml:mi></mml:msub><mml:msub><mml:mi>f</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x2003;</mml:mtext><mml:msub><mml:mi>k</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3c6;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>W</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:msub><mml:mi>f</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x2003;</mml:mtext><mml:msub><mml:mi>v</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mi>v</mml:mi></mml:msub><mml:msub><mml:mi>f</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq12"><label>(12)</label>
<mml:math display="block" id="M12"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mi>&#x394;</mml:mi><mml:msub><mml:mi>p</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x2003;</mml:mtext><mml:mi>&#x394;</mml:mi><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x2003;</mml:mtext><mml:msubsup><mml:mi>p</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mtext>norm</mml:mtext></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mi>min</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>p</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>max</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>p</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>min</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>p</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>The attention weights are computed as shown in <xref ref-type="disp-formula" rid="eq13">Equations 13</xref> and <xref ref-type="disp-formula" rid="eq14">14</xref>:</p>
<disp-formula id="eq13"><label>(13)</label>
<mml:math display="block" id="M13"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mtext>Softmax</mml:mtext></mml:mrow><mml:mi>j</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:msub><mml:mi>q</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msubsup><mml:mi>k</mml:mi><mml:mi>j</mml:mi><mml:mi>T</mml:mi></mml:msubsup></mml:mrow><mml:mrow><mml:msqrt><mml:mi>d</mml:mi></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>+</mml:mo><mml:msub><mml:mtext>&#x3c8;</mml:mtext><mml:mrow><mml:mtext>rel</mml:mtext></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>&#x394;</mml:mtext><mml:msub><mml:mi>p</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x394;</mml:mtext><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mtext>&#x3c8;</mml:mtext><mml:mrow><mml:mtext>abs</mml:mtext></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mi>p</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mtext>norm</mml:mtext></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mtext>&#x3b1;&#x3c8;</mml:mtext></mml:mrow><mml:mrow><mml:mtext>pool</mml:mtext></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>&#x394;</mml:mtext><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>The weighted sum of features is then computed as:</p>
<disp-formula id="eq14"><label>(14)</label>
<mml:math display="block" id="M14"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mo>&#xa0;</mml:mo><mml:mo>&#xa0;</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mi>i</mml:mi><mml:mrow><mml:mtext>enh</mml:mtext></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>&#x2208;</mml:mo><mml:msub><mml:mi>N</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:munder></mml:mstyle><mml:msub><mml:mi>w</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>v</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Where <inline-formula>
<mml:math display="inline" id="im37"><mml:mrow><mml:msub><mml:mtext>&#x3c8;</mml:mtext><mml:mrow><mml:mtext>rel</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula>
<mml:math display="inline" id="im38"><mml:mrow><mml:msub><mml:mtext>&#x3c8;</mml:mtext><mml:mrow><mml:mtext>abs</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula>
<mml:math display="inline" id="im39"><mml:mrow><mml:msub><mml:mtext>&#x3c8;</mml:mtext><mml:mrow><mml:mtext>pool</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> represent MLP projections for relative position, absolute coordinates, and pooling operations, respectively. <inline-formula>
<mml:math display="inline" id="im40"><mml:mrow><mml:mo>&#xa0;</mml:mo><mml:mtext>&#x3b1;</mml:mtext></mml:mrow></mml:math></inline-formula> is a learnable parameter, and <inline-formula>
<mml:math display="inline" id="im41"><mml:mrow><mml:mtext>&#x3c6;</mml:mtext><mml:mo>=</mml:mo><mml:mtext>ReLU</mml:mtext><mml:mo>&#x2218;</mml:mo><mml:mtext>PointBN</mml:mtext></mml:mrow></mml:math></inline-formula>.</p>
<p>The enhanced features from the three branches are concatenated to form a multi-scale representation and optionally aligned by a linear layer before being fed into the backbone/decoder, as shown in <xref ref-type="disp-formula" rid="eq15">Equations 15</xref> and <xref ref-type="disp-formula" rid="eq16">16</xref>:</p>
<disp-formula id="eq15"><label>(15)</label>
<mml:math display="block" id="M15"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msup><mml:mi>f</mml:mi><mml:mrow><mml:mtext>MRDCA</mml:mtext></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mtext>Concat</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mi>s</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mtext>enh</mml:mtext></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mi>m</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mtext>enh</mml:mtext></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>f</mml:mi><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mi>l</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mtext>enh</mml:mtext></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq16"><label>(16)</label>
<mml:math display="block" id="M16"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mo>&#xa0;</mml:mo><mml:mo>&#xa0;</mml:mo><mml:msup><mml:mi>f</mml:mi><mml:mrow><mml:mtext>pre</mml:mtext></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mi>&#x3c6;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mtext>fuse</mml:mtext></mml:mrow></mml:msub><mml:msup><mml:mi>f</mml:mi><mml:mrow><mml:mtext>MRDCA</mml:mtext></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
</sec>
<sec id="s2_3_3">
<label>2.3.3</label>
<title>EL loss</title>
<p>To address the challenges of class imbalance (e.g., low proportion of stem samples), blurry boundary prediction, and small target misdetection in plant point cloud segmentation tasks, this paper designs a hybrid loss function, EL Loss, which combines Weighted Cross-Entropy Loss and Multiclass Lov&#xe1;sz Loss (<xref ref-type="bibr" rid="B2">Berman et&#xa0;al., 2018</xref>). By jointly optimizing class balance and boundary precision, EL Loss improves the model&#x2019;s segmentation performance for fine categories, such as plant stems and flower pots.</p>
<p>The Weighted Cross-Entropy Loss is based on the standard cross-entropy loss, with a learnable weight coefficient introduced for each class to alleviate the model bias caused by class imbalance in the dataset. To mitigate class imbalance, class weights were computed based on the point distribution after FPS downsampling. Specifically, the proportion of each semantic class was calculated using the downsampled training point clouds, and inverse-frequency weighting was applied during loss computation.</p>
<p>Compared to cross-entropy loss, Lov&#xe1;sz Loss focuses on the probability error of individual points. By minimizing the Lov&#xe1;sz distance between the &#x201c;prediction error set&#x201d; and the &#x201c;true class set,&#x201d; it emphasizes the overall consistency of the segmentation regions. This is especially helpful for improving the blurry boundary issue (e.g., between the leaf and stem junction or the edges of flower pots), while also increasing the recall of small targets (e.g., fine stems).</p>
<p>To leverage both the &#x201c;class balancing ability&#x201d; of the Weighted Cross-Entropy Loss and the &#x201c;boundary optimization ability&#x201d; of Lov&#xe1;sz Loss, this paper linearly combines the two losses with a specific weight ratio, forming the final hybrid loss function, as shown in <xref ref-type="disp-formula" rid="eq17">Equation 17</xref>.</p>
<disp-formula id="eq17"><label>(17)</label>
<mml:math display="block" id="M17"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mtext>EL</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3b1;</mml:mi><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mtext>CE</mml:mtext></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>&#x3b2;</mml:mi><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mtext>Lov&#xe1;sz</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Where <inline-formula>
<mml:math display="inline" id="im42"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mtext>CE</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> is the Weighted Cross-Entropy Loss, <inline-formula>
<mml:math display="inline" id="im43"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mtext>Lov&#xe1;sz</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> is the Lov&#xe1;sz Loss, and <inline-formula>
<mml:math display="inline" id="im44"><mml:mtext>&#x3b1;</mml:mtext></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im45"><mml:mtext>&#x3b2;</mml:mtext></mml:math></inline-formula> are hyperparameters that control the relative weights of the two loss functions.</p>
</sec>
<sec id="s2_3_4">
<label>2.3.4</label>
<title>Phenotypic trait extraction</title>
<p>The raw point cloud is obtained from the 3D scanning of the plant. Due to uniform scaling biases in the reconstruction coordinates, the three axes are first scaled by a constant factor. All subsequent distance thresholds are set based on the real scale (in meters) after scaling. Using color coding for bitwise matching, three subsets are obtained: stem points, leaf points, and base points (the base points are excluded from the analysis).</p>
<p>Project the point cloud onto the <inline-formula>
<mml:math display="inline" id="im46"><mml:mrow><mml:mi>x</mml:mi><mml:mi>y</mml:mi></mml:mrow></mml:math></inline-formula> plane and use DBSCAN to perform density-based clustering. Let <inline-formula>
<mml:math display="inline" id="im47"><mml:mi>&#x3f5;</mml:mi></mml:math></inline-formula> be the radius, and <inline-formula>
<mml:math display="inline" id="im48"><mml:mrow><mml:mtext>minPts</mml:mtext></mml:mrow></mml:math></inline-formula> be the minimum number of neighboring points. The <inline-formula>
<mml:math display="inline" id="im49"><mml:mi>&#x3f5;</mml:mi></mml:math></inline-formula>-neighborhood of point <inline-formula>
<mml:math display="inline" id="im50"><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is defined as shown in <xref ref-type="disp-formula" rid="eq18">Equation 18</xref>:</p>
<disp-formula id="eq18"><label>(18)</label>
<mml:math display="block" id="M18"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mi>&#x3f5;</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>&#x2223;</mml:mo><mml:mo>|</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msub><mml:mo>|</mml:mo><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2264;</mml:mo><mml:mi>&#x3f5;</mml:mi><mml:mo>}</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>When <inline-formula>
<mml:math display="inline" id="im51"><mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mi>&#x3f5;</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mo>&#x2265;</mml:mo><mml:mtext>minPts</mml:mtext></mml:mrow></mml:math></inline-formula>, <inline-formula>
<mml:math display="inline" id="im52"><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is considered a core point, and density-reachable relationships form a cluster. Each cluster is treated as a &#x201c;cluster point&#x201d;, and points labeled as -1 are deleted. To match the experimental scale, the coordinates are scaled to actual sizes, and the default interval is set to <inline-formula>
<mml:math display="inline" id="im53"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mrow><mml:mtext>stem</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0.03</mml:mn><mml:mtext>&#x2009;m</mml:mtext></mml:mrow></mml:math></inline-formula> (3 cm), with <inline-formula>
<mml:math display="inline" id="im54"><mml:mrow><mml:mtext>minPts</mml:mtext><mml:mo>=</mml:mo><mml:mn>80</mml:mn></mml:mrow></mml:math></inline-formula>. For each plant <inline-formula>
<mml:math display="inline" id="im55"><mml:mi>c</mml:mi></mml:math></inline-formula>, the distance to the centroid, <inline-formula>
<mml:math display="inline" id="im56"><mml:mrow><mml:msub><mml:mtext>&#x3bc;</mml:mtext><mml:mrow><mml:mtext>stem</mml:mtext></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>R</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula>, is recorded as a parameter associated with the cluster point.</p>
<p>In the RANSAC Line Fitting process, two points, <inline-formula>
<mml:math display="inline" id="im57"><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mn>0</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im58"><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula>, are randomly selected to define the direction <inline-formula>
<mml:math display="inline" id="im59"><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>v</mml:mi></mml:mstyle></mml:math></inline-formula> of the line. The distance from each point to the line is calculated using the formula, as shown in <xref ref-type="disp-formula" rid="eq19">Equation 19</xref>:</p>
<disp-formula id="eq19"><label>(19)</label>
<mml:math display="block" id="M19"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mi>d</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mn>0</mml:mn></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#xd7;</mml:mo><mml:mover accent="true"><mml:mi>v</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mo>|</mml:mo><mml:mo>|</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x2003;</mml:mtext><mml:mover accent="true"><mml:mi>v</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mo>=</mml:mo><mml:mfrac><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>v</mml:mi></mml:mstyle><mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>v</mml:mi></mml:mstyle><mml:mo>|</mml:mo><mml:mo>|</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Here, <inline-formula>
<mml:math display="inline" id="im60"><mml:mrow><mml:mtext>d</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mtext>x</mml:mtext><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> represents the perpendicular distance from a point <italic>x</italic> to the line defined by <inline-formula>
<mml:math display="inline" id="im61"><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mn>0</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im62"><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula>. The point distance threshold is set to <inline-formula>
<mml:math display="inline" id="im63"><mml:mrow><mml:mi>&#x3c4;</mml:mi><mml:mo>=</mml:mo><mml:mn>0.01</mml:mn><mml:mtext>&#x2009;</mml:mtext><mml:mi>c</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:math></inline-formula>, and the algorithm is run for 120 iterations, after which the most frequent model is selected as the final fit.</p>
<p>In the PCA Precision step, Singular Value Decomposition (SVD) is performed on the set of points after centering them at their mean position. The first principal component <inline-formula>
<mml:math display="inline" id="im64"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mi>c</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is chosen as the direction of maximum variance, with the constraint that <inline-formula>
<mml:math display="inline" id="im65"><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mo>,</mml:mo><mml:mi>z</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2265;</mml:mo><mml:mn>0</mml:mn></mml:mrow></mml:math></inline-formula>. The points are then centered around <inline-formula>
<mml:math display="inline" id="im66"><mml:mrow><mml:msub><mml:mover accent="true"><mml:mi>p</mml:mi><mml:mo>&#xaf;</mml:mo></mml:mover><mml:mi>c</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, ensuring that the data points are aligned to maximize the direction of variance.</p>
<p>To avoid cross-plant connections, all leaf points are first assigned to the corresponding plant <inline-formula>
<mml:math display="inline" id="im67"><mml:mtext>c</mml:mtext></mml:math></inline-formula> based on the closest stem cluster centroid. This ensures that the leaf processing within each plant does not interfere with others. For each plant <inline-formula>
<mml:math display="inline" id="im68"><mml:mtext>c</mml:mtext></mml:math></inline-formula>&#x2019;s set of leaf points, PCA and Delaunay triangulation are used to estimate the geometric properties:</p>
<p>For leaf length/width, PCA is applied to the points, where the first and second principal components <inline-formula>
<mml:math display="inline" id="im69"><mml:mrow><mml:msub><mml:mi>u</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>u</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> are used to project the points onto the principal axes. The leaf&#x2019;s length and width are then computed as the differences between the maximum and minimum projections along the <inline-formula>
<mml:math display="inline" id="im70"><mml:mrow><mml:msub><mml:mi>u</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im71"><mml:mrow><mml:msub><mml:mi>u</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> axes, as shown in <xref ref-type="disp-formula" rid="eq20">Equation 20</xref>:</p>
<disp-formula id="eq20"><label>(20)</label>
<mml:math display="block" id="M20"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mi>max</mml:mi><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>&#x3c0;</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>min</mml:mi><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>&#x3c0;</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x2003;</mml:mtext><mml:mi>W</mml:mi><mml:mo>=</mml:mo><mml:mi>max</mml:mi><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>&#x3c0;</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>min</mml:mi><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>&#x3c0;</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>For leaf area, the Delaunay triangulation is performed on the points projected onto the <inline-formula>
<mml:math display="inline" id="im72"><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>&#x3c0;</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x3c0;</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> plane. The area of each triangle in the triangulation is calculated using the formula for the surface area of a triangle in 3D space, as shown in <xref ref-type="disp-formula" rid="eq21">Equation 21</xref>:</p>
<disp-formula id="eq21"><label>(21)</label>
<mml:math display="block" id="M21"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mi>A</mml:mi><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mtext>&#x394;</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:munder></mml:mstyle><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mrow><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#xd7;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>|</mml:mo><mml:mo>|</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Here, the summation is over the triangles formed by the points, and <inline-formula>
<mml:math display="inline" id="im73"><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> are the vertices of the triangle. This calculation gives the total area of the leaf projected onto the plane.</p>
<p>This approach is robust to slight wrinkling and yields a 3D leaf area closer to the actual measurement than a direct 2D area estimate.</p>
</sec>
<sec id="s2_3_5">
<label>2.3.5</label>
<title>Model evaluation metrics</title>
<p>To assess the effectiveness of semantic segmentation, the following metrics are used: Intersection over Union (IoU), Precision, Recall, and F1 score. These metrics provide a multi-dimensional evaluation of the model&#x2019;s performance, ensuring the reliability and comprehensiveness of the evaluation, as shown in <xref ref-type="disp-formula" rid="eq22">Equations 22</xref>&#x2013;<xref ref-type="disp-formula" rid="eq25">25</xref>.</p>
<disp-formula id="eq22"><label>(22)</label>
<mml:math display="block" id="M22"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow></mml:mfrac></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq23"><label>(23)</label>
<mml:math display="block" id="M23"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq24"><label>(24)</label>
<mml:math display="block" id="M24"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mi>F</mml:mi><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mi>s</mml:mi><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mfrac><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mo>+</mml:mo><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:mfrac></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq25"><label>(25)</label>
<mml:math display="block" id="M25"><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mi>I</mml:mi><mml:mi>o</mml:mi><mml:mi>U</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Where TP, FP, and FN represent true positive, false positive, and false negative, respectively.</p>
</sec>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Operating environment</title>
<p>To ensure that the results of PTV2-Fr are not affected by different experimental conditions, all experiments in this study were conducted on an Ubuntu 18.04 server equipped with 120 vCPUs (AMD EPYC 7642 48-Core Processor). Acceleration was achieved using an NVIDIA RTX 3090 GPU with 24GB of video memory. The PTV2-Fr method was implemented based on the CUDA 11.3 + PyTorch 1.12.1 framework. <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref> presents the various system configurations used for experimental simulation.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Configuration of the experimental simulation system.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Hardware configuration</th>
<th valign="middle" align="center">Software configuration</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">RAM:32GB</td>
<td valign="middle" align="center">OS: Ubuntu 18.04</td>
</tr>
<tr>
<td valign="middle" align="center">CPU: 120 vCPU (AMD EPYC 7642 48-Core Processor)</td>
<td valign="middle" align="center">PyTorch: 1.12.1</td>
</tr>
<tr>
<td valign="middle" align="center">GPU: NVIDIA RTX 3090 (24GB)</td>
<td valign="middle" align="center">CUDA version: 11.3</td>
</tr>
<tr>
<td valign="middle" align="center">Memory: 90GB</td>
<td valign="middle" align="center">Python: 3.8.20</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>For implementation with the Pointcept framework, the dataset files are organized using an S3DISDataset-style directory convention. We emphasize that the &#x201c;Area_1&#x2013;Area_6&#x201d; labels are an implementation-level organization scheme and do not represent literal spatial regions in the experiment. Each Area corresponds to a fixed subset of pots (i.e., complete pot-level groups including all time points). During all experiments, the mapping from pots to Areas was fixed, and all time points from the same pot were mapped to the same Area. The data root directory is data/train_2, with the target categories defined as three semantic classes&#x2014;leaf, stem, and pot&#x2014;and the ignored label set to -1. During the model training phase, the dataset was divided into training, validation, and test sets at a ratio of 7:2:1. The training data comprised 50 point cloud files annotated into these three categories, corresponding to 35 pots in the training set, 10 pots in the validation set, and 5 pots in the test set. We used a batch size of 32 and trained the model for 100 epochs. The AdamW optimizer was applied with a learning rate of 0.001 and weight decay of 0.05. The learning rate was adjusted using a MultiStepLR scheduler, decaying by a factor of 0.05 after 60 and 80 epochs. To enhance the generalization ability and robustness of the model, a number of targeted data augmentation techniques have been integrated during the training process, as detailed in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Data augmentation techniques used in training.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Augmentation method</th>
<th valign="middle" align="center">Specific parameters</th>
<th valign="middle" align="center">Core function</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">RandomScale</td>
<td valign="middle" align="center">Randomly scale the scale to 0.9-1.1 times</td>
<td valign="middle" align="center">Simulate target scale changes caused by different distances during point cloud acquisition and improve the model&#x2019;s adaptability to scale differences.</td>
</tr>
<tr>
<td valign="middle" align="center">RandomFlip</td>
<td valign="middle" align="center">Perform random flipping with a probability of 0.5</td>
<td valign="middle" align="center">Enhance the model&#x2019;s ability to recognize targets in different postures through symmetric transformation.</td>
</tr>
<tr>
<td valign="middle" align="center">RandomJitter</td>
<td valign="middle" align="center">Point cloud jitter, using Gaussian noise with sigma=0.005 and a clipping range of 0.02</td>
<td valign="middle" align="center">Simulate noise interference during sensor acquisition and reduce the model&#x2019;s overfitting risk to noisy data.</td>
</tr>
<tr>
<td valign="middle" align="center">ElasticDistortion</td>
<td valign="middle" align="center">Elastic distortion with parameters [[0.2, 0.4], [0.8, 1.6]]</td>
<td valign="middle" align="center">Simulate natural morphological deformation during plant growth or slight deformation during acquisition and improve the model&#x2019;s segmentation robustness for non-rigidly deformed targets.</td>
</tr>
<tr>
<td valign="middle" align="center">SphereCrop</td>
<td valign="middle" align="center">Spherical cropping, maximum number of points 80000, random mode</td>
<td valign="middle" align="center">Control the number of input point clouds per batch, avoiding video memory overflow while retaining local feature details.</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Ablation experiments</title>
<p>In the process of our experiments, the control variable method was adopted, and EL Loss, MRDCA, and PG-InvFR were integrated sequentially. By combining these three components, a total of eight ablation experiments were conducted to verify their effectiveness. The baseline model for these experiments is PTV2. The experimental results are presented in <xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref>.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Ablation experiment results.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Index</th>
<th valign="middle" align="center">PTV2</th>
<th valign="middle" align="center">+EL Loss</th>
<th valign="middle" align="center">+MRDCA</th>
<th valign="middle" align="center">+PG-InvFR</th>
<th valign="middle" align="center">+EL Loss +MRDCA</th>
<th valign="middle" align="center">+EL Loss +PG-InvFR</th>
<th valign="middle" align="center">+MRDCA +PG-InvFR</th>
<th valign="middle" align="center">PTV2-Fr</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="4" align="center">IoU</td>
<td valign="middle" align="center">88.38</td>
<td valign="middle" align="center">88.28</td>
<td valign="middle" align="center">88.45</td>
<td valign="middle" align="center">89.00</td>
<td valign="middle" align="center">88.66</td>
<td valign="middle" align="center">89.44</td>
<td valign="middle" align="center">90.30</td>
<td valign="middle" align="center">88.64</td>
</tr>
<tr>
<td valign="middle" align="center">79.28</td>
<td valign="middle" align="center">79.40</td>
<td valign="middle" align="center">79.44</td>
<td valign="middle" align="center">79.81</td>
<td valign="middle" align="center">80.08</td>
<td valign="middle" align="center">80.66</td>
<td valign="middle" align="center">82.33</td>
<td valign="middle" align="center">83.48</td>
</tr>
<tr>
<td valign="middle" align="center">96.37</td>
<td valign="middle" align="center">99.97</td>
<td valign="middle" align="center">99.98</td>
<td valign="middle" align="center">99.00</td>
<td valign="middle" align="center">99.99</td>
<td valign="middle" align="center">99.99</td>
<td valign="middle" align="center">99.87</td>
<td valign="middle" align="center">99.48</td>
</tr>
<tr>
<td valign="middle" align="center">88.01</td>
<td valign="middle" align="center">89.22</td>
<td valign="middle" align="center">89.29</td>
<td valign="middle" align="center">89.27</td>
<td valign="middle" align="center">89.58</td>
<td valign="middle" align="center">90.03</td>
<td valign="middle" align="center">90.83</td>
<td valign="middle" align="center">90.53</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Precision</td>
<td valign="middle" align="center">96.23</td>
<td valign="middle" align="center">97.98</td>
<td valign="middle" align="center">97.59</td>
<td valign="middle" align="center">96.77</td>
<td valign="middle" align="center">96.29</td>
<td valign="middle" align="center">97.21</td>
<td valign="middle" align="center">98.84</td>
<td valign="middle" align="center">94.66</td>
</tr>
<tr>
<td valign="middle" align="center">83.00</td>
<td valign="middle" align="center">82.02</td>
<td valign="middle" align="center">85.65</td>
<td valign="middle" align="center">84.41</td>
<td valign="middle" align="center">86.31</td>
<td valign="middle" align="center">86.64</td>
<td valign="middle" align="center">89.42</td>
<td valign="middle" align="center">91.42</td>
</tr>
<tr>
<td valign="middle" align="center">96.37</td>
<td valign="middle" align="center">99.99</td>
<td valign="middle" align="center">99.98</td>
<td valign="middle" align="center">99.99</td>
<td valign="middle" align="center">99.99</td>
<td valign="middle" align="center">99.99</td>
<td valign="middle" align="center">99.93</td>
<td valign="middle" align="center">99.66</td>
</tr>
<tr>
<td valign="middle" align="center">91.87</td>
<td valign="middle" align="center">93.33</td>
<td valign="middle" align="center">94.41</td>
<td valign="middle" align="center">93.72</td>
<td valign="middle" align="center">94.20</td>
<td valign="middle" align="center">94.61</td>
<td valign="middle" align="center">96.06</td>
<td valign="middle" align="center">95.25</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Recall</td>
<td valign="middle" align="center">90.66</td>
<td valign="middle" align="center">89.92</td>
<td valign="middle" align="center">90.42</td>
<td valign="middle" align="center">91.73</td>
<td valign="middle" align="center">90.05</td>
<td valign="middle" align="center">91.79</td>
<td valign="middle" align="center">91.91</td>
<td valign="middle" align="center">93.09</td>
</tr>
<tr>
<td valign="middle" align="center">94.61</td>
<td valign="middle" align="center">96.13</td>
<td valign="middle" align="center">95.34</td>
<td valign="middle" align="center">93.60</td>
<td valign="middle" align="center">96.72</td>
<td valign="middle" align="center">94.50</td>
<td valign="middle" align="center">93.88</td>
<td valign="middle" align="center">90.71</td>
</tr>
<tr>
<td valign="middle" align="center">99.97</td>
<td valign="middle" align="center">99.97</td>
<td valign="middle" align="center">99.98</td>
<td valign="middle" align="center">99.99</td>
<td valign="middle" align="center">99.99</td>
<td valign="middle" align="center">99.99</td>
<td valign="middle" align="center">99.88</td>
<td valign="middle" align="center">99.81</td>
</tr>
<tr>
<td valign="middle" align="center">95.08</td>
<td valign="middle" align="center">95.34</td>
<td valign="middle" align="center">95.25</td>
<td valign="middle" align="center">95.11</td>
<td valign="middle" align="center">95.59</td>
<td valign="middle" align="center">95.43</td>
<td valign="middle" align="center">95.22</td>
<td valign="middle" align="center">94.54</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">F1-Score</td>
<td valign="middle" align="center">93.83</td>
<td valign="middle" align="center">93.78</td>
<td valign="middle" align="center">93.87</td>
<td valign="middle" align="center">94.18</td>
<td valign="middle" align="center">93.99</td>
<td valign="middle" align="center">94.42</td>
<td valign="middle" align="center">95.30</td>
<td valign="middle" align="center">93.86</td>
</tr>
<tr>
<td valign="middle" align="center">88.42</td>
<td valign="middle" align="center">88.52</td>
<td valign="middle" align="center">88.54</td>
<td valign="middle" align="center">88.77</td>
<td valign="middle" align="center">88.94</td>
<td valign="middle" align="center">89.30</td>
<td valign="middle" align="center">91.60</td>
<td valign="middle" align="center">91.05</td>
</tr>
<tr>
<td valign="middle" align="center">98.15</td>
<td valign="middle" align="center">99.99</td>
<td valign="middle" align="center">99.99</td>
<td valign="middle" align="center">99.99</td>
<td valign="middle" align="center">99.99</td>
<td valign="middle" align="center">99.99</td>
<td valign="middle" align="center">99.93</td>
<td valign="middle" align="center">99.73</td>
</tr>
<tr>
<td valign="middle" align="center">93.47</td>
<td valign="middle" align="center">94.10</td>
<td valign="middle" align="center">94.13</td>
<td valign="middle" align="center">94.31</td>
<td valign="middle" align="center">94.31</td>
<td valign="middle" align="center">94.57</td>
<td valign="middle" align="center">95.61</td>
<td valign="middle" align="center">94.88</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>By comparing the experimental results of PTV2 and +MRDCA, mIoU increased by 1.28% and mP increased by 2.57%. This indicates that the MRDCA module can effectively capture multi-scale features and enhance coordinate attention, thereby improving the overall segmentation accuracy of the model. Comparing the baseline model with the +PG-InvFR, mIoU increased by 1.26%. This result shows that the PG-InvFR module can refine point features by generating adaptive weights based on neighborhood relationships, which is particularly beneficial for distinguishing fine-grained parts such as leaves and stems. By comparing the results of PTV2 and +EL Loss, it is known that after integrating Lov&#xe1;sz loss on the basis of cross-entropy loss, mIoU increased by 1.21%, which confirms that this composite loss strategy can directly optimize the segmentation metrics.</p>
<p>The results of all eight experimental groups confirm that the hybrid loss function, MRDCA module, and PG-InvFR module all make significant contributions to improving the model&#x2019;s segmentation performance. Overall, the finally proposed PTV2-Fr model outperforms the baseline model PTV2 in most accuracy metrics: the mIoU increases by 2.52%, the mP rises by 3.38%, and the mF1 improves by 1.41%. Therefore, it can be concluded that in the organ segmentation task on this dataset, the PTV2-Fr model performs better than the baseline model PTV2.</p>
<p>To quantify not only the accuracy gain but also the computational footprint of each component, we report in <xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref> the trainable parameters, peak GPU memory, and average inference time for all eight model variants in the ablation study. As shown in <xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref>, MRDCA and PG-InvFR provide consistent improvements in segmentation performance with only modest increases in model size, memory usage, and latency, while the EL loss enhances boundary segmentation without affecting inference-time complexity. Overall, the final PTV2-Fr configuration achieves a favorable trade-off between segmentation accuracy and throughput for high-throughput phenotyping applications.</p>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Trainable parameters, peak GPU memory, and average inference time for the eight model variants in the ablation study.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Metrics</th>
<th valign="middle" align="center">PTV2</th>
<th valign="middle" align="center">+EL LOSS</th>
<th valign="middle" align="center">+MRDCA</th>
<th valign="middle" align="center">+PG-InvFR</th>
<th valign="middle" align="center">+EL LOSS +MRDCA</th>
<th valign="middle" align="center">+EL LOSS + PG-InvFR</th>
<th valign="middle" align="center">+MRDCA + PG-InvFR</th>
<th valign="middle" align="center">PTV2-Fr</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Params</td>
<td valign="middle" align="center">3.96</td>
<td valign="middle" align="center">3.96</td>
<td valign="middle" align="center">3.94</td>
<td valign="middle" align="center">3.96</td>
<td valign="middle" align="center">3.94</td>
<td valign="middle" align="center">3.96</td>
<td valign="middle" align="center">3.94</td>
<td valign="middle" align="center">3.94</td>
</tr>
<tr>
<td valign="middle" align="center">Mem(MB)</td>
<td valign="middle" align="center">1811.9</td>
<td valign="middle" align="center">1811.9</td>
<td valign="middle" align="center">1824.5</td>
<td valign="middle" align="center">1812.0</td>
<td valign="middle" align="center">1824.5</td>
<td valign="middle" align="center">1812.0</td>
<td valign="middle" align="center">1824.5</td>
<td valign="middle" align="center">1824.5</td>
</tr>
<tr>
<td valign="middle" align="center">Time</td>
<td valign="middle" align="center">169.90</td>
<td valign="middle" align="center">170.90</td>
<td valign="middle" align="center">207.91</td>
<td valign="middle" align="center">178.69</td>
<td valign="middle" align="center">209.82</td>
<td valign="middle" align="center">178.50</td>
<td valign="middle" align="center">216.16</td>
<td valign="middle" align="center">220.09</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Furthermore, to evaluate model robustness, we employed a five-fold cross-validation protocol conducted at the pot level. Specifically, the set of pots was partitioned into five folds such that all point clouds and derived point blocks from the same pot were assigned to the same fold. In each iteration, four folds (&#x2248;80% of pots) were used for training and one fold (&#x2248;20% of pots) was used for validation; the final held-out test set remained completely independent of the cross-validation process. Cross-validation results are reported as mean &#xb1; standard deviation across the five folds for all main metrics to characterize result variability and model stability. The per-fold outcomes are shown in <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>, and the corresponding mean &#xb1; standard deviation summary is provided in <xref ref-type="table" rid="T7"><bold>Table&#xa0;7</bold></xref>.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>The results of k-fold cross-validation.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1761249-g004.tif">
<alt-text content-type="machine-generated">Four line graphs compare segmentation metrics for leaf, stem, and pot classes over five folds. Top left shows mean Intersection over Union (mIoU), top right shows mean Precision (mP), bottom left shows mean Recall (mR), and bottom right shows mean F1 score (mF1). Pot class maintains consistently high values around one hundred percent, while leaf and stem classes have lower, more variable scores across all metrics. Average values are plotted for each metric, summarizing overall performance across folds.</alt-text>
</graphic></fig>
<table-wrap id="T7" position="float">
<label>Table&#xa0;7</label>
<caption>
<p>Mean &#xb1; standard deviation of five-fold cross-validation results for the PTV2-Fr model.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Metric</th>
<th valign="middle" align="center">Mean(%)</th>
<th valign="middle" align="center">Std(%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">mIoU</td>
<td valign="middle" align="center">90.53</td>
<td valign="middle" align="center">0.46</td>
</tr>
<tr>
<td valign="middle" align="center">mP</td>
<td valign="middle" align="center">95.25</td>
<td valign="middle" align="center">0.21</td>
</tr>
<tr>
<td valign="middle" align="center">mR</td>
<td valign="middle" align="center">94.54</td>
<td valign="middle" align="center">0.56</td>
</tr>
<tr>
<td valign="middle" align="center">mF1</td>
<td valign="middle" align="center">94.88</td>
<td valign="middle" align="center">0.26</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The experimental results indicate that during the five-fold cross-validation process, PTV2-Fr exhibited stable performance across all metrics, with mIoU, mP, mR, and mF1 scores consistently maintained at high levels. The mIoU, mP, mR, and mF1 in the five experiments were 90.53%, 95.25%, 94.54%, and 94.88% respectively. Model generalization is evaluated using a strict pot-level hold-out test set, in which all test pots are completely unseen during training and cross-validation. Because each pot is scanned across multiple growth stages, the test set naturally includes unseen plant morphologies and developmental states, introducing a meaningful form of domain shift. This evaluation setup assesses the model&#x2019;s ability to generalize to new physical plants and different growth stages, rather than claiming large-scale cross-domain generalization. These results demonstrate that PTV2-Fr performs excellently in the sorghum seedling point cloud organ segmentation task and exhibits strong generalization ability and stability under different data division methods. The application of the five-fold cross-validation method enables a more comprehensive evaluation of the model&#x2019;s performance, reduces the risk of overfitting, and ensures the reliability and effectiveness of the model in practical applications.</p>
<p>To visually evaluate the effectiveness of each submodule in the stem segmentation of sorghum seedling point clouds, four representative samples (A, B, C, and D) were selected from the test set for comparative visualization analysis, as shown in <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>. The figure presents the segmentation results of Manual segmentation, PTV2, the sequentially added modules, and the final model PTV2-Fr. In the visualization, red represents leaves, green represents stems, yellow indicates the pot, and the small boxes show enlarged details. As observed from the results, in samples A and B, the baseline model exhibits minor misclassification between leaves and stems and some omissions of fine stem segments. After introducing the Loss module, stem continuity was improved, the MRDCA module enhanced the recognition of curved structures, and the PG-InvFR module further improved the discrimination between stems and leaves. In sample C, where the leaves are more curved and overlapped, the +MRDCA module effectively alleviated segmentation gaps, while the +PG-InvFR module optimized global consistency, resulting in more complete segmentation. For the complex sample D, with dense and intertwined leaves and stems, PointTransformerV2 showed obvious misclassification and discontinuity. In contrast, the combination of multiple modules&#x2014;especially +MRDCA+PG-InvFR and the final PTV2-Fr&#x2014;accurately identified fine branches and maintained the integrity of stems and leaves, producing segmentation results close to the manual annotations. Overall, the Loss module improved local continuity, the MRDCA module strengthened multi-scale feature fusion to adapt to complex plant morphology, and the PG-InvFR module enhanced class discrimination. The integration of all three modules enabled the final model to outperform the baseline under varying densities and morphological conditions, achieving higher segmentation accuracy and completeness.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Qualitative visual analysis of sorghum seedlings in ablation experiments.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1761249-g005.tif">
<alt-text content-type="machine-generated">Grid of 3D renderings compares manual segmentation and various point cloud segmentation methods for four sorghum samples, displaying plant parts in green and red, with small inset charts showing finer segmentation details for each method and sample.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Comparison of point cloud semantic segmentation results for sorghum seedlings using different methods</title>
<p>Our study selected five common point cloud semantic segmentation networks&#x2014;PointNet (<xref ref-type="bibr" rid="B29">Qi et&#xa0;al., 2017a</xref>), PointNet++ (<xref ref-type="bibr" rid="B30">Qi et&#xa0;al., 2017b</xref>), Point Transformer (PTV1) (<xref ref-type="bibr" rid="B40">Zhao et&#xa0;al., 2021</xref>), PTV3 (<xref ref-type="bibr" rid="B37">Wu et&#xa0;al., 2024</xref>), and U-Net (<xref ref-type="bibr" rid="B5">&#xc7;i&#xe7;ek et&#xa0;al., 2016</xref>)&#x2014;to compare with PTV2-Fr. Section 3.1 has introduced the experimental settings and parameter configurations of PTV2-Fr, while the other networks adopted the recommended parameters in their original papers. For transparency and reproducibility, the exact training hyperparameters used for each baseline model and the proposed PTV2-Fr are now explicitly reported in <xref ref-type="table" rid="T8"><bold>Table&#xa0;8</bold></xref>. <xref ref-type="table" rid="T9"><bold>Table&#xa0;9</bold></xref> presents the organ segmentation accuracy of the six networks, including IoU, Precision, Recall, and F1-Score.</p>
<table-wrap id="T8" position="float">
<label>Table&#xa0;8</label>
<caption>
<p>Training hyperparameters used for baseline models and the proposed PTV2-Fr.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Model</th>
<th valign="middle" align="center">Optimizer</th>
<th valign="middle" align="center">Learning rate</th>
<th valign="middle" align="center">Weight decay</th>
<th valign="middle" align="center">Batch size</th>
<th valign="middle" align="center">Epochs</th>
<th valign="middle" align="center">Scheduler</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">PointNet</td>
<td valign="middle" align="center">Adam</td>
<td valign="middle" align="center">0.001</td>
<td valign="middle" align="center">/</td>
<td valign="middle" align="center">16</td>
<td valign="middle" align="center">200</td>
<td valign="middle" align="center">Step</td>
</tr>
<tr>
<td valign="middle" align="center">PointNet++</td>
<td valign="middle" align="center">Adam</td>
<td valign="middle" align="center">0.001</td>
<td valign="middle" align="center">/</td>
<td valign="middle" align="center">16</td>
<td valign="middle" align="center">200</td>
<td valign="middle" align="center">Step</td>
</tr>
<tr>
<td valign="middle" align="center">PTV1</td>
<td valign="middle" align="center">AdamW</td>
<td valign="middle" align="center">0.005</td>
<td valign="middle" align="center">0.01</td>
<td valign="middle" align="center">16</td>
<td valign="middle" align="center">400</td>
<td valign="middle" align="center">Cosine</td>
</tr>
<tr>
<td valign="middle" align="center">PTV3</td>
<td valign="middle" align="center">AdamW</td>
<td valign="middle" align="center">0.005</td>
<td valign="middle" align="center">0.01</td>
<td valign="middle" align="center">16</td>
<td valign="middle" align="center">300</td>
<td valign="middle" align="center">Cosine</td>
</tr>
<tr>
<td valign="middle" align="center">U-Net</td>
<td valign="middle" align="center">Adam</td>
<td valign="middle" align="center">0.001</td>
<td valign="middle" align="center">0.05</td>
<td valign="middle" align="center">16</td>
<td valign="middle" align="center">200</td>
<td valign="middle" align="center">Step</td>
</tr>
<tr>
<td valign="middle" align="center">PTV2-Fr</td>
<td valign="middle" align="center">AdamW</td>
<td valign="middle" align="center">0.001</td>
<td valign="middle" align="center">0.05</td>
<td valign="middle" align="center">32</td>
<td valign="middle" align="center">100</td>
<td valign="middle" align="center">MultiStepLR</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T9" position="float">
<label>Table&#xa0;9</label>
<caption>
<p>A comparative analysis of semantic segmentation performance across different deep learning networks.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Index</th>
<th valign="middle" align="center">Part</th>
<th valign="middle" align="center">PointNet</th>
<th valign="middle" align="center">PointNet++</th>
<th valign="middle" align="center">PTV1</th>
<th valign="middle" align="center">PTV3</th>
<th valign="middle" align="center">U-Net</th>
<th valign="middle" align="center">PTV2-Fr</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="4" align="center">IoU</td>
<td valign="middle" align="center">Leaf</td>
<td valign="middle" align="center">84.21</td>
<td valign="middle" align="center">87.91</td>
<td valign="middle" align="center">86.80</td>
<td valign="middle" align="center">83.63</td>
<td valign="middle" align="center">89.03</td>
<td valign="middle" align="center">88.64</td>
</tr>
<tr>
<td valign="middle" align="center">Stem</td>
<td valign="middle" align="center">70.49</td>
<td valign="middle" align="center">77.10</td>
<td valign="middle" align="center">71.50</td>
<td valign="middle" align="center">74.02</td>
<td valign="middle" align="center">78.72</td>
<td valign="middle" align="center">83.48</td>
</tr>
<tr>
<td valign="middle" align="center">Pot</td>
<td valign="middle" align="center">82.71</td>
<td valign="middle" align="center">98.68</td>
<td valign="middle" align="center">88.48</td>
<td valign="middle" align="center">89.91</td>
<td valign="middle" align="center">90.69</td>
<td valign="middle" align="center">99.48</td>
</tr>
<tr>
<td valign="middle" align="center">Mean</td>
<td valign="middle" align="center">79.14</td>
<td valign="middle" align="center">87.90</td>
<td valign="middle" align="center">82.26</td>
<td valign="middle" align="center">82.52</td>
<td valign="middle" align="center">86.15</td>
<td valign="middle" align="center">90.53</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Precision</td>
<td valign="middle" align="center">Leaf</td>
<td valign="middle" align="center">94.00</td>
<td valign="middle" align="center">95.46</td>
<td valign="middle" align="center">95.93</td>
<td valign="middle" align="center">98.61</td>
<td valign="middle" align="center">95.29</td>
<td valign="middle" align="center">94.66</td>
</tr>
<tr>
<td valign="middle" align="center">Stem</td>
<td valign="middle" align="center">78.53</td>
<td valign="middle" align="center">83.71</td>
<td valign="middle" align="center">81.23</td>
<td valign="middle" align="center">75.45</td>
<td valign="middle" align="center">86.24</td>
<td valign="middle" align="center">91.42</td>
</tr>
<tr>
<td valign="middle" align="center">Pot</td>
<td valign="middle" align="center">83.31</td>
<td valign="middle" align="center">99.02</td>
<td valign="middle" align="center">88.60</td>
<td valign="middle" align="center">89.92</td>
<td valign="middle" align="center">90.69</td>
<td valign="middle" align="center">99.66</td>
</tr>
<tr>
<td valign="middle" align="center">Mean</td>
<td valign="middle" align="center">85.28</td>
<td valign="middle" align="center">92.73</td>
<td valign="middle" align="center">88.59</td>
<td valign="middle" align="center">87.99</td>
<td valign="middle" align="center">90.74</td>
<td valign="middle" align="center">95.25</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">Recall</td>
<td valign="middle" align="center">Leaf</td>
<td valign="middle" align="center">88.96</td>
<td valign="middle" align="center">91.74</td>
<td valign="middle" align="center">90.12</td>
<td valign="middle" align="center">84.63</td>
<td valign="middle" align="center">93.13</td>
<td valign="middle" align="center">93.09</td>
</tr>
<tr>
<td valign="middle" align="center">Stem</td>
<td valign="middle" align="center">87.38</td>
<td valign="middle" align="center">90.66</td>
<td valign="middle" align="center">85.64</td>
<td valign="middle" align="center">97.51</td>
<td valign="middle" align="center">90.03</td>
<td valign="middle" align="center">90.71</td>
</tr>
<tr>
<td valign="middle" align="center">Pot</td>
<td valign="middle" align="center">99.17</td>
<td valign="middle" align="center">99.69</td>
<td valign="middle" align="center">99.80</td>
<td valign="middle" align="center">99.92</td>
<td valign="middle" align="center">99.99</td>
<td valign="middle" align="center">99.81</td>
</tr>
<tr>
<td valign="middle" align="center">Mean</td>
<td valign="middle" align="center">91.84</td>
<td valign="middle" align="center">94.03</td>
<td valign="middle" align="center">91.85</td>
<td valign="middle" align="center">94.02</td>
<td valign="middle" align="center">94.38</td>
<td valign="middle" align="center">94.54</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">F1-Score</td>
<td valign="middle" align="center">Leaf</td>
<td valign="middle" align="center">91.41</td>
<td valign="middle" align="center">93.56</td>
<td valign="middle" align="center">92.94</td>
<td valign="middle" align="center">91.09</td>
<td valign="middle" align="center">94.20</td>
<td valign="middle" align="center">93.86</td>
</tr>
<tr>
<td valign="middle" align="center">Stem</td>
<td valign="middle" align="center">82.72</td>
<td valign="middle" align="center">87.05</td>
<td valign="middle" align="center">83.38</td>
<td valign="middle" align="center">85.07</td>
<td valign="middle" align="center">88.09</td>
<td valign="middle" align="center">91.05</td>
</tr>
<tr>
<td valign="middle" align="center">Pot</td>
<td valign="middle" align="center">90.55</td>
<td valign="middle" align="center">99.35</td>
<td valign="middle" align="center">93.90</td>
<td valign="middle" align="center">94.69</td>
<td valign="middle" align="center">95.12</td>
<td valign="middle" align="center">99.73</td>
</tr>
<tr>
<td valign="middle" align="center">Mean</td>
<td valign="middle" align="center">88.23</td>
<td valign="middle" align="center">93.32</td>
<td valign="middle" align="center">90.07</td>
<td valign="middle" align="center">90.28</td>
<td valign="middle" align="center">92.47</td>
<td valign="middle" align="center">94.88</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Early point cloud semantic segmentation models such as PointNet and PointNet++ have laid the foundation for the processing of 3D point cloud data. Since PointNet only performs point-wise MLP and global pooling without neighborhood geometric modeling, it tends to confuse spatially close but semantically different points in the sorghum seedling semantic segmentation task. This results in relatively low comprehensive IoU values for leaf and stem classes. PointNet++ improves local modeling through hierarchical multi-scale aggregation, making it more capable of capturing local shapes and topology than pure global methods. Thus, it performs reliably when segmenting larger leaves or distinct stems. However, its performance is highly dependent on the settings of sampling radius and k-NN parameters. A single-scale configuration is prone to two types of errors on plant organs with variable scales and uneven point density: an excessively large radius leads to over-smoothing of boundaries, causing small leaves to be merged into stems; an overly small radius may fail to capture sufficient context, resulting in fragmentation or noise sensitivity. Overall, it exhibits over-smoothing or local underfitting when dealing with samples with large organ scale variations and uneven point density.</p>
<p>PTV1 leverages the self-attention mechanism of Transformer, introducing enhanced global feature extraction capability for point cloud processing. By modeling relationships between all points in the point cloud, it effectively aggregates long-range semantic information, thus demonstrating robustness in recognizing large-scale, distinct-shaped leaves or main stems and being able to correct local noise using global context. However, its original design does not focus on enhancing highly local geometric details&#x2014;which leads to under-segmentation or errors of being merged into adjacent categories in structures with sparse points and complex topology, such as small leaves clinging to the main stem or newly emerged leaves. PTV3 introduces a unique design concept: it significantly expands the model&#x2019;s effective receptive field and reduces real-time computation and memory overhead by replacing traditional KNN or ball queries with serialized neighborhood mapping. Nevertheless, this does not solve the problem of being merged into adjacent categories when target objects are extremely small, have sparse point counts, or are highly geometrically similar to neighboring structures. Without additional local refinement or multi-scale compensation, serialized neighborhood aggregation tends to smooth out key local discriminative features in attention allocation.</p>
<p>U-Net, which is based on spconv, uses voxelization as a preprocessing step, converting the point cloud into a sparse voxel grid. When the voxel size is relatively coarse, small targets with fewer points and limited spatial extent&#x2014;such as petioles, small leaves attached to the main stem, or the pot&#x2014;may be merged into neighboring regions, resulting in information loss or blurred boundaries. This is the primary reason for the low IoU of small classes. Moreover, the downsampling and upsampling operations in the sparse U-Net, together with submanifold convolutions, tend to smooth details during multiple aggregation processes. Without additional point-level local refinement, the smoothed information becomes difficult to recover, causing small objects to be merged into adjacent classes or to appear broken.</p>
<p>Experimental results show that PTV2-Fr outperforms recent point cloud segmentation models in overall performance, achieving 90.53% mIoU, 95.25% mPrec, 94.54% mRec, and 94.88% mF1 in the sorghum seedling point cloud organ segmentation task. Among the advanced models listed, PTV2-Fr is the most suitable model for segmenting sorghum seedling point clouds.</p>
<p><xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref> presents a visual comparison of different methods for stem segmentation in sorghum seedling point clouds, using four representative samples (A, B, C, and D) for analysis. The figure includes the results of Manual segmentation, PointNet, PointNet++, PTV1, PTV3, U-Net, and the proposed PTV2-Fr model. In the visualization, red represents leaves, green represents stems, yellow represents the pot, and the small boxes show enlarged details of local segmentation areas.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Qualitative visual analysis of sorghum seedlings in different experiments.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1761249-g006.tif">
<alt-text content-type="machine-generated">Grid of 3D sorghum plant models showing segmentation results across four samples (columns A&#x2013;D) and seven methods (manual and six algorithms) in rows, with stems in red and leaves in green; errors are highlighted with inset boxes for algorithmic segmentations.</alt-text>
</graphic></fig>
<p>Overall, the traditional methods PointNet and PointNet++ exhibit relatively low segmentation accuracy, often showing confusion between leaves and stems as well as stem discontinuities. Due to its simple feature extraction mechanism, PointNet displays obvious misclassifications in samples A and B, with fine stem segments being incorrectly identified as leaves. Although PointNet++ enhances local feature representation through a hierarchical structure, it still suffers from segmentation breaks and omissions in the more complex samples C and D, particularly in regions where stems and leaves are intertwined. The introduction of the attention mechanism in PTV1 improves local segmentation consistency to some extent, making the boundary between stems and leaves clearer. However, in dense samples (such as C and D), some misclassifications and stem breaks remain observable. PTV3 further refines feature modeling, achieving better overall performance and distinguishing stems and leaves more effectively, though minor errors still occur in curved and overlapping structures.</p>
<p>U-Net performs well in multi-scale feature fusion, achieving stable segmentation of stems and leaves under complex morphological conditions. The enlarged details show that U-Net significantly improves stem continuity and leaf boundary delineation. In comparison, the proposed PTV2-Fr integrates multi-module feature fusion and structure-adaptive mechanisms, yielding segmentation results most consistent with manual annotations. Whether in sparse (A, B) or dense and complex (C, D) samples, PTV2-Fr accurately identifies fine stem segments while maintaining structural integrity, producing clear leaf&#x2013;stem boundaries and significantly reducing misclassification.</p>
<p>In summary, the visual results demonstrate that segmentation accuracy and stability improve progressively with model refinement. By integrating multi-level feature representations with global contextual information, PTV2-Fr achieves the best performance among all methods in the challenging sorghum seedling point cloud segmentation task. It effectively addresses the segmentation difficulties caused by leaf curvature, overlap, and complex morphology, resulting in more accurate and complete stem&#x2013;leaf segmentation outcomes.</p>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Detection of sorghum seedling phenotypes under different concentrations of GA<sub>3</sub> solution</title>
<p>Numerous existing studies have indicated that GA<sub>3</sub> within an appropriate range can promote germination and seedling elongation, but a dose-response phenomenon of &#x201c;low-concentration promotion and high-concentration inhibition&#x201d; may occur at excessively high concentrations or under specific stress scenarios. Based on the hypothesis of this hormone dose-response, our study took GA<sub>3</sub> as the treatment factor to systematically examine the threshold effect of different concentrations on the growth performance of sorghum seedlings at the seedling stage. For this purpose, six treatment groups were set up: control check (CK) consisting of deionized water, and 50, 100, 150, 200, and 250 mg&#xb7;L<sup>-1</sup> GA<sub>3</sub> solutions. Three-dimensional (3D) point cloud data were collected every 12 hours within the range of 36 h to 108 h after sowing. Because the same physical pot was scanned repeatedly over time (i.e., longitudinal sampling), care was taken during dataset partitioning to avoid temporal leakage: all scans from a single pot were assigned to the same data subset (training, validation, or test). This ensures that temporal measurements from the same plant never appear across different evaluation subsets. <xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7</bold></xref> shows the front-view images of sorghum seedlings during growth, <xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8</bold></xref> presents the 3D point cloud reconstruction images of sorghum seedlings during growth, and <xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9</bold></xref> displays the semantic segmentation results of sorghum seedling 3D point cloud data using the PTV2-Fr model. For the 3D point cloud data after semantic segmentation, a clustering segmentation method was used to automatically identify and quantify the number of leaves. Axial stem height, leaf area, and basal stem diameter were extracted from the segmented voxels. 
Three replicate experiments were conducted for each treatment to calculate the average growth rate and relative response value. <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10</bold></xref> shows the variations in the total number of leaves, leaf area index (LAI), average axial stem height, and average stem diameter of sorghum seedlings under the CK and different concentrations of GA<sub>3</sub> solutions. To analyze the effects of GA<sub>3</sub> concentration on sorghum seedling phenotypic traits while accounting for repeated measurements over time, a linear mixed-effects model (LMM) was employed. In the model, GA<sub>3</sub> concentration, measurement time, and their interaction were treated as fixed effects, while pot identity was included as a random effect to account for within-pot correlations arising from repeated measurements. Statistical significance was evaluated at the 0.05 level.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Front-view captured images of sorghum seedlings during the growth process.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1761249-g007.tif">
<alt-text content-type="machine-generated">Grid of plant growth stages in yellow trays over a black background, arranged by increasing concentrations of a solution from left to right and time intervals from top to bottom, illustrating differences in seedling development.</alt-text>
</graphic></fig>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Three-view point cloud reconstructed images of sorghum seedlings during the growth process.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1761249-g008.tif">
<alt-text content-type="machine-generated">Matrix of plant growth in trays under six treatment conditions: CK, 50, 100, 150, 200, and 250 milligrams per liter, with images at 36, 48, 60, 72, 84, 96, and 108 hours showing variability in seedling development and density based on concentration and time.</alt-text>
</graphic></fig>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Semantic segmentation of point cloud data images of sorghum seedlings using the PTV2-Fr model.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1761249-g009.tif">
<alt-text content-type="machine-generated">Illustration composed of a seven-by-six matrix showing plant growth in trays under different concentrations labeled CK, 50 milligrams per liter, 100 milligrams per liter, 150 milligrams per liter, 200 milligrams per liter, and 250 milligrams per liter across time points 36 hours to 108 hours. Green and red colors illustrate plant parts, with denser, taller growth depicted as time and concentrations increase.</alt-text>
</graphic></fig>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p><bold>(A)</bold> The changes in the total number of leaves of sorghum seedlings over time in the CK and different concentrations of GA<sub>3</sub> solutions; <bold>(B)</bold> The changes in the LAI of sorghum seedlings over time in the CK and different concentrations of GA<sub>3</sub> solutions; <bold>(C)</bold> The changes in the average axial stem height of sorghum seedlings over time in the CK and different concentrations of GA<sub>3</sub> solutions; <bold>(D)</bold> The changes in the average stem diameter of sorghum seedlings over time in the CK and different concentrations of GA<sub>3</sub> solutions.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1761249-g010.tif">
<alt-text content-type="machine-generated">Figure contains four panels labeled A, B, C, and D. Panel A displays six bar charts showing leaf area index over time for varying concentrations. Panel B displays six bar charts of the number of leaves over time for the same concentrations. Panel C presents six radar charts comparing stem diameter and leaf number over time. Panel D shows a 3D line graph of stem height in centimeters over time with GA levels indicated by different colored lines.</alt-text>
</graphic></fig>
<p>(<xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10A</bold></xref>) shows the changes in the total number of leaves of sorghum seedlings over time in the CK and different concentrations of GA<sub>3</sub> solutions. With the passage of time, the total number of leaves increased in both CK and all GA<sub>3</sub>-treated groups. However, as the concentration gradient of solution increased, the total number of leaves first increased and then decreased, reaching a peak at 50 mg/L. At the 7th recording (i.e., 108 h after sowing), the total number of leaves in CK, 50 mg/L, 100 mg/L, 150 mg/L, 200 mg/L, and 250 mg/L groups were 66.67, 74.33, 71.67, 70.33, 69.67, and 68.67, respectively. Overall, within the concentration range of 50 mg/L to 250 mg/L, the promoting effect of GA<sub>3</sub> on the total number of leaves of sorghum seedlings first strengthened and then weakened, with the strongest effect observed at 50 mg/L. LMM revealed a significant main effect of GA<sub>3</sub> concentration (p&lt; 0.001) and time (p&lt; 0.001), as well as a significant GA<sub>3</sub> concentration &#xd7; time interaction (p&lt; 0.01). Estimated marginal means indicated that the 50 mg/L treatment produced a higher leaf number than the control (mean difference = 7.66, 95% CI [5.21, 10.11]).</p>
<p>(<xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10B</bold></xref>) shows the changes in the LAI of sorghum seedlings over time in the CK and different concentrations of GA<sub>3</sub> solutions. Since the average leaf area may be inaccurate due to the influence of newly emerged leaves, we adopted LAI as the indicator, with the formula: total leaf area divided by unit area (625 cm&#xb2;). With the passage of time, the LAI increased in both CK and all GA<sub>3</sub>-treated groups. However, as the concentration gradient of GA<sub>3</sub> solution increased, the LAI first increased and then decreased, reaching a peak at 50 mg/L. At the 7th recording, the LAI values in CK, 50 mg/L, 100 mg/L, 150 mg/L, 200 mg/L, and 250 mg/L groups were 1.71, 2.93, 2.44, 2.17, and 1.80, respectively. <!-- NOTE(review): six treatment groups are enumerated but only five LAI values are listed; one value (apparently the 200 mg/L group) is missing from the series. Confirm the full set of values with the authors. --> Overall, within the concentration range of 50 mg/L to 250 mg/L, the promoting effect of GA<sub>3</sub> on the LAI of sorghum seedlings first strengthened and then weakened, with the strongest effect observed at 50 mg/L. LMM revealed a significant main effect of GA<sub>3</sub> concentration (p&lt; 0.001) and time (p&lt; 0.001), as well as a significant GA<sub>3</sub> concentration &#xd7; time interaction (p&lt; 0.01). Estimated marginal means indicated that the 50 mg/L treatment produced a higher LAI than the control (mean difference = 1.22, 95% CI [0.89, 1.55]).</p>
<p>(<xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10C</bold></xref>) shows the changes in the average axial stem height of sorghum seedlings over time in the CK and different concentrations of GA<sub>3</sub> solutions. With the passage of time, the average axial stem height increased in both CK and all GA<sub>3</sub>-treated groups. However, as the concentration gradient of GA<sub>3</sub> solution increased, the average axial stem height first increased and then decreased, reaching a peak at 50 mg/L. At the 7th recording, the average axial stem height values in CK, 50 mg/L, 100 mg/L, 150 mg/L, 200 mg/L, and 250 mg/L groups were 3.98cm, 4.53cm, 4.28cm, 4.15cm, 4.09cm, and 4.08cm, respectively. Overall, within the concentration range of 50 mg/L to 250 mg/L, the promoting effect of GA<sub>3</sub> on the average axial stem height of sorghum seedlings first strengthened and then weakened, with the strongest effect observed at 50 mg/L. LMM revealed a significant main effect of GA<sub>3</sub> concentration (p&lt; 0.05) and time (p&lt; 0.001), as well as a significant GA<sub>3</sub> concentration &#xd7; time interaction (p&lt; 0.05). Estimated marginal means indicated that the 50 mg/L treatment produced a taller axial stem height than the control (mean difference = 0.55, 95% CI [0.32, 0.78]).</p>
<p>(<xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10D</bold></xref>) shows the changes in the average stem diameter of sorghum seedlings over time in the CK and different concentrations of GA<sub>3</sub> solutions. With the passage of time, the average stem diameter increased in both CK and all GA<sub>3</sub>-treated groups. However, as the concentration gradient of GA<sub>3</sub> solution increased, the average stem diameter first increased and then decreased, reaching a peak at 50 mg/L. At the 7th recording, the average stem diameter values in CK, 50 mg/L, 100 mg/L, 150 mg/L, 200 mg/L, and 250 mg/L groups were 6.18mm, 8.73mm, 8.60mm, 7.47mm, 6.96mm, and 6.76mm, respectively. Overall, within the concentration range of 50 mg/L to 250 mg/L, the promoting effect of GA<sub>3</sub> on the average stem diameter of sorghum seedlings first strengthened and then weakened, with the strongest effect observed at 50 mg/L. LMM revealed a significant main effect of GA<sub>3</sub> concentration (p&lt; 0.001) and time (p&lt; 0.001), as well as a significant GA<sub>3</sub> concentration &#xd7; time interaction (p&lt; 0.001). Estimated marginal means indicated that the 50 mg/L treatment produced a larger stem diameter than the control (mean difference = 2.55, 95% CI [1.98, 3.12]).</p>
<p>GA<sub>3</sub> affects seedling growth through multiple interacting physiological pathways.</p>
<p>Firstly, GA<sub>3</sub> promotes cell elongation&#x2014;primarily by inducing cell wall relaxation-related enzymes/proteins (e.g., expansin, XET) to reduce cell wall rigidity, thereby promoting longitudinal internode elongation rather than increasing the number of internodes. This type of cell wall relaxation mechanism is an important basis for elongation-promoting responses in plants (<xref ref-type="bibr" rid="B6">Cosgrove, 2024</xref>).</p>
<p>Secondly, GA<sub>3</sub> can induce the mobilization of storage substances in seeds/endosperms (e.g., inducing &#x3b1;-amylase synthesis), accelerating the decomposition of starch into soluble sugars to provide carbon sources and energy for rapid growth (<xref ref-type="bibr" rid="B16">Hedden, 2025</xref>).</p>
<p>Thirdly, in terms of antioxidant and stress response, appropriate concentrations of GA<sub>3</sub> can increase the activities of enzymes such as SOD, POD, CAT, and APX, thereby reducing the accumulation of ROS and MDA, and maintaining membrane system and cellular homeostasis. This is of great significance for seedling protection under stress conditions (<xref ref-type="bibr" rid="B31">Shahzad et&#xa0;al., 2021</xref>).</p>
<p>Fourthly, appropriate concentrations of GA<sub>3</sub> can increase chlorophyll content, net photosynthetic rate, and stomatal conductance, while decreasing intercellular CO<sub>2</sub> concentration. These changes indicate higher carbon assimilation capacity and gas exchange efficiency, providing energy and material basis for biomass accumulation. In contrast, excessive hormones or hormonal imbalance may be accompanied by photosynthetic inhibition and chlorophyll degradation, thereby impairing growth advantages (<xref ref-type="bibr" rid="B9">Fu et&#xa0;al., 2023</xref>).</p>
<p>In addition, under stress conditions such as salt stress, GA<sub>3</sub> has been reported to be involved in regulating ion homeostasis (e.g., reducing intracellular Na<sup>+</sup>, increasing K<sup>+</sup>/Ca<sup>2+</sup> ratios), synergizing with the aforementioned pathways to alleviate salt damage (<xref ref-type="bibr" rid="B1">Ali et&#xa0;al., 2021</xref>); however, the dose and application method determine its positive and negative effects. Combined with the &#x201c;first increase and then decrease&#x201d; response observed in this study&#x2019;s gradient experiment of 50&#x2013;250 mg&#xb7;L<sup>-1</sup> (with 50 mg&#xb7;L<sup>-1</sup> as the optimal concentration), it can be concluded that low to medium concentrations of GA<sub>3</sub> mainly promote seedling growth by enhancing cell elongation, nutrient mobilization, antioxidant capacity, and photosynthetic capacity, while high concentrations may cause inhibition due to hormonal signal and metabolic disorders.</p>
<p>In summary, within the concentration range of 50 mg/L to 250 mg/L, the promoting effect of GA<sub>3</sub> on the growth of sorghum seedlings first strengthened and then weakened, with the maximum effect observed at 50 mg/L.</p>
</sec>
</sec>
<sec id="s4" sec-type="conclusions">
<label>4</label>
<title>Conclusions</title>
<p>Our study proposes and implements a point cloud semantic segmentation network PTV2-Fr for organ segmentation and automatic extraction of phenotypic parameters of sorghum seedlings at the seedling stage. These early-stage phenotypic traits, such as leaf number, leaf area index, and stem height, are widely used as indicators of seedling vigor and establishment capacity, which are important targets in early-stage selection and screening in sorghum breeding programs. In terms of structural design, PTV2-Fr makes three key improvements targeting the characteristics of seedling point clouds: MRDCA for multi-scale geometric and coordinate perception enhancement; PG-InvFR for refining boundaries and local features before the segmentation head; and the composite loss EL Loss for alleviating class imbalance and directly optimizing IoU. Based on the sorghum seedling point cloud dataset constructed and manually annotated by us, systematic experiments and ablation studies show that the above designs make significant contributions to improving organ segmentation accuracy and boundary robustness.</p>
<p>PTV2-Fr demonstrates robust performance in quantitative evaluation: compared with the baseline PointTransformerV2, the overall accuracy is improved by approximately 2.5%; in terms of key segmentation metrics, the model achieves a mIoU increase of about 2.52%, a mPrec improvement of around 3.38%, and a mF1 rise of roughly 1.41% compared to the baseline. It should be noted that the objective of this study is not to directly predict final yield or late-stage agronomic performance, but to provide a reliable and fine-grained phenotyping tool for early-stage screening and comparative analysis under controlled experimental conditions. Ablation experiments further confirm the positive contributions of the three modules (MRDCA, PG-InvFR, and EL Loss) to performance&#x2014;each brings significant improvements in mIoU and boundary accuracy under different combinations. When compared with other common point cloud segmentation networks (PointNet, PointNet++, PTV1, PTV3, and U-Net), PTV2-Fr exhibits superior comprehensive performance in scenarios with dense seedlings, large differences in organ scales, and severe local occlusion, enabling more accurate distinction between three semantic regions: stems, leaves, and flowerpots.</p>
<p>While our model achieves high performance on the sorghum seedling dataset, there are still several limitations that need to be addressed. One significant challenge is handling occlusion. In plant point cloud data, occlusion often occurs when leaves or stems overlap, causing parts of the plant to be obscured. This issue can lead to missing or inaccurate segmentation of plant organs. To address this, future work could explore the integration of multi-view data, depth sensing, or point cloud completion techniques, which can help fill in the gaps caused by occlusion.</p>
<p>Another limitation is lighting changes, which can significantly impact point cloud quality. Variations in lighting conditions, such as changes in ambient light or sensor positioning, may affect the reflectivity and quality of point cloud data. While deep learning models show robustness under controlled conditions, their performance may degrade in real-world scenarios with varying lighting. Future research could focus on augmenting training data with varied lighting conditions or applying domain adaptation techniques to make models more resilient to lighting changes.</p>
<p>Finally, cross-species generalization remains a challenge. Our model was trained and evaluated on sorghum seedlings, and its performance on other plant species is not fully known. Different plant species have varying organ shapes, sizes, and structures, which can lead to difficulties in generalizing across species. To improve generalization, future work could focus on developing more robust models that are capable of handling a variety of plant species, potentially through the use of multi-species datasets and transfer learning techniques.</p>
<p>Based on the above limitations, future research can carry out improvements and expansions in the following directions: First, introduce multi-modal data and design a cross-modal feature alignment module to enhance stability under occlusion and light changes. Second, construct a multi-crop joint training or transfer learning framework to adapt the model to crops with significant morphological differences such as corn and wheat, and improve cross-species generalization ability. Third, it is necessary to extend the approach to large-scale, high-throughput data collection in field environments, including outdoor and semi-field conditions, in order to evaluate the model&#x2019;s applicability in real agricultural production scenarios. Fourth, combine uncertainty estimation with active acquisition strategies to trigger supplementary collection or manual review on samples with high uncertainty, thereby improving the system&#x2019;s reliability and deployability.</p>
<p>In conclusion, PTV2-Fr provides a feasible and high-performance technical solution for the efficient, non-destructive, and automated measurement of sorghum seedling phenotypes at the seedling stage. It not only significantly improves the recognition ability for small targets and boundaries in point cloud organ segmentation tasks but also offers reliable tool support for large-scale agricultural phenotypic quantification and studies on hormonal treatment responses based on point clouds. Once trained, the proposed model enables automated, non-destructive phenotypic measurement with minimal human intervention, which makes the pipeline suitable for scaling to larger populations in breeding and physiological studies. In the future, through data scale expansion, multi-modal fusion, and cross-crop generalization research, PTV2-Fr is expected to promote the application implementation of point cloud-driven high-throughput crop phenomics in breeding and precision cultivation.</p>
</sec>
</body>
<back>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p></sec>
<sec id="s6" sec-type="author-contributions">
<title>Author contributions</title>
<p>JL: Writing &#x2013; original draft, Writing &#x2013; review &amp; editing, Conceptualization, Methodology, Software, Visualization. YS: Writing &#x2013; original draft, Writing &#x2013; review &amp; editing, Conceptualization, Formal analysis, Methodology, Visualization. LT: Investigation, Writing &#x2013; review &amp; editing. ZYZ: Software, Writing &#x2013; review &amp; editing. YG: Data curation, Writing &#x2013; review &amp; editing. ZBZ: Supervision, Writing &#x2013; review &amp; editing. RB: Visualization, Writing &#x2013; review &amp; editing. PY: Formal analysis, Writing &#x2013; review &amp; editing. FP: Resources, Writing &#x2013; review &amp; editing. XF: Funding acquisition, Project administration, Supervision, Writing &#x2013; review &amp; editing.</p></sec>
<ack>
<title>Acknowledgments</title>
<p>We are very grateful to XF for his guidance and every student involved in this study for their help and advice. Thanks again to Nanjing Agricultural University for building the experimental platform.</p>
</ack>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s9" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ali</surname> <given-names>A. Y. A.</given-names></name>
<name><surname>Ibrahim</surname> <given-names>M. E. H.</given-names></name>
<name><surname>Zhou</surname> <given-names>G.</given-names></name>
<name><surname>Nimir</surname> <given-names>N. E. A.</given-names></name>
<name><surname>Elsiddig</surname> <given-names>A. M. I.</given-names></name>
<name><surname>Jiao</surname> <given-names>X.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>Gibberellic acid and nitrogen efficiently protect early seedlings growth stage from salt stress damage in sorghum</article-title>. <source>Sci. Rep.</source> <volume>11</volume>, <fpage>6672</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-021-84713-9</pub-id>, PMID: <pub-id pub-id-type="pmid">33758238</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Berman</surname> <given-names>M.</given-names></name>
<name><surname>Triki</surname> <given-names>A. R.</given-names></name>
<name><surname>Blaschko</surname> <given-names>M. B.</given-names></name>
</person-group> (<year>2018</year>). &#x201c;
<article-title>The Lov&#xe1;sz-Softmax loss: A tractable surrogate for the optimization of the intersection-over-union measure in neural networks</article-title>,&#x201d; in <conf-name>Proc. IEEE Conf. Comput. Vis. Pattern Recognit</conf-name> (<publisher-loc>Salt Lake City, UT, USA</publisher-loc>: 
<publisher-name>IEEE</publisher-name>), pp. <fpage>4413</fpage>&#x2013;<lpage>4421</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR.2018.00464</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Boogaard</surname> <given-names>F. P.</given-names></name>
<name><surname>van Henten</surname> <given-names>E. J.</given-names></name>
<name><surname>Kootstra</surname> <given-names>G.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Improved point-cloud segmentation for plant phenotyping through class-dependent sampling of training data to battle class imbalance</article-title>. <source>Front. Plant Sci.</source> <volume>13</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2022.838190</pub-id>, PMID: <pub-id pub-id-type="pmid">35419014</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chu</surname> <given-names>M.</given-names></name>
<name><surname>De Maria</surname> <given-names>G. L.</given-names></name>
<name><surname>Dai</surname> <given-names>R.</given-names></name>
<name><surname>Benenati</surname> <given-names>S.</given-names></name>
<name><surname>Yu</surname> <given-names>W.</given-names></name>
<name><surname>Zhong</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>DCCAT: Dual-coordinate cross-attention transformer for thrombus segmentation on coronary OCT</article-title>. <source>Med. Image Anal.</source> <volume>97</volume>, <elocation-id>103265</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.media.2024.103265</pub-id>, PMID: <pub-id pub-id-type="pmid">39029158</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>&#xc7;i&#xe7;ek</surname> <given-names>&#xd6;.</given-names></name>
<name><surname>Abdulkadir</surname> <given-names>A.</given-names></name>
<name><surname>Lienkamp</surname> <given-names>S. S.</given-names></name>
<name><surname>Brox</surname> <given-names>T.</given-names></name>
<name><surname>Ronneberger</surname> <given-names>O.</given-names></name>
</person-group> (<year>2016</year>). 
<article-title>3D U-Net: Learning dense volumetric segmentation from sparse annotation</article-title>. <source>Medical Image Computing and Computer-Assisted Intervention &#x2013; MICCAI 2016, Lecture Notes in Computer Science</source> <volume>9901</volume>, <fpage>424</fpage>&#x2013;<lpage>432</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-3-319-46723-8_49</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Cosgrove</surname> <given-names>D. J.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Plant cell wall loosening by expansins</article-title>. <source>Annu. Rev. Cell Dev. Biol.</source> <volume>40</volume>, <fpage>329</fpage>&#x2013;<lpage>352</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1146/annurev-cellbio-111822-115334</pub-id>, PMID: <pub-id pub-id-type="pmid">38724021</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Deng</surname> <given-names>Q.</given-names></name>
<name><surname>Zhao</surname> <given-names>J.</given-names></name>
<name><surname>Li</surname> <given-names>R.</given-names></name>
<name><surname>Liu</surname> <given-names>G.</given-names></name>
<name><surname>Hu</surname> <given-names>Y.</given-names></name>
<name><surname>Ye</surname> <given-names>Z.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>A precise segmentation algorithm of pumpkin seedling point cloud stem based on CPHNet</article-title>. <source>Plants</source> <volume>13</volume>, <elocation-id>2300</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/plants13162300</pub-id>, PMID: <pub-id pub-id-type="pmid">39204736</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Du</surname> <given-names>R.</given-names></name>
<name><surname>Zhai</surname> <given-names>G.</given-names></name>
<name><surname>Qiu</surname> <given-names>T.</given-names></name>
<name><surname>Jiang</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2025</year>). <source>Towards scalable organ level 3D plant segmentation: bridging the data algorithm computing gap</source>. (<publisher-loc>Ithaca, NY, USA</publisher-loc>: 
<publisher-name>Cornell University</publisher-name>). doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2509.06329</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Fu</surname> <given-names>J.</given-names></name>
<name><surname>Li</surname> <given-names>L.</given-names></name>
<name><surname>Wang</surname> <given-names>S.</given-names></name>
<name><surname>Yu</surname> <given-names>N.</given-names></name>
<name><surname>Shan</surname> <given-names>H.</given-names></name>
<name><surname>Shi</surname> <given-names>Z.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Effect of gibberellic acid on photosynthesis and oxidative stress response in maize under weak light conditions</article-title>. <source>Front. Plant Sci.</source> <volume>14</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2023.1128780</pub-id>, PMID: <pub-id pub-id-type="pmid">36875610</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Galba</surname> <given-names>A.</given-names></name>
<name><surname>Masner</surname> <given-names>J.</given-names></name>
<name><surname>Kholov&#xe1;</surname> <given-names>J.</given-names></name>
<name><surname>Kartal</surname> <given-names>S.</given-names></name>
<name><surname>Sto&#x10d;es</surname> <given-names>M.</given-names></name>
<name><surname>Mike&#x161;</surname> <given-names>V.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Annotated 3D point cloud dataset of broad-leaf legumes captured by high-throughput phenotyping platform</article-title>. <source>Sci. Data</source> <volume>12</volume>, <fpage>1764</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41597-025-06049-7</pub-id>, PMID: <pub-id pub-id-type="pmid">41213977</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gao</surname> <given-names>T.</given-names></name>
<name><surname>Zhu</surname> <given-names>F.</given-names></name>
<name><surname>Paul</surname> <given-names>P.</given-names></name>
<name><surname>Sandhu</surname> <given-names>J.</given-names></name>
<name><surname>Doku</surname> <given-names>H. A.</given-names></name>
<name><surname>Sun</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>Novel 3D imaging systems for high-throughput phenotyping of plants</article-title>. <source>Remote Sens.</source> <volume>13</volume>, <elocation-id>2113</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs13112113</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gilson</surname> <given-names>A.</given-names></name>
<name><surname>Meyer</surname> <given-names>L.</given-names></name>
<name><surname>Scholz</surname> <given-names>O.</given-names></name>
<name><surname>Schmid</surname> <given-names>U.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>OmniPlantSeg: Species-agnostic 3D point cloud organ segmentation for high-resolution plant phenotyping across modalities</article-title>. <source>arXiv [preprint]</source>. arXiv:2509.21038. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2509.21038</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Golbach</surname> <given-names>F.</given-names></name>
<name><surname>Kootstra</surname> <given-names>G.</given-names></name>
<name><surname>Damjanovic</surname> <given-names>S.</given-names></name>
<name><surname>Otten</surname> <given-names>G.</given-names></name>
<name><surname>van de Zedde</surname> <given-names>R.</given-names></name>
</person-group> (<year>2016</year>). 
<article-title>Validation of plant part measurements using a 3D reconstruction method suitable for high-throughput seedling phenotyping</article-title>. <source>Mach. Vis. Appl.</source> <volume>27</volume>, <fpage>663</fpage>&#x2013;<lpage>680</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00138-015-0727-5</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gupta</surname> <given-names>S.</given-names></name>
<name><surname>Tripathi</surname> <given-names>A. K.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Flora-NET: Integrating dual coordinate attention with adaptive kernel based convolution network for medicinal flower identification</article-title>. <source>Comput. Electron. Agric.</source> <volume>230</volume>, <elocation-id>109834</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2024.109834</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Harandi</surname> <given-names>N.</given-names></name>
<name><surname>Vandenberghe</surname> <given-names>B.</given-names></name>
<name><surname>Vankerschaver</surname> <given-names>J.</given-names></name>
<name><surname>Depuydt</surname> <given-names>S.</given-names></name>
<name><surname>Van Messem</surname> <given-names>A.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>How to make sense of 3D representations for plant phenotyping: A compendium of processing and analysis techniques</article-title>. <source>Plant Methods</source> <volume>19</volume>, <fpage>60</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13007-023-01031-z</pub-id>, PMID: <pub-id pub-id-type="pmid">37353846</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hedden</surname> <given-names>P.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Induction of &#x3b1;-amylase and endosperm-imposed seed dormancy: Two pioneering papers in gibberellin research</article-title>. <source>Planta</source> <volume>261</volume>, <fpage>118</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s00425-025-04699-w</pub-id>, PMID: <pub-id pub-id-type="pmid">40278915</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Heiwolt</surname> <given-names>K.</given-names></name>
<name><surname>Duckett</surname> <given-names>T.</given-names></name>
<name><surname>Cielniak</surname> <given-names>G.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Deep semantic segmentation of 3D plant point clouds</article-title>. <source>Toward Auton. Robot. Syst</source>. <volume>13054</volume>, <fpage>36</fpage>&#x2013;<lpage>45</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-3-030-89177-0_4</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hossain</surname> <given-names>M. S.</given-names></name>
<name><surname>Islam</surname> <given-names>M. N.</given-names></name>
<name><surname>Rahman</surname> <given-names>M. M.</given-names></name>
<name><surname>Mostofa</surname> <given-names>M. G.</given-names></name>
<name><surname>Khan</surname> <given-names>M. A. R.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Sorghum: A prospective crop for climatic vulnerability, food and nutritional security</article-title>. <source>J. Agric. Food Res.</source> <volume>8</volume>, <elocation-id>100300</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jafr.2022.100300</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jin</surname> <given-names>S.</given-names></name>
<name><surname>Li</surname> <given-names>D.</given-names></name>
<name><surname>Yun</surname> <given-names>T.</given-names></name>
<name><surname>Tang</surname> <given-names>J.</given-names></name>
<name><surname>Wang</surname> <given-names>K.</given-names></name>
<name><surname>Li</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Deep learning for three-dimensional (3D) plant phenomics</article-title>. <source>Plant Phenomics</source> <volume>7</volume>, <elocation-id>100107</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.plaphe.2025.100107</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jingwen</surname> <given-names>W.</given-names></name>
<name><surname>Hong</surname> <given-names>L.</given-names></name>
</person-group> (<year>2012</year>). 
<article-title>Measurement and analysis of plant leaf area based on image processing</article-title>. <source>Proc. Int. Symp. Inf. Technol. Med. Educ</source>. <volume>2</volume>, <fpage>1070</fpage>&#x2013;<lpage>1074</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ITIME.2012.6291485</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Koyama</surname> <given-names>K.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Leaf area estimation by photographing leaves sandwiched between transparent clear file folder sheets</article-title>. <source>Horticulturae</source> <volume>9</volume>, <elocation-id>709</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/horticulturae9060709</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>D.</given-names></name>
<name><surname>Shi</surname> <given-names>G.</given-names></name>
<name><surname>Li</surname> <given-names>J.</given-names></name>
<name><surname>Chen</surname> <given-names>Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>S.</given-names></name>
<name><surname>Xiang</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>PlantNet: A dual-function point cloud segmentation network for multiple plant species</article-title>. <source>ISPRS J. Photogramm. Remote Sens.</source> <volume>184</volume>, <fpage>243</fpage>&#x2013;<lpage>263</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.isprsjprs.2022.01.007</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>Z.</given-names></name>
<name><surname>Zhao</surname> <given-names>J.</given-names></name>
<name><surname>Hu</surname> <given-names>Y.</given-names></name>
<name><surname>Li</surname> <given-names>R.</given-names></name>
<name><surname>Deng</surname> <given-names>Q.</given-names></name>
<name><surname>Guan</surname> <given-names>R.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>FACNet: A high-precision pumpkin seedling point cloud organ segmentation method</article-title>. <source>Comput. Electron. Agric.</source> <volume>231</volume>, <elocation-id>110049</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2025.110049</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Merto&#x11f;lu</surname> <given-names>K.</given-names></name>
<name><surname>&#x15e;alk</surname> <given-names>Y.</given-names></name>
<name><surname>Sar&#x131;kaya</surname> <given-names>S. K.</given-names></name>
<name><surname>Turgut</surname> <given-names>K.</given-names></name>
<name><surname>Evreneso&#x11f;lu</surname> <given-names>Y.</given-names></name>
<name><surname>&#xc7;evikalp</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>PLANesT-3D: A new annotated dataset for segmentation of 3D plant point clouds</article-title>. <source>arXiv [preprint]</source>. arXiv:2407.21150. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2407.21150</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Miao</surname> <given-names>T.</given-names></name>
<name><surname>Zhu</surname> <given-names>C.</given-names></name>
<name><surname>Xu</surname> <given-names>T.</given-names></name>
<name><surname>Yang</surname> <given-names>T.</given-names></name>
<name><surname>Li</surname> <given-names>N.</given-names></name>
<name><surname>Zhou</surname> <given-names>Y.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>Automatic stem-leaf segmentation of maize shoots using three-dimensional point cloud</article-title>. <source>Comput. Electron. Agric.</source> <volume>187</volume>, <elocation-id>106310</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2021.106310</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mwamahonje</surname> <given-names>A.</given-names></name>
<name><surname>Mdindikasi</surname> <given-names>Z.</given-names></name>
<name><surname>Mchau</surname> <given-names>D.</given-names></name>
<name><surname>Mwenda</surname> <given-names>E.</given-names></name>
<name><surname>Sanga</surname> <given-names>D.</given-names></name>
<name><surname>Garcia-Oliveira</surname> <given-names>A. L.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Advances in sorghum improvement for climate resilience in the global arid and semi-arid tropics: A review</article-title>. <source>Agronomy</source> <volume>14</volume>, <elocation-id>3025</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy14123025</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Nguyen</surname> <given-names>T. T.</given-names></name>
<name><surname>Slaughter</surname> <given-names>D. C.</given-names></name>
<name><surname>Max</surname> <given-names>N.</given-names></name>
<name><surname>Maloof</surname> <given-names>J. N.</given-names></name>
<name><surname>Sinha</surname> <given-names>N.</given-names></name>
</person-group> (<year>2015</year>). 
<article-title>Structured light-based 3D reconstruction system for plants</article-title>. <source>Sensors</source> <volume>15</volume>, <fpage>18587</fpage>&#x2013;<lpage>18612</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s150818587</pub-id>, PMID: <pub-id pub-id-type="pmid">26230701</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Paterson</surname> <given-names>A. H.</given-names></name>
<name><surname>Bowers</surname> <given-names>J. E.</given-names></name>
<name><surname>Bruggmann</surname> <given-names>R.</given-names></name>
<name><surname>Dubchak</surname> <given-names>I.</given-names></name>
<name><surname>Grimwood</surname> <given-names>J.</given-names></name>
<name><surname>Gundlach</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2009</year>). 
<article-title>The <italic>Sorghum bicolor</italic> genome and the diversification of grasses</article-title>. <source>Nature</source> <volume>457</volume>, <fpage>551</fpage>&#x2013;<lpage>556</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/nature07723</pub-id>, PMID: <pub-id pub-id-type="pmid">19189423</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Qi</surname> <given-names>C. R.</given-names></name>
<name><surname>Su</surname> <given-names>H.</given-names></name>
<name><surname>Mo</surname> <given-names>K.</given-names></name>
<name><surname>Guibas</surname> <given-names>L. J.</given-names></name>
</person-group> (<year>2017</year>a). &#x201c;
<article-title>PointNet: Deep learning on point sets for 3D classification and segmentation</article-title>,&#x201d; in <conf-name>Proc. IEEE Conf. Comput. Vis. Pattern Recognit</conf-name> <conf-loc>Honolulu, HI, USA</conf-loc> (<publisher-loc>Piscataway, NJ</publisher-loc>: 
<publisher-name>IEEE</publisher-name>). <fpage>77</fpage>&#x2013;<lpage>85</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR.2017.16</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Qi</surname> <given-names>C. R.</given-names></name>
<name><surname>Yi</surname> <given-names>L.</given-names></name>
<name><surname>Su</surname> <given-names>H.</given-names></name>
<name><surname>Guibas</surname> <given-names>L. J.</given-names></name>
</person-group> (<year>2017</year>b). 
<article-title>PointNet++: Deep hierarchical feature learning on point sets in a metric space</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> (<publisher-loc>Red Hook, NY</publisher-loc>: 
<publisher-name>Curran Associates, Inc.</publisher-name>) <volume>30</volume>, <fpage>4</fpage>&#x2013;<lpage>9</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1706.02413</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Shahzad</surname> <given-names>K.</given-names></name>
<name><surname>Hussain</surname> <given-names>S.</given-names></name>
<name><surname>Arfan</surname> <given-names>M.</given-names></name>
<name><surname>Hussain</surname> <given-names>S.</given-names></name>
<name><surname>Waraich</surname> <given-names>E. A.</given-names></name>
<name><surname>Zamir</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>). 
<article-title>Exogenously applied gibberellic acid enhances growth and salinity stress tolerance of maize through modulating the morpho-physiological, biochemical and molecular attributes</article-title>. <source>Biomolecules</source> <volume>11</volume>, <elocation-id>1005</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/biom11071005</pub-id>, PMID: <pub-id pub-id-type="pmid">34356629</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Shen</surname> <given-names>J.</given-names></name>
<name><surname>Wu</surname> <given-names>T.</given-names></name>
<name><surname>Zhao</surname> <given-names>J.</given-names></name>
<name><surname>Wu</surname> <given-names>Z.</given-names></name>
<name><surname>Huang</surname> <given-names>Y.</given-names></name>
<name><surname>Gao</surname> <given-names>P.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Organ segmentation and phenotypic trait extraction of cotton seedling point clouds based on a 3D lightweight network</article-title>. <source>Agronomy</source> <volume>14</volume>, <elocation-id>1083</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy14051083</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Song</surname> <given-names>H.</given-names></name>
<name><surname>Wen</surname> <given-names>W.</given-names></name>
<name><surname>Wu</surname> <given-names>S.</given-names></name>
<name><surname>Guo</surname> <given-names>X.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Comprehensive review on 3D point cloud segmentation in plants</article-title>. <source>Artif. Intell. Agric</source>. <volume>15</volume>, <fpage>296</fpage>&#x2013;<lpage>315</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.aiia.2025.01.006</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Tu</surname> <given-names>L.-F.</given-names></name>
<name><surname>Peng</surname> <given-names>Q.</given-names></name>
<name><surname>Li</surname> <given-names>C.-S.</given-names></name>
<name><surname>Zhang</surname> <given-names>A.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>2D <italic>in situ</italic> method for measuring plant leaf area with camera correction and background color calibration</article-title>. <source>Sci. Program.</source> <volume>2021</volume>, <elocation-id>6650099</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2021/6650099</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Vayssade</surname> <given-names>J.-A.</given-names></name>
<name><surname>Jones</surname> <given-names>G.</given-names></name>
<name><surname>G&#xe9;e</surname> <given-names>C.</given-names></name>
<name><surname>Paoli</surname> <given-names>J.-N.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Pixelwise instance segmentation of leaves in dense foliage</article-title>. <source>Comput. Electron. Agric.</source> <volume>195</volume>, <elocation-id>106797</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2022.106797</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>R.-F.</given-names></name>
<name><surname>Qu</surname> <given-names>H.-R.</given-names></name>
<name><surname>Su</surname> <given-names>W.-H.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>From sensors to insights: Technological trends in image-based high-throughput plant phenotyping</article-title>. <source>Smart Agric. Technol.</source> <volume>12</volume>, <fpage>101257</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.atech.2025.101257</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Wu</surname> <given-names>X.</given-names></name>
<name><surname>Jiang</surname> <given-names>L.</given-names></name>
<name><surname>Wang</surname> <given-names>P.-S.</given-names></name>
<name><surname>Liu</surname> <given-names>Z.</given-names></name>
<name><surname>Liu</surname> <given-names>X.</given-names></name>
<name><surname>Qiao</surname> <given-names>Y.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). &#x201c;
<article-title>Point transformer v3: Simpler, faster, stronger</article-title>,&#x201d; in <conf-name>Proc. IEEE/CVF Conf. Comput. Vis. Pattern Recognit</conf-name> (<publisher-loc>Piscataway, NJ</publisher-loc>: 
<publisher-name>IEEE</publisher-name>). doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR52733.2024.00463</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Xiang</surname> <given-names>L.</given-names></name>
<name><surname>Tang</surname> <given-names>L.</given-names></name>
<name><surname>Gai</surname> <given-names>J.</given-names></name>
<name><surname>Wang</surname> <given-names>L.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Measuring stem diameter of sorghum plants in the field using a high-throughput stereo vision system</article-title>. <source>Trans. ASABE</source> <volume>64</volume>, <fpage>1999</fpage>&#x2013;<lpage>2010</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.13031/trans.14156</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Xie</surname> <given-names>Y.</given-names></name>
<name><surname>Zhou</surname> <given-names>J.</given-names></name>
<name><surname>Xu</surname> <given-names>X.</given-names></name>
<name><surname>Miao</surname> <given-names>M.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Cucumber seedling segmentation network based on a multiview geometric graph encoder from 3D point clouds</article-title>. <source>Plant Phenomics</source> <volume>6</volume>, <elocation-id>254</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.34133/plantphenomics.0254</pub-id>, PMID: <pub-id pub-id-type="pmid">39415968</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Zhao</surname> <given-names>H.</given-names></name>
<name><surname>Jiang</surname> <given-names>L.</given-names></name>
<name><surname>Jia</surname> <given-names>J.</given-names></name>
<name><surname>Torr</surname> <given-names>P. H. S.</given-names></name>
<name><surname>Koltun</surname> <given-names>V.</given-names></name>
</person-group> (<year>2021</year>). &#x201c;
<article-title>Point transformer</article-title>,&#x201d; in <conf-name>Proc. IEEE/CVF Int. Conf. Comput. Vis.</conf-name> (<publisher-loc>Piscataway, NJ</publisher-loc>: 
<publisher-name>IEEE</publisher-name>). doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ICCV48922.2021.01595</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhou</surname> <given-names>J.</given-names></name>
<name><surname>Applegate</surname> <given-names>C.</given-names></name>
<name><surname>Alonso</surname> <given-names>A. D.</given-names></name>
<name><surname>Reynolds</surname> <given-names>D.</given-names></name>
<name><surname>Orford</surname> <given-names>S.</given-names></name>
<name><surname>Mackiewicz</surname> <given-names>M.</given-names></name>
<etal/>
</person-group>. (<year>2017</year>). 
<article-title>Leaf-GP: An open and automated software application for measuring growth phenotypes for <italic>Arabidopsis</italic> and wheat</article-title>. <source>Plant Methods</source> <volume>13</volume>, <fpage>117</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13007-017-0266-3</pub-id>, PMID: <pub-id pub-id-type="pmid">29299051</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhou</surname> <given-names>Y.</given-names></name>
<name><surname>Qi</surname> <given-names>Y.</given-names></name>
<name><surname>Xiang</surname> <given-names>L.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Automatic extraction method of phenotypic parameters for <italic>Phoebe zhennan</italic> seedlings based on 3D point cloud</article-title>. <source>Agriculture</source> <volume>15</volume>, <elocation-id>834</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriculture15080834</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhou</surname> <given-names>W.</given-names></name>
<name><surname>Wang</surname> <given-names>X.</given-names></name>
<name><surname>Yang</surname> <given-names>X.</given-names></name>
<name><surname>Hu</surname> <given-names>Y.</given-names></name>
<name><surname>Yi</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Skeleton-guided multi-scale dual-coordinate attention aggregation network for retinal blood vessel segmentation</article-title>. <source>Comput. Biol. Med.</source> <volume>181</volume>, <elocation-id>109027</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compbiomed.2024.109027</pub-id>, PMID: <pub-id pub-id-type="pmid">39178808</pub-id>
</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhu</surname> <given-names>J.</given-names></name>
<name><surname>Zhai</surname> <given-names>R.</given-names></name>
<name><surname>Ren</surname> <given-names>H.</given-names></name>
<name><surname>Xie</surname> <given-names>K.</given-names></name>
<name><surname>Du</surname> <given-names>A.</given-names></name>
<name><surname>He</surname> <given-names>X.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Crops3D: A diverse 3D crop dataset for realistic perception and segmentation toward agricultural applications</article-title>. <source>Sci. Data</source> <volume>11</volume>, <fpage>1438</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41597-024-04290-0</pub-id>, PMID: <pub-id pub-id-type="pmid">39730336</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3152000">Sathishkumar Samiappan</ext-link>, The University of Tennessee, United States</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3121077">Ajay Kumar Patel</ext-link>, Saint Louis University, United States</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3319693">Pan Zhang</ext-link>, China Agricultural University, China</p></fn>
</fn-group>
<fn-group>
<fn fn-type="abbr" id="abbrev1">
<label>Abbreviations:</label>
<p>PTV2, Point Transformer V2; 3D, three-dimensional; GA<sub>3</sub>, gibberellin; MRDCA, Multi-radius Dual-Coordinate Attention; PG-InvFR, Point-Graph Involution Feature Refinement; EL Loss, the combination of class-weighted CrossEntropy Loss and Lov&#xe1;sz Loss; PTV3, Point Transformer V3; mR, mean Recall; mF1, mean F1-score; mIoU, mean Intersection over Union; mP, mean Precision; Inv-FR, Involution-Based Feature Refinement; PTV1, Point Transformer; CK, control check; LAI, leaf area index; LMM, linear mixed-effects model.</p>
</fn>
</fn-group>
</back>
</article>