<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2026.1743104</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>SoyCountNet: a deep learning framework for counting and locating soybean seeds in field environment</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Liu</surname><given-names>Fei</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Wu</surname><given-names>Qiong</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname><given-names>Haoyu</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Han</surname><given-names>Zhongzhi</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/868226/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname><given-names>Shudong</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhao</surname><given-names>Longgang</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2918527/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Wang</surname><given-names>Zhaohua</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Luan</surname><given-names>Hexiang</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2720840/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>College of Science and Information Science, Qingdao Agricultural University</institution>, <city>Qingdao</city>,&#xa0;<country country="CN">China</country></aff>
<aff id="aff2"><label>2</label><institution>Qingdao Institute of Software, College of Computer Science and Technology, China University of Petroleum (East China)</institution>, <city>Qingdao</city>,&#xa0;<country country="CN">China</country></aff>
<aff id="aff3"><label>3</label><institution>Institute of Agricultural Information and Economics, Shandong Academy of Agricultural Sciences</institution>, <city>Jinan</city>,&#xa0;<country country="CN">China</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Hexiang Luan, <email xlink:href="mailto:dbkh@qau.edu.cn">dbkh@qau.edu.cn</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-25">
<day>25</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>17</volume>
<elocation-id>1743104</elocation-id>
<history>
<date date-type="received">
<day>11</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>02</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>01</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Liu, Wu, Wang, Han, Wang, Zhao, Wang and Luan.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Liu, Wu, Wang, Han, Wang, Zhao, Wang and Luan</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-25">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Accurate counting and spatial localization of soybean seeds&#x2014;particularly Seeds Per Plant (SPP)&#x2014;are critical for yield estimation and cultivar evaluation. In field environments, however, complex backgrounds, pod occlusion, and uneven grain filling make high-precision counting challenging, and traditional methods often struggle to balance accuracy and robustness.</p>
</sec>
<sec>
<title>Methods</title>
<p>To address these challenges, this study proposes SoyCountNet, a deep learning framework for automatic soybean seed counting and localization at the single-plant level under field conditions. The model is built on a self-constructed field-based phenotyping platform and optimized using the lightweight Point-to-Point Network (P2PNet). For feature extraction, a VGG19_BN backbone and a Super Token Sampling Vision Transformer (SViT) module are employed to enhance local feature representation and global contextual understanding. During feature fusion, the Efficient Channel Attention (ECA) mechanism strengthens seed-related features while suppressing interference from leaves, stems, and soil. Furthermore, an improved loss function that combines point-distance constraints with overlap penalties enhances both counting precision and spatial consistency.</p>
</sec>
<sec>
<title>Results</title>
<p>Experimental results demonstrate that SoyCountNet outperforms existing approaches on the field soybean dataset. It achieves a mean absolute error (MAE) of 4.61, a root mean square error (RMSE) of 6.03, and a coefficient of determination (R&#xb2;) of 0.94. The model demonstrates consistent performance across the tested soybean cultivars, providing reliable SPP estimates within the evaluated dataset.</p>
</sec>
<sec>
<title>Discussion</title>
<p>These findings indicate that SoyCountNet offers a reliable and scalable solution for precise soybean seed counting and localization in complex field environments. Its lightweight architecture allows deployment on intelligent agricultural platforms, supporting high-throughput phenotyping, yield prediction, and precision breeding, while providing a foundation for the future development of intelligent and sustainable agricultural technologies.</p>
</sec>
</abstract>
<kwd-group>
<kwd>attention mechanism</kwd>
<kwd>deep learning</kwd>
<kwd>point-to-point network</kwd>
<kwd>precision breeding</kwd>
<kwd>seeds per plant</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This study was funded by the Seed-Industrialized Development Program in Shandong Province (2024LZGC030, 2024LZGC010), Shandong Soybean Industrial Technology System of China (SDAIT-28), National Natural Science Foundation of China (32301895), Ministry of Science and Technology of China with the National Key Research and Development Program (2025YFE0111301), Key R&amp;D Program (Soft Science Project) of Shandong Province, China (2025RZA0601).</funding-statement>
</funding-group>
<counts>
<fig-count count="13"/>
<table-count count="5"/>
<equation-count count="13"/>
<ref-count count="43"/>
<page-count count="18"/>
<word-count count="9203"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Sustainable and Intelligent Phytoprotection</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>With the rapid development of smart agriculture and high-throughput phenotyping (HTP) technologies, automated and precise acquisition of crop phenotypic data has become essential for modern breeding and yield prediction (<xref ref-type="bibr" rid="B4">Fan et&#xa0;al., 2021</xref>; <xref ref-type="bibr" rid="B37">Zavafer et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B15">Liu et&#xa0;al., 2025b</xref>). Soybean (<italic>Glycine max</italic>), valued for its high protein and oil content, is an economically and ecologically important crop (<xref ref-type="bibr" rid="B17">Mishra et&#xa0;al., 2024</xref>). Soybean yield is influenced by multiple agronomic traits, including pod number, seed number, and their spatial distribution, which serve as key indicators for evaluating population productivity and guiding genetic improvement (<xref ref-type="bibr" rid="B41">Zhao et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B35">Yang et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B40">Zhang et&#xa0;al., 2025</xref>). Among these, the number of seeds per plant is a particularly critical determinant of yield (<xref ref-type="bibr" rid="B9">Li et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B35">Yang et&#xa0;al., 2024</xref>). Traditional seed counting methods, relying on manual observation or semi-automated image analysis, are time-consuming, labor-intensive, and prone to human error. Under complex field conditions&#x2014;characterized by variable lighting, severe occlusion, heterogeneous backgrounds, and incomplete grain filling&#x2014;manual counting often results in substantial inaccuracies (<xref ref-type="bibr" rid="B30">Wattana et&#xa0;al., 2018</xref>). Recent advances in computer vision and deep learning (DL) have provided effective tools to overcome these challenges.</p>
<p>In recent years, DL has demonstrated substantial potential in agricultural computer vision, offering robust solutions for crop phenotyping, object counting, and localization under complex field conditions (<xref ref-type="bibr" rid="B19">Murphy et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B7">Jin et&#xa0;al., 2025</xref>). For instance, region-based detectors such as Faster R-CNN achieve high precision in localizing fruits and plant organs (<xref ref-type="bibr" rid="B2">Behera et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B24">Tian et&#xa0;al., 2025</xref>), whereas single-stage models like YOLO and SSD provide a favorable balance between accuracy and computational efficiency in high-throughput counting tasks (<xref ref-type="bibr" rid="B10">Li et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B36">Yu et&#xa0;al., 2024</xref>). However, anchor-based detection frameworks often encounter difficulties when dealing with dense, small, or occluded targets, resulting in missed detections, redundant predictions, and localization errors. These limitations constrain their applicability in high-density crop counting and fine-grained phenotyping. To address these challenges, recent studies have explored point-based counting strategies and attention-enhanced architectures to improve the detection of small, dense, and occluded targets (<xref ref-type="bibr" rid="B32">Wu et&#xa0;al., 2025</xref>; <xref ref-type="bibr" rid="B7">Jin et&#xa0;al., 2025</xref>).</p>
<p>DL has also been increasingly applied to soybean phenotyping, improving automation and accuracy in feature extraction from complex field images (<xref ref-type="bibr" rid="B22">Okada et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B8">Kwon et&#xa0;al., 2025</xref>). These advancements have facilitated high-throughput data acquisition and data-driven decision-making in breeding, germplasm improvement, and yield prediction (<xref ref-type="bibr" rid="B43">Zhou et&#xa0;al., 2025</xref>). For example, <xref ref-type="bibr" rid="B18">Moeinizade et&#xa0;al. (2022)</xref> employed a CNN&#x2013;LSTM model with UAV-based time-series imagery to estimate soybean maturity, while <xref ref-type="bibr" rid="B10">Li et&#xa0;al. (2024)</xref> proposed SoybeanNet, which integrates a Transformer with point regression for pod counting and localization. <xref ref-type="bibr" rid="B3">Chen et&#xa0;al. (2023)</xref> utilized hyperspectral imaging to quantify leaf traits, and <xref ref-type="bibr" rid="B31">Wei et&#xa0;al. (2023)</xref> applied Kinect 2.0&#x2013;based 3D reconstruction to estimate the leaf area index (LAI). <xref ref-type="bibr" rid="B39">Zhang Z, et&#xa0;al. (2024)</xref> developed DSBEAN to link phenotypes with genetic improvement, and <xref ref-type="bibr" rid="B14">Liu et&#xa0;al. (2025a)</xref> introduced SmartPod for automatic pod counting under field conditions. Despite these advances, automatic seed counting at the single-plant level remains largely underexplored. Most existing statistical or vision-based approaches exhibit limited robustness in complex field environments, where heterogeneous backgrounds, dense occlusion, and subtle seed textures degrade detection accuracy and spatial consistency. Therefore, developing a robust and scalable DL framework for accurate seed counting under real-world field conditions is essential for intelligent soybean phenotyping and precision breeding.</p>
<p>To address this gap, this study constructed a point-annotated soybean seed dataset using a self-developed field phenotyping platform and proposed SoyCountNet, a deep learning framework built upon the lightweight P2PNet. The framework adopts a multi-level feature optimization strategy: the VGG19_BN backbone enhances feature representation and training stability, while the SViT module captures both local and global contextual information to improve the detection of dense and occluded seeds. During feature fusion, the ECA mechanism highlights seed-related features and suppresses background interference. Moreover, an improved loss function combining Nearest-Neighbor and Target Overlap penalties mitigates uneven point distribution and redundant predictions, thereby enhancing localization consistency and counting robustness. Experimental results demonstrate that SoyCountNet significantly improves seed detection and counting accuracy under complex field conditions while maintaining a lightweight and deployable architecture, providing a scalable solution for intelligent field phenotyping and high-throughput seed counting.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Experimental conditions, data acquisition, and annotation</title>
<p>Data were acquired using the automated gantry-based field phenotyping system TraitDiscover, equipped with an industrial-grade Trait-RGB camera, as shown in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>, with main specifications listed in <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>. The system supports high-throughput, multi-angle imaging with controlled illumination and automatic positioning, enabling continuous and precise capture of crop samples. The camera was mounted at a fixed height of 1.8 m to capture the full canopy of each soybean plant. To minimize the influence of natural light fluctuations, all images were collected at night under uniform illumination provided by the platform&#x2019;s built-in constant light source. Each plant was photographed from both front and back views to enhance target coverage and counting accuracy.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Schematic diagram of the TraitDiscover high-throughput phenotyping platform.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1743104-g001.tif">
<alt-text content-type="machine-generated">Nighttime outdoor scene showing a large entrance gate with Chinese characters, illuminated by yellow lighting. An inset image highlights a rectangular white monitoring device labeled &#x201c;TOPSCMART,&#x201d; situated below the structure and lit by a bright spotlight.</alt-text>
</graphic></fig>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Parameters of the Trait-RGB.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Field of view</th>
<th valign="middle" align="left">2000mm*1500mm@H=1500mm</th>
<th valign="middle" align="left">Camera sensor type</th>
<th valign="middle" align="left">CMOS</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left" style="">Pixel Resolution</td>
<td valign="middle" align="left" style="">6460*4850 (31.3 megapixels)</td>
<td valign="middle" align="left" style="">Frame Rate</td>
<td valign="middle" align="left" style="">8fps</td>
</tr>
<tr>
<td valign="middle" align="left" style="">Focal Length</td>
<td valign="middle" align="left" style="">8mm</td>
<td valign="middle" align="left" style="">Color Type</td>
<td valign="middle" align="left" style="">RGB Color</td>
</tr>
<tr>
<td valign="middle" align="left" style="">Lens Mount</td>
<td valign="middle" align="left" style="">C-Mount</td>
<td valign="middle" align="left" style="">Pixel Size</td>
<td valign="middle" align="left" style="">3.45um*3.45um</td>
</tr>
<tr>
<td valign="middle" align="left" style="">Object Distance</td>
<td valign="middle" align="left" style="">2000mm~3500mm</td>
<td valign="middle" align="left" style="">Exposure Time</td>
<td valign="middle" align="left" style="">46us-2sec</td>
</tr>
<tr>
<td valign="middle" align="left" style="">Camera Sensor Size</td>
<td valign="middle" align="left" style="">&#x2265;14.1mm*10.3mm</td>
<td valign="middle" align="left" style="">Imaging Light Source</td>
<td valign="middle" align="left" style="">Four-band LED supplemental lighting, standard color calibration with a color reference card</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The field experiment was conducted from June 2024 to October 2025 at the National Saline&#x2013;Alkali Land Comprehensive Utilization Technology Innovation Center in Guangrao County, Dongying, Shandong Province, China (37&#xb0;18&#x2032;36&#x2033; N, 118&#xb0;39&#x2032;0&#x2033; E), as shown in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>. The field layout followed a standardized planting scheme with 40 cm row spacing and 10 cm plant spacing. To simulate different salinity conditions, the soil was treated with 0&#x2030; NaCl for the control group and 2.5&#x2030; NaCl for the stress group. Six representative soybean cultivars were cultivated independently under both conditions, with an equal number of images collected under each treatment (1:1 ratio). A total of 800 high-resolution RGB images were obtained at the mature stage, covering diverse cultivars and plant architectures. All images were divided into training, validation, and test sets in an 8:1:1 ratio, with 20 images from each cultivar selected for cross-cultivar robustness evaluation.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Geographical location of the experimental fields.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1743104-g002.tif">
<alt-text content-type="machine-generated">Four-panel image showing: a map of Guangrao County, Dongying, Shandong Province, China with location markers; a photograph of saline-alkali farmland with a large sign and researchers; a detailed field plan labeled as the National Center for Comprehensive Utilization of Saline-Alkali Land Technology Innovation; and a field trial of crops on Yanchi saline-alkali land with rows of labeled plants.</alt-text>
</graphic></fig>
<p>For seed-level annotation, a point-based labeling strategy was adopted, where each soybean seed was marked by a single pixel at its centroid. Three researchers with agronomy expertise independently performed the annotations, which were cross-verified by an expert to minimize subjective bias. For overlapping or partially occluded seeds, only the visible central region was annotated to avoid redundant markings. To ensure annotation consistency, a quality control protocol combining cross-validation and random inspection was applied: after every 100 annotated images, 10% were randomly reviewed, and any coordinates deviating by more than 8 pixels or showing missing or duplicate labels were corrected. The final mean annotation error was controlled within 6 pixels. Annotation data were stored in TXT format, including image paths, seed coordinates, cultivar, and salinity condition for subsequent model training and supervision. Representative examples of the annotated data are presented in <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Representative single-plant soybean images and corresponding point annotations across different cultivars. The examples illustrate variations in background complexity, including pod and stem occlusion, similar soil coloration, and incomplete pod filling.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1743104-g003.tif">
<alt-text content-type="machine-generated">Five vertically aligned photographs of soybean plants on cracked soil display varying pod arrangements and occlusions. Two insets, outlined in red, enlarge selected regions with yellow circles marking pod occlusion, green diamonds for stem occlusion, and blue arrows for poor seed filling, as indicated by a legend at the bottom. Red dots highlight individual seeds or pods.</alt-text>
</graphic></fig>
<p>The number of seeds per plant was counted for all 800 images, with a minimum of 3 seeds, a maximum of 149 seeds, and an average of 52.22 seeds per plant (see <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>), indicating substantial variation in seed density among the samples. From the perspective of the counting task, <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref> presents the distribution histogram of seed counts per plant. The results show that the seed counts exhibit a continuous range from sparse to dense, rather than being concentrated within a narrow interval. Combined with the density stratification statistics in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>, the dataset includes 273 sparse samples (&#x2264;37 seeds), 267 medium-density samples (38&#x2013;63 seeds), and 260 dense samples (&gt;63 seeds), providing a relatively balanced distribution across different density levels.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Statistical summary of seed counts and density stratification in the dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Number of images</th>
<th valign="middle" align="center">Min seeds/plant</th>
<th valign="middle" align="center">Mean seeds/plant</th>
<th valign="middle" align="center">Max seeds/plant</th>
<th valign="middle" align="center">Sparse (&#x2264; 37 seeds)</th>
<th valign="middle" align="center">Medium (38&#x2013;63 seeds)</th>
<th valign="middle" align="center">Dense (&gt; 63 seeds)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center" style="">800</td>
<td valign="middle" align="center" style="">3</td>
<td valign="middle" align="center" style="">52.22</td>
<td valign="middle" align="center" style="">149</td>
<td valign="middle" align="center" style="">273</td>
<td valign="middle" align="center" style="">267</td>
<td valign="middle" align="center" style="">260</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Distribution of seed counts per plant in the constructed dataset. The dashed vertical lines indicate the quantile-based thresholds used for coarse density stratification.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1743104-g004.tif">
<alt-text content-type="machine-generated">Histogram depicting the distribution of seed counts per plant with frequency on the y-axis labeled as number of images and seed count per plant on the x-axis; most common seed count ranges between approximately ten and seventy. Vertical dashed lines mark two specific values, possibly indicating statistical thresholds such as quartiles or means.</alt-text>
</graphic></fig>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>SoyCountNet for soybean seed counting and localization</title>
<p>An improved end-to-end counting and localization framework, SoyCountNet, was developed based on the P2PNet architecture to achieve high-precision seed counting and stable localization under field conditions. The framework addresses challenges arising from complex backgrounds, pod occlusion, and uneven seed filling. Unlike conventional density- or bounding-box-based methods, P2PNet directly learns the spatial distribution of target centers from point annotations by regressing their coordinates, avoiding blurred density estimation and feature drift in high-density or occluded scenarios. Its architecture comprises a backbone network, a feature fusion layer, and a point regression branch, optimized with a multi-task loss function to minimize spatial deviations between predicted and ground-truth points. This design enables end-to-end training, maintains a lightweight structure, and ensures high spatial localization accuracy (<xref ref-type="bibr" rid="B41">Zhao et&#xa0;al., 2023</xref>).</p>
<p>Soybean seed images collected under field conditions often exhibit complex textures, similar foreground&#x2013;background colors, cultivar-specific morphological variations, and weak seed bulging features, which can limit the accuracy and robustness of the original P2PNet. Duplicate, missing, or mislocalized predictions are particularly common in high-density, overlapping, or small-object scenarios. To overcome these limitations, SoyCountNet incorporates four key enhancements:</p>
<list list-type="order">
<list-item>
<p>Backbone Feature Extraction (VGG19_BN): efficient multi-scale spatial encoding for enhanced feature representation.</p></list-item>
<list-item>
<p>Global Context Modeling (SViT): captures long-range dependencies and global semantic information.</p></list-item>
<list-item>
<p>Feature Enhancement (ECA): amplifies discriminative seed features while suppressing background interference.</p></list-item>
<list-item>
<p>Loss Function: combines point-distance constraints and overlap penalties to improve counting precision and spatial consistency.</p></list-item>
</list>
<p>These modules preserve the lightweight, end-to-end design of P2PNet while substantially improving counting accuracy and robustness under complex field conditions.</p>
<p>The processing workflow of SoyCountNet is as follows. Annotated RGB images are first passed through the VGG19_BN backbone to extract high-level features with rich local semantics. The SViT module then models global context, capturing long-range dependencies and semantic relationships, which is particularly beneficial in dense or occluded regions. Extracted features are subsequently enhanced via ECA, which emphasizes seed-specific channel responses while suppressing interference from leaves, stems, and soil, thereby improving localization accuracy and counting stability. Finally, regression and classification heads predict spatial coordinates and confidence scores for each seed. During training, the loss function incorporates Nearest-Neighbor and Target Overlap penalties to regulate spatial distribution, reduce duplicate predictions, and minimize mismatches. Through this hierarchical and modular design, SoyCountNet achieves fast, accurate, and robust soybean seed counting and localization under complex field conditions, providing a reliable solution for high-throughput phenotyping and yield estimation. The overall architecture is illustrated in <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>The SoyCountNet framework for soybean grain counting and localization.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1743104-g005.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a seed counting and localization neural network pipeline, starting with an input image of a plant, passing through a VGG19-BN backbone with multiple stages, followed by STT and ECA blocks, then splitting into regression and classification heads that output regressed points and scores, with penalties applied for nearest-neighbor and target overlap, and arrows indicating upsampling, convolution, summation, and ECA operations.</alt-text>
</graphic></fig>
<sec id="s2_2_1">
<label>2.2.1</label>
<title>VGG19_BN</title>
<p>In the SoyCountNet framework, VGG19_BN serves as the backbone network to extract multi-scale convolutional features from input images, providing a foundation for subsequent tasks. Based on the classical VGG19 architecture, it consists of 16 convolutional layers organized into five convolutional blocks. Each block contains 2&#x2013;4 consecutive 3&#xd7;3 convolutional layers followed by ReLU activation, with batch normalization (BN) added after each block to enhance training stability, accelerate convergence, and improve feature consistency. Layer-wise convolution and pooling progressively expand the receptive field, enabling features to capture information at multiple scales. Low-level features encode textures, edges, and local morphology; mid-level features capture both local patterns and partial context; and high-level features provide semantic understanding in dense or complex regions. These multi-scale feature maps are essential for high-precision point detection: low-level features support edge detection of small seeds, mid-level features balance local texture and contextual information, and high-level features supply semantic cues to distinguish dense or occluded targets. The deep convolutional structure enhances feature representation, enabling stable performance in field images with complex backgrounds, similar soil colors, large pod shape variations, or subtle seed features. In SoyCountNet, outputs from VGG19_BN are fed into both the SViT module for global context modeling and the ECA module for channel-wise attention. Through multi-level feature fusion and channel-weighted enhancement, the framework significantly improves single-seed localization accuracy and robustness, providing a solid foundation for dense, multi-scale soybean seed detection under complex field conditions.</p>
</sec>
<sec id="s2_2_2">
<label>2.2.2</label>
<title>Super vision transformer</title>
<p>In point detection tasks, conventional convolutional neural networks (CNNs) are effective at capturing local features but are limited by restricted receptive fields, which hinder the modeling of long-range dependencies and global context. This limitation is particularly pronounced in field images of densely packed or occluded soybean seeds. To address this issue, the SViT module (<xref ref-type="bibr" rid="B5">Han et&#xa0;al., 2022</xref>) was incorporated after the VGG19_BN backbone in SoyCountNet. SViT tokenizes high-level convolutional features and aggregates semantically related tokens, enabling joint modeling of local details and global context. This design improves the detection of dense, small, and occluded seeds. As illustrated in <xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref>, SViT consists of three core components:</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Structure of SViT Block.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1743104-g006.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a deep learning model for pose estimation, starting from an insect image and progressing through sequential layers: stem, stages, convolution module, STT block, SPPF, neck, and pose modules. An enlarged view details the STT block, showing steps such as depthwise convolution, layer normalization, super token attention, multi-head self-attention, token upsampling, batch normalization, and convolution operations, with iterative token associations and various data connections depicted by arrows.</alt-text>
</graphic></fig>
<list list-type="order">
<list-item>
<p>Convolutional Position Embedding (CPE): encodes spatial information before tokenization to preserve positional structure, allowing the Transformer to distinguish local features at different locations.</p></list-item>
<list-item>
<p>Super Token Attention (STA): aggregates semantically similar tokens into &#x201c;super tokens,&#x201d; integrating local features and modeling global dependencies in dense target regions while suppressing background interference.</p></list-item>
<list-item>
<p>Convolutional Feed-Forward Network (ConvFFN): enhances feature representation after attention, preserving spatial structure and combining global semantics with local details for subsequent point regression and classification.</p></list-item>
</list>
<p>The globally enhanced features output by SViT are then fed into the ECA module for channel weighting, emphasizing discriminative seed features while suppressing interference from leaves, stems, and soil. This provides high-quality input for the regression and classification branches. Experimental results demonstrate that SViT significantly improves the spatial distribution accuracy of predicted points and counting robustness under high-density, occlusion, and complex background conditions. Furthermore, the module maintains the lightweight design of the model, enhancing SoyCountNet&#x2019;s adaptability and generalization for multi-scale soybean seed detection in complex field environments.</p>
</sec>
<sec id="s2_2_3">
<label>2.2.3</label>
<title>Efficient channel attention</title>
<p>Soybean seed images often contain stems, leaves, pods, and soil-colored regions, which introduce irrelevant features and reduce the model&#x2019;s discriminative capability. To emphasize salient seed-related channels while suppressing background noise, SoyCountNet incorporates the ECA module after feature extraction and convolution operations (<xref ref-type="bibr" rid="B28">Wang et&#xa0;al., 2020b</xref>). The core mechanism of ECA is the adaptive modeling of inter-channel dependencies via a one-dimensional convolution. Unlike conventional channel attention mechanisms that rely on fully connected layers for dimensionality reduction and expansion, ECA performs local convolution directly along the channel dimension, preserving inter-channel relationships while substantially reducing computational complexity. Specifically, ECA adaptively determines the kernel size k based on the number of input channels and computes the relative importance of each channel, dynamically adjusting their contribution to the overall feature representation; the formula is shown in <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>.</p>
<disp-formula id="eq1"><label>(1)</label>
<mml:math display="block" id="M1"><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mi>&#x3c8;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>C</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi>log</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>C</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>&#x3b3;</mml:mi></mml:mfrac><mml:mo>+</mml:mo><mml:mfrac><mml:mi>b</mml:mi><mml:mi>&#x3b3;</mml:mi></mml:mfrac></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>o</mml:mi><mml:mi>d</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math>
</disp-formula>
<p>Where <italic>C</italic> denotes the number of input feature channels and <inline-formula>
<mml:math display="inline" id="im1"><mml:mrow><mml:msub><mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mi>t</mml:mi><mml:mo>|</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>o</mml:mi><mml:mi>d</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> is the odd integer closest to <italic>t</italic>.</p>
<p>This mechanism strengthens the response of salient seed channels, enhancing both the localization accuracy and counting robustness of the point-detection branch, particularly under dense, occluded, or morphologically similar targets. When integrated with multi-level features from the VGG19_BN backbone and SViT module, ECA effectively optimizes global feature representation, achieving efficient channel attention without significantly increasing model parameters. Experimental results demonstrate that ECA substantially improves the spatial distribution accuracy of predicted points and enhances counting robustness under complex field conditions. The structural overview of the module is illustrated in <xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7</bold></xref>.</p>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Architecture of the ECA module.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1743104-g007.tif">
<alt-text content-type="machine-generated">Block diagram illustrating the Squeeze-and-Excitation (SE) module in neural networks, showing input tensor &#x3c7; with dimensions H by W by C, global average pooling, two fully connected layers, sigmoid activation, and output tensor &#x3c7;-tilde after channel recalibration.</alt-text>
</graphic></fig>
</sec>
<sec id="s2_2_4">
<label>2.2.4</label>
<title>Loss function</title>
<p>The loss function is a core component of SoyCountNet, directly influencing both localization accuracy and counting performance. In the original P2PNet, classification loss and point regression loss are jointly optimized to achieve target detection and localization. However, for field images of densely packed or heavily occluded soybean seeds, the original loss may lead to duplicate predictions or positional deviations, limiting counting accuracy and robustness. To address these issues, SoyCountNet introduces two additional penalty terms: the Nearest-Neighbor Penalty and the Overlap Penalty, which constrain the spatial proximity of predicted points and the degree of overlap, respectively. These penalties enhance localization stability and counting accuracy under complex field conditions. The overall loss function is expressed as <xref ref-type="disp-formula" rid="eq2">Equation 2</xref>.</p>
<disp-formula id="eq2"><label>(2)</label>
<mml:math display="block" id="M2"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>l</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x3bb;</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x3bb;</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x3bb;</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math>
</disp-formula>
<p>Where <inline-formula>
<mml:math display="inline" id="im2"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>l</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> denotes the classification loss, <inline-formula>
<mml:math display="inline" id="im3"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> denotes the point regression loss, <inline-formula>
<mml:math display="inline" id="im4"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> represents the nearest-neighbor penalty, and <inline-formula>
<mml:math display="inline" id="im5"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> represents the overlap penalty. The coefficients <inline-formula>
<mml:math display="inline" id="im6"><mml:mrow><mml:msub><mml:mi>&#x3bb;</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula>
<mml:math display="inline" id="im7"><mml:mrow><mml:msub><mml:mi>&#x3bb;</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im8"><mml:mrow><mml:msub><mml:mi>&#x3bb;</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> are used to balance the contributions and relative weights of each loss term.</p>
<p>The classification loss and regression loss constitute the basic supervision component; the formula is shown in <xref ref-type="disp-formula" rid="eq3">Equation 3</xref>.</p>
<disp-formula id="eq3"><label>(3)</label>
<mml:math display="block" id="M3"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>b</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>l</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x3bb;</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math>
</disp-formula>
<p>Where <inline-formula>
<mml:math display="inline" id="im9"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mi>l</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> employs binary cross-entropy loss to distinguish foreground from background, while <inline-formula>
<mml:math display="inline" id="im10"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> uses the Smooth L1 loss function to constrain the distance between predicted points and ground-truth points.</p>
<p>To prevent the model from generating excessive and overly close predicted points around the same target, the Nearest-Neighbor Penalty introduces a constraint on the spatial distribution of predicted points. Let the set of predicted points be <inline-formula>
<mml:math display="inline" id="im11"><mml:mrow><mml:mi>p</mml:mi><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mi>p</mml:mi></mml:msub></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula>, and the Euclidean distance between any two points is defined as <xref ref-type="disp-formula" rid="eq4">Equation 4</xref>.</p>
<disp-formula id="eq4"><label>(4)</label>
<mml:math display="block" id="M4"><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mrow><mml:mo>&#x2016;</mml:mo><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>p</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow><mml:mo>&#x2016;</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:math>
</disp-formula>
<p>A penalty is applied when <inline-formula>
<mml:math display="inline" id="im12"><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&lt;</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>min</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, defined as <xref ref-type="disp-formula" rid="eq5">Equation 5</xref>.</p>
<disp-formula id="eq5"><label>(5)</label>
<mml:math display="block" id="M5"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msubsup><mml:mi>N</mml:mi><mml:mi>p</mml:mi><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>&#x2260;</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:munder><mml:mrow><mml:mi>max</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>min</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:math>
</disp-formula>
<p>This term penalizes predicted points that are too close to each other, effectively reducing duplicate detections, with its effect controlled by the coefficient <inline-formula>
<mml:math display="inline" id="im13"><mml:mrow><mml:msub><mml:mi>&#x3bb;</mml:mi><mml:mrow><mml:mtext>near</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>. Meanwhile, the overlap penalty is applied to mitigate excessive clustering of predictions around a single ground-truth point. Let the set of ground-truth points be <inline-formula>
<mml:math display="inline" id="im14"><mml:mrow><mml:mi>G</mml:mi><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>g</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mi>g</mml:mi></mml:msub></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula>, and let <inline-formula>
<mml:math display="inline" id="im15"><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> denote the number of predicted points whose distance to <inline-formula>
<mml:math display="inline" id="im16"><mml:mrow><mml:msub><mml:mi>g</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is less than the threshold <inline-formula>
<mml:math display="inline" id="im17"><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mi>o</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>. The overlap penalty is then defined as <xref ref-type="disp-formula" rid="eq6">Equation 6</xref>.</p>
<disp-formula id="eq6"><label>(6)</label>
<mml:math display="block" id="M6"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mi>g</mml:mi></mml:msub></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mi>g</mml:mi></mml:msub></mml:mrow></mml:munderover><mml:mrow><mml:mi>max</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi>n</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:math>
</disp-formula>
<p>This term constrains the redundant distribution of predicted points around a single target, ensuring that each ground-truth point corresponds to only one prediction.</p>
<p>By integrating the Nearest-Neighbor Penalty and Overlap Penalty into the original loss function, SoyCountNet effectively regulates the spatial distribution of predicted points, reducing duplicate detections and overmatching. The weighting coefficients and distance thresholds in the loss function were determined based on training set observations and prior knowledge of point-based counting methods. The weights of the nearest-neighbor and overlap penalties provide sufficient regularization without overshadowing the classification and regression losses. Distance thresholds were set according to the average spatial scale of soybean seeds, triggering penalties only when predicted points are excessively close or clustered. This enhancement significantly improves the model&#x2019;s robustness and counting accuracy in dense or occluded scenarios, while ensuring stable and convergent training and providing a solid foundation for subsequent network optimization and parameter tuning.</p>
</sec>
<sec id="s2_2_5">
<label>2.2.5</label>
<title>Evaluation metrics</title>
<p>To comprehensively evaluate the proposed soybean seed counting framework, multiple aspects were considered, including counting accuracy, robustness, model fit, and statistical reliability. The specific evaluation metrics are as follows:</p>
<p>1. Mean Absolute Error (MAE): quantifies the average absolute deviation between predicted and manually counted values; the formula is shown in <xref ref-type="disp-formula" rid="eq7">Equation 7</xref>.</p>
<disp-formula id="eq7"><label>(7)</label>
<mml:math display="block" id="M7"><mml:mrow><mml:mi>M</mml:mi><mml:mi>A</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:munderover><mml:mrow><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:mrow></mml:math>
</disp-formula>
<p>2. Root Mean Square Error (RMSE): measures the deviation between predicted and manually counted values, reflecting the robustness of predictions; the formula is shown in <xref ref-type="disp-formula" rid="eq8">Equation 8</xref>.</p>
<disp-formula id="eq8"><label>(8)</label>
<mml:math display="block" id="M8"><mml:mrow><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:munderover><mml:mrow><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>-</mml:mo><mml:msub><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mstyle><mml:mo>,</mml:mo><mml:mi>R</mml:mi><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>E</mml:mi><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>E</mml:mi></mml:mrow></mml:msqrt></mml:mrow></mml:math>
</disp-formula>
<p>3. Coefficient of Determination (<inline-formula>
<mml:math display="inline" id="im18"><mml:mrow><mml:msup><mml:mi>R</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula>): quantifies the proportion of variance in the observed data explained by the predictions. An <inline-formula>
<mml:math display="inline" id="im19"><mml:mrow><mml:msup><mml:mi>R</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula> value closer to 1 indicates a better fit between the model and the data; the formula is shown in <xref ref-type="disp-formula" rid="eq9">Equation 9</xref>.</p>
<disp-formula id="eq9"><label>(9)</label>
<mml:math display="block" id="M9"><mml:mrow><mml:msup><mml:mi>R</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mfrac><mml:mrow><mml:mstyle displaystyle="true"><mml:msubsup><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:msubsup><mml:mrow><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mstyle></mml:mrow><mml:mrow><mml:mstyle displaystyle="true"><mml:msubsup><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:msubsup><mml:mrow><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo stretchy="true">&#xaf;</mml:mo></mml:mover><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mstyle></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mspace width="0.25em"/><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo stretchy="true">&#xaf;</mml:mo></mml:mover><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:munderover><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mstyle></mml:mrow></mml:math>
</disp-formula>
<p>4. 95% Confidence Interval (CI): used to quantify the uncertainty of the mean of each metric, calculated as <xref ref-type="disp-formula" rid="eq10">Equations 10</xref>&#x2013;<xref ref-type="disp-formula" rid="eq11">11</xref>.</p>
<disp-formula id="eq10"><label>(10)</label>
<mml:math display="block" id="M10"><mml:mrow><mml:msub><mml:mrow><mml:mtext>CI</mml:mtext></mml:mrow><mml:mrow><mml:mn>95</mml:mn><mml:mo>%</mml:mo></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mover accent="true"><mml:mi>x</mml:mi><mml:mo stretchy="true">&#xaf;</mml:mo></mml:mover><mml:mo>&#xb1;</mml:mo><mml:msub><mml:mi>t</mml:mi><mml:mrow><mml:mn>0.975</mml:mn><mml:mo>,</mml:mo><mml:mi>N</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#xb7;</mml:mo><mml:mfrac><mml:mi>s</mml:mi><mml:mrow><mml:msqrt><mml:mi>N</mml:mi></mml:msqrt></mml:mrow></mml:mfrac></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq11"><label>(11)</label>
<mml:math display="block" id="M11"><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mover accent="true"><mml:mi>d</mml:mi><mml:mo stretchy="true">&#xaf;</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:msub><mml:mi>S</mml:mi><mml:mi>d</mml:mi></mml:msub><mml:mo stretchy="false">/</mml:mo><mml:msqrt><mml:mi>N</mml:mi></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:msub><mml:mi>d</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msubsup><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>i</mml:mi><mml:mrow><mml:mtext>model</mml:mtext></mml:mrow></mml:msubsup><mml:mo>&#x2212;</mml:mo><mml:msubsup><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>i</mml:mi><mml:mrow><mml:mtext>baseline</mml:mtext></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:mover accent="true"><mml:mi>d</mml:mi><mml:mo>&#xaf;</mml:mo></mml:mover><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:munderover><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mstyle></mml:mrow></mml:math>
</disp-formula>
<p>Where N denotes the number of test samples, <inline-formula>
<mml:math display="inline" id="im20"><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is the ground-truth count of the i-th sample, <inline-formula>
<mml:math display="inline" id="im21"><mml:mrow><mml:msub><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>is the predicted count of the i-th sample, and <inline-formula>
<mml:math display="inline" id="im22"><mml:mrow><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo stretchy="true">&#xaf;</mml:mo></mml:mover></mml:mrow></mml:math></inline-formula>is the mean of the ground-truth counts. s represents the sample standard deviation, <inline-formula>
<mml:math display="inline" id="im23"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>d</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is the standard deviation of the paired differences (so that <inline-formula>
<mml:math display="inline" id="im24"><mml:mrow><mml:msub><mml:mi>t</mml:mi><mml:mrow><mml:mn>0.975</mml:mn><mml:mo>,</mml:mo><mml:mi>N</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> is the critical value of the t-distribution with N&#x2212;1 degrees of freedom; the standard error of the mean difference is obtained as this standard deviation divided by the square root of N).</p>
<p>Complexity is assessed through model parameters, floating-point operations (FLOPs), and frames per second (FPS), indicating computational requirements. These combined metrics thus provide comprehensive insights into the model&#x2019;s efficiency and accuracy for soybean seed detection. The corresponding formulas are given in <xref ref-type="disp-formula" rid="eq12">Equations 12</xref>, <xref ref-type="disp-formula" rid="eq13">13</xref>:</p>
<disp-formula id="eq12"><label>(12)</label>
<mml:math display="block" id="M12"><mml:mrow><mml:mi>p</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>s</mml:mi><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mi>r</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>f</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>f</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#xd7;</mml:mo><mml:mi>o</mml:mi><mml:mo stretchy="false">]</mml:mo><mml:mo>+</mml:mo><mml:mi>o</mml:mi></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq13"><label>(13)</label>
<mml:math display="block" id="M13"><mml:mrow><mml:mi>F</mml:mi><mml:mi>L</mml:mi><mml:mi>O</mml:mi><mml:mi>P</mml:mi><mml:mi>s</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>H</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#xd7;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>&#xd7;</mml:mo><mml:msup><mml:mi>K</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>&#xd7;</mml:mo><mml:mi>b</mml:mi><mml:mi>i</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im25"><mml:mi>r</mml:mi></mml:math></inline-formula> is the input size, <inline-formula>
<mml:math display="inline" id="im26"><mml:mi>f</mml:mi></mml:math></inline-formula> is the size of the convolution kernel, <inline-formula>
<mml:math display="inline" id="im27"><mml:mi>o</mml:mi></mml:math></inline-formula> is the output size, <inline-formula>
<mml:math display="inline" id="im28"><mml:mrow><mml:mi>H</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>W</mml:mi></mml:mrow></mml:math></inline-formula> is the size of the output feature map, <inline-formula>
<mml:math display="inline" id="im29"><mml:mrow><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>is the input channel, <inline-formula>
<mml:math display="inline" id="im30"><mml:mi>K</mml:mi></mml:math></inline-formula> is the kernel size, <inline-formula>
<mml:math display="inline" id="im31"><mml:mi>s</mml:mi></mml:math></inline-formula>is the stride, and <inline-formula>
<mml:math display="inline" id="im32"><mml:mrow><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>is the output channel.</p>
<p>These metrics reflect the model&#x2019;s performance in the soybean seed counting task from multiple perspectives, providing a comprehensive evaluation of counting accuracy, robustness, goodness of fit, efficiency, and statistical significance.</p>
</sec>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results and analysis</title>
<p>Experiments were conducted on a Linux workstation equipped with an NVIDIA RTX 4090 GPU (24 GB memory) and a 16-core Intel<sup>&#xae;</sup> Xeon<sup>&#xae;</sup> Gold 6430 processor, with 120 GB of system RAM. The deep learning framework was PyTorch, and Python 3.12 was used for programming. Input images were resized to 640 &#xd7; 640 pixels with a batch size of 4. The model was trained using the Adam optimizer with an initial learning rate of 1&#xd7;10<sup>-4</sup>, and a learning rate of 1&#xd7;10<sup>-5</sup> for the FPN module, over 200 epochs. A warm-up followed by a cosine decay learning rate schedule was employed, and gradient clipping with a maximum norm of 0.1 was applied to prevent gradient explosion. Model parameters were categorized into weight decay, no weight decay, and bias groups, with regularization implemented using L2 weight decay (1&#xd7;10<sup>-4</sup>) and Dropout. Data augmentation during training included HSV color jittering, random horizontal flipping, Mosaic augmentation, and random affine transformations. During validation and testing, augmentation was disabled to ensure comparability and reproducibility. Training speed averaged ~4.5 iterations per second, processing approximately 18 images per second. Evaluation was performed every two epochs, with early stopping applied if no improvement was observed on the validation set.</p>
<sec id="s3_1">
<label>3.1</label>
<title>Comparison analysis of experimental results</title>
<p>On the self-constructed single-plant soybean dataset, SoyCountNet was evaluated against six mainstream point-based counting methods, including P2PNet, DM-Count (<xref ref-type="bibr" rid="B27">Wang et&#xa0;al., 2020a</xref>), CSRNet (<xref ref-type="bibr" rid="B11">Li et&#xa0;al., 2018</xref>), BL (<xref ref-type="bibr" rid="B16">Ma et&#xa0;al., 2019</xref>), CAN (<xref ref-type="bibr" rid="B13">Liu et&#xa0;al., 2019</xref>), and FIDTM (<xref ref-type="bibr" rid="B12">Liang et&#xa0;al., 2022</xref>). These methods encompass the major point-counting strategies, such as direct point regression, density distribution matching, density-map regression, holistic CNN-based counting, and convolution&#x2013;Transformer hybrid architectures. Model performance was evaluated using MAE, RMSE, and R&#xb2;, with 95% confidence intervals (CI) reported to quantify uncertainty (<xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>). The confidence intervals were computed based on paired statistical comparisons between SoyCountNet and each baseline method across the test samples, and statistical significance was assessed using a paired t-test at the 0.05 significance level.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Comparison of the precision among different counting models.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Model</th>
<th valign="middle" align="center">Backbone</th>
<th valign="middle" align="center">MAE</th>
<th valign="middle" align="center">RMSE</th>
<th valign="middle" align="center">R&#xb2;</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center" style="">SoyCountNet</td>
<td valign="middle" align="center" style=""><italic>VGG19_BN</italic></td>
<td valign="middle" align="center" style="">4.61 &#xb1; 0.79<break/>(3.63, 5.59)</td>
<td valign="middle" align="center" style="">6.03 &#xb1; 0.93<break/>(4.87, 7.19)</td>
<td valign="middle" align="center" style="">0.94 &#xb1; 0.016<break/>(0.92, 0.96)</td>
</tr>
<tr>
<td valign="middle" align="center" style=""><italic>P2PNet</italic></td>
<td valign="middle" align="center" style=""><italic>VGG19_BN</italic></td>
<td valign="middle" align="center" style="">9.20 &#xb1; 0.94<break/>(8.03, 10.37)</td>
<td valign="middle" align="center" style="">11.80 &#xb1; 1.70<break/>(9.69, 13.91)</td>
<td valign="middle" align="center" style="">0.78 &#xb1; 0.07<break/>(0.69, 0.86)</td>
</tr>
<tr>
<td valign="middle" align="center" style=""><italic>DM-Count</italic></td>
<td valign="middle" align="center" style=""><italic>VGG19</italic></td>
<td valign="middle" align="center" style="">6.67 &#xb1; 0.68<break/>(6.00, 7.34)</td>
<td valign="middle" align="center" style="">8.90 &#xb1; 1.22<break/>(7.88, 9.92)</td>
<td valign="middle" align="center" style="">0.86 &#xb1; 0.05<break/>(0.85, 0.95)</td>
</tr>
<tr>
<td valign="middle" align="center" style=""><italic>CSRNet</italic></td>
<td valign="middle" align="center" style=""><italic>VGG16</italic></td>
<td valign="middle" align="center" style="">16.33 &#xb1; 1.77<break/>(14.71, 17.95)</td>
<td valign="middle" align="center" style="">20.15 &#xb1; 2.78<break/>(17.37, 22.93)</td>
<td valign="middle" align="center" style="">0.64 &#xb1; 0.09<break/>(0.55, 0.73)</td>
</tr>
<tr>
<td valign="middle" align="center" style=""><italic>BL</italic></td>
<td valign="middle" align="center" style=""><italic>VGG19</italic></td>
<td valign="middle" align="center" style="">10.35 &#xb1; 1.10<break/>(9.30, 11.40)</td>
<td valign="middle" align="center" style="">12.56 &#xb1; 1.70<break/>(11.10, 14.00)</td>
<td valign="middle" align="center" style="">0.73 &#xb1; 0.07<break/>(0.66, 0.79)</td>
</tr>
<tr>
<td valign="middle" align="center" style=""><italic>CAN</italic></td>
<td valign="middle" align="center" style=""><italic>VGG16</italic></td>
<td valign="middle" align="center" style="">17.67 &#xb1; 1.81<break/>(15.89, 19.44)</td>
<td valign="middle" align="center" style="">18.66 &#xb1; 2.57<break/>(16.53, 20.78)</td>
<td valign="middle" align="center" style="">0.74 &#xb1; 0.07<break/>(0.67, 0.80)</td>
</tr>
<tr>
<td valign="middle" align="center" style=""><italic>FIDTM</italic></td>
<td valign="middle" align="center" style=""><italic>VGG19</italic></td>
<td valign="middle" align="center" style="">9.80 &#xb1; 1.06<break/>(8.83, 10.70)</td>
<td valign="middle" align="center" style="">14.51 &#xb1; 2.00<break/>(12.51, 16.53)</td>
<td valign="middle" align="center" style="">0.78 &#xb1; 0.11<break/>(0.67, 0.89)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Experimental results demonstrate that SoyCountNet consistently outperforms all baseline methods across all metrics. It achieves an MAE of 4.61 [95% CI 3.63, 5.59], approximately 30.9% lower than the best-performing baseline DM-Count (MAE 6.67 [95% CI 6.00, 7.34]), with an RMSE of 6.03 [95% CI 4.87, 7.19] and R&#xb2; of 0.94 [95% CI 0.92, 0.96], indicating strong agreement between predicted and manually counted values. In contrast, traditional density-map regression models such as CSRNet and CAN perform poorly in dense or occluded regions, exhibiting substantially higher MAE and RMSE, which suggests their susceptibility to counting bias in complex field environments. P2PNet retains the advantage of point localization but yields lower counting accuracy (MAE 9.20, RMSE 11.80). DM-Count achieves lower MAE and RMSE than conventional CNN-based approaches but still accumulates errors in overlapping regions. FIDTM attains an R&#xb2; comparable to P2PNet but exhibits higher MAE and RMSE, indicating limited robustness under challenging field conditions.</p>
<p>The superior performance of SoyCountNet can be attributed to its structural innovations: multi-scale feature extraction via VGG19_BN enhances feature representation; the SViT module enables global context modeling; the ECA mechanism strengthens channel attention; and the optimized loss function enforces spatial consistency. These components work synergistically to achieve precise counting and stable localization under dense, occluded, and complex backgrounds. Although the inference time slightly increases, the substantial improvements in counting accuracy and localization robustness highlight the strong potential of SoyCountNet for high-throughput field phenotyping and yield estimation applications.</p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Ablation experiments</title>
<p>To evaluate the effectiveness of each key component in SoyCountNet, a series of ablation experiments were conducted on the VGG19_BN backbone, loss function, SViT, and ECA modules. The modified P2PNet (denoted as P2PNet_VGG19_Loss, abbreviated as PV19_L) was used as the baseline model. Under identical datasets and training configurations, the SViT and ECA modules were independently incorporated for comparison, and the complete SoyCountNet was subsequently constructed. The results are presented in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>, where the best-performing configuration is highlighted in bold.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Results of ablation experiments. Add various combinations of modules to the baseline model.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Model</th>
<th valign="middle" align="center">MAE</th>
<th valign="middle" align="center">MSE</th>
<th valign="middle" align="center">RMSE</th>
<th valign="middle" align="center">R&#xb2;</th>
<th valign="middle" align="center">Params(M)</th>
<th valign="middle" align="center">FLOPs(G)</th>
<th valign="middle" align="center">FPS</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center" style=""><italic>PV16</italic></td>
<td valign="middle" align="center" style="">72.54 &#xb1; 14.86<break/>(54.08, 91.00)</td>
<td valign="middle" align="center" style="">8010.44 &#xb1; 2897.67 (4410.70, 11611.18)</td>
<td valign="middle" align="center" style="">88.72 &#xb1; 14.46<break/>(70.76, 106.68)</td>
<td valign="middle" align="center" style="">-11.46 &#xb1; 4.07<break/>(-16.51, -6.41)</td>
<td valign="middle" align="center" style="">16.03</td>
<td valign="middle" align="center" style="">317.11</td>
<td valign="middle" align="center" style="">128.24</td>
</tr>
<tr>
<td valign="middle" align="center" style=""><italic>PV19</italic></td>
<td valign="middle" align="center" style="">56.33 &#xb1; 11.10<break/>(42.53, 70.13)</td>
<td valign="middle" align="center" style="">5070.12 &#xb1; 1564.87 (3125.70, 7014.54)</td>
<td valign="middle" align="center" style="">70.80 &#xb1; 11.23<break/>(56.85, 84.75)</td>
<td valign="middle" align="center" style="">-6.91 &#xb1; 2.57<break/>(-10.10, -3.72)</td>
<td valign="middle" align="center" style="">21.34</td>
<td valign="middle" align="center" style="">385.06</td>
<td valign="middle" align="center" style="">110.93</td>
</tr>
<tr>
<td valign="middle" align="center" style=""><italic>PV16_L</italic></td>
<td valign="middle" align="center" style="">9.83 &#xb1; 1.03<break/>(8.55, 11.11)</td>
<td valign="middle" align="center" style="">163.99 &#xb1; 47.36<break/>(105.27, 222.71)</td>
<td valign="middle" align="center" style="">12.91 &#xb1; 1.68<break/>(10.83, 14.99)</td>
<td valign="middle" align="center" style="">0.75 &#xb1; 0.064<break/>(0.67, 0.82)</td>
<td valign="middle" align="center" style="">16.03</td>
<td valign="middle" align="center" style="">317.11</td>
<td valign="middle" align="center" style="">127.93</td>
</tr>
<tr>
<td valign="middle" align="center" style=""><italic>PV19_L</italic></td>
<td valign="middle" align="center" style="">9.20 &#xb1; 0.94<break/>(8.03, 10.37)</td>
<td valign="middle" align="center" style="">146.67 &#xb1; 44.06<break/>(91.94, 201.40)</td>
<td valign="middle" align="center" style="">11.80 &#xb1; 1.70<break/>(9.69, 13.91)</td>
<td valign="middle" align="center" style="">0.78 &#xb1; 0.07<break/>(0.69, 0.86)</td>
<td valign="middle" align="center" style="">21.34</td>
<td valign="middle" align="center" style="">385.06</td>
<td valign="middle" align="center" style="">110.37</td>
</tr>
<tr>
<td valign="middle" align="center" style=""><italic>PV19_L</italic><break/><italic>+SViT</italic></td>
<td valign="middle" align="center" style="">7.59 &#xb1; 0.84<break/>(6.55, 8.63)</td>
<td valign="middle" align="center" style="">102.85 &#xb1; 31.53<break/>(63.69, 142.01)</td>
<td valign="middle" align="center" style="">10.44 &#xb1; 1.47<break/>(8.61, 12.27)</td>
<td valign="middle" align="center" style="">0.85 &#xb1; 0.06<break/>(0.78, 0.93)</td>
<td valign="middle" align="center" style="">33.95</td>
<td valign="middle" align="center" style="">425.41</td>
<td valign="middle" align="center" style="">77.23</td>
</tr>
<tr>
<td valign="middle" align="center" style=""><italic>PV19_L</italic><break/><italic>+ECA</italic></td>
<td valign="middle" align="center" style="">8.49 &#xb1; 0.48<break/>(7.89, 9.09)</td>
<td valign="middle" align="center" style="">123.64 &#xb1; 18.46<break/>(100.70, 146.58)</td>
<td valign="middle" align="center" style="">11.09 &#xb1; 0.79<break/>(10.11, 12.07)</td>
<td valign="middle" align="center" style="">0.79 &#xb1; 0.05<break/>(0.74, 0.85)</td>
<td valign="middle" align="center" style="">21.34</td>
<td valign="middle" align="center" style="">385.06</td>
<td valign="middle" align="center" style="">110.42</td>
</tr>
<tr>
<td valign="middle" align="center" style=""><italic>PV19_L</italic><break/><italic>+SViT+ECA</italic></td>
<td valign="middle" align="center" style="">4.61 &#xb1; 0.79<break/>(3.63, 5.59)</td>
<td valign="middle" align="center" style="">37.87 &#xb1; 12.14<break/>(22.79, 52.95)</td>
<td valign="middle" align="center" style="">6.03 &#xb1; 0.93<break/>(4.87, 7.19)</td>
<td valign="middle" align="center" style="">0.94 &#xb1; 0.016<break/>(0.92, 0.96)</td>
<td valign="middle" align="center" style="">33.95</td>
<td valign="middle" align="center" style="">425.41</td>
<td valign="middle" align="center" style="">77.54</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>PV16 represents P2PNet_VGG16; PV19 represents P2PNet_VGG19; PV16_L and PV19_L denote P2PNet_VGG16 and P2PNet_VGG19 integrated with the proposed loss function, respectively.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>, the baseline model PV19_L achieved an MAE of 9.20, RMSE of 11.80, and R<sup>2</sup> of 0.78, demonstrating basic counting capability but exhibiting significant errors in regions with high density or occlusion. Incorporating the SViT module reduced the MAE to 7.59, RMSE to 10.44, and increased R<sup>2</sup> to 0.85, indicating that SViT effectively captures global context and long-range dependencies, thereby enhancing feature representation and semantic consistency. In contrast, the PV19_L+ECA model achieved an MAE of 8.49, RMSE of 11.09, and R<sup>2</sup> of 0.79, showing only modest gains over the baseline. This suggests that the lightweight channel attention mechanism ECA contributes to emphasizing key features and suppressing background noise, but its effect is limited without the support of global modeling. Notably, several backbone variants, such as PV16 and PV16_L, perform noticeably worse, highlighting that backbone selection significantly impacts overall performance.</p>
<p>The fully integrated SoyCountNet, combining VGG19_BN, SViT, and ECA, achieved the best performance, with reductions of approximately 49.9% and 48.9% in MAE and RMSE, and a 20.5% improvement in R<sup>2</sup> relative to the baseline. These results demonstrate the complementary effects of the modules: SViT reinforces global feature extraction, ECA enhances local channel responses, and the loss function constrains counting objectives. Together, these components synergistically improve accuracy, robustness, and stability across varying densities. The ablation study thus highlights the nuanced contributions of each module: some components (e.g., SViT) provide substantial improvement, others (e.g., ECA alone) offer moderate gains, and certain backbone choices can degrade performance, providing insight into the design rationale of SoyCountNet in complex field environments.</p>
<p>To further evaluate the lightweight design and deployability of SoyCountNet on intelligent agricultural platforms, different model configurations were assessed in terms of parameters, FLOPs, and FPS. The results indicate that PV16 exhibits lower Params and FLOPs and higher inference speed compared to PV19. Incorporating the ECA module into PV19_L does not significantly increase Params or FLOPs, nor does it noticeably reduce FPS, as the one-dimensional convolutional channel attention mechanism introduces negligible additional computation. Adding the SViT module increases Params and FLOPs to 33.95 M and 425.41 G, respectively, and reduces FPS to 77.54. Nevertheless, the model maintains single-frame inference within the sub-second range, satisfying real-time or near-real-time requirements for field phenotyping and intelligent agricultural applications. Overall, these results demonstrate that SoyCountNet remains lightweight and suitable for practical deployment.</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Visualization results and analysis of soybean pod counting</title>
<p>As shown in <xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8</bold></xref>, this study visualized and compared the prediction results of the baseline model (PV19_L) and its improved versions (PV19_L+SViT and PV19_L+ECA) on selected single-plant soybean test samples. The results indicate that the traditional point-based P2PNet exhibits significant limitations in single-plant soybean seed counting, prone to both missed and false detections. On plants with minimal occlusion, P2PNet can achieve relatively accurate counts; however, in densely seeded or heavily occluded regions, the counting errors and standard deviations increase substantially, reflecting insufficient robustness to complex backgrounds.</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Visualization of prediction results from different counting models: <bold>(a)</bold> SoyCountNet, <bold>(b)</bold> PV19_L+SViT, <bold>(c)</bold> PV19_L+ECA, and <bold>(d)</bold> PV19_L.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1743104-g008.tif">
<alt-text content-type="machine-generated">Five side-by-side panels display two plant stems each with pods, compared across methods. From left to right: original image, followed by predictions from four machine learning models, with red dots marking identified pods and pod counts labeled below.</alt-text>
</graphic></fig>
<p>The PV19_L+SViT model, incorporating the SViT module, captures both local and global features during extraction, effectively improving recognition of dense and occluded seeds. Visualization results show that this model significantly reduces repeated missed detections among adjacent seeds and achieves higher detection accuracy in mildly occluded areas, demonstrating enhanced anti-interference capability and generalization. Moreover, the PV19_L+ECA model, integrating the ECA channel attention mechanism, adaptively adjusts feature channel weights to highlight information relevant to seed counting while suppressing background noise. Experimental observations indicate that this model maintains stable counting performance under severe occlusion and complex backgrounds, further reducing missed detection rates and producing predictions closer to the true distribution.</p>
<p><xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9</bold></xref> provides a more detailed view of model performance in fine-grained regions. Compared with PV19_L, SoyCountNet substantially reduces missed detections in local areas. In the magnified regions, PV19_L still misses some seeds under partial occlusion or uneven seed distribution, whereas SoyCountNet accurately identifies these seeds. Additionally, SoyCountNet maintains stable performance under varying occlusion levels and uneven seed development, demonstrating strong noise resistance and feature robustness. Overall, both the global visualization and local detail analysis further validate the effectiveness and stability of SoyCountNet for high-precision single-plant soybean seed counting in complex field environments.</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Comparison of counting results between <bold>(a)</bold> SoyCountNet and <bold>(b)</bold> PV19_L.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1743104-g009.tif">
<alt-text content-type="machine-generated">Two vertical soybean plants with dried pods are shown on the left and right. Insets display close-ups of specific pod clusters labeled a and b, each marked with red dots indicating particular features or points of interest within the pods.</alt-text>
</graphic></fig>
<p>Furthermore, the predicted results of different models were fitted against the ground truth on the test set, as shown in <xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10</bold></xref>. The fitting curves clearly illustrate differences in counting performance among the models. SoyCountNet exhibits the highest agreement between predicted and true values, achieving the best overall performance, indicating strong robustness and reliability in complex field conditions and meeting practical requirements for single-plant seed counting. In contrast, the baseline PV19_L demonstrates weaker fitting performance and larger prediction deviations, highlighting the limitations of traditional point-based counting methods under complex backgrounds. By incorporating the SViT and ECA modules, SoyCountNet enhances feature extraction and channel attention, significantly improving adaptability and counting accuracy.</p>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>Fitting analysis of predicted soybean grain numbers and ground truth values for different models: <bold>(a)</bold> SoyCountNet, <bold>(b)</bold> P2PNetVGG19_BN+SViT, <bold>(c)</bold> P2PNetVGG19_BN+ECA, <bold>(d)</bold> PV19_L.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1743104-g010.tif">
<alt-text content-type="machine-generated">Four-panel scatter plot comparing predicted values to actual values for a regression model, labeled (a) through (d). Each plot shows blue data points, a dashed ideal line, and a solid red fitted line. The panels report regression metrics: (a) shows high accuracy with MAE 4.31 and R squared 0.959; (b) and (c) show moderate accuracy with increasing error values and R squared 0.888 and 0.863 respectively; (d) shows the lowest accuracy, with MAE 9.28 and R squared 0.816. All axes are labeled &#x201c;Actual Values&#x201d; and &#x201c;Predicted Values."</alt-text>
</graphic></fig>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Generalization performance across different soybean varieties</title>
<p>Soybean varieties exhibit significant differences in pod morphology, color, pod position, and density. Therefore, the performance of a single-plant seed counting model under cross-varietal conditions is an important indicator of its generalization capability. To evaluate the adaptability of SoyCountNet across diverse genetic backgrounds, six representative salt-tolerant soybean varieties were selected as test samples. For each variety, twenty images were randomly chosen for validation, and the prediction results are summarized in <xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref>. The results indicate that SoyCountNet maintains high counting accuracy across all varieties, with R&#xb2; values ranging from 0.87 to 0.95, demonstrating stability and robustness under cross-varietal conditions. Among these, D3 and D4 exhibited the best performance, indicating that the model can accurately capture the phenotypic characteristics of these varieties and achieve consistent seed counting. D1 and D2 also showed high consistency with relatively small prediction errors, confirming the model&#x2019;s reliability for varieties with moderate morphological variation. In contrast, D5 and D6 had slightly lower R&#xb2; values and higher RMSE, reflecting certain prediction deviations, which may be attributed to pod overlap, color similarity with the background, or edge blurring caused by light reflection.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Counting accuracy of SoyCountNet on different soybean varieties.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Number</th>
<th valign="middle" align="center">D1</th>
<th valign="middle" align="center">D2</th>
<th valign="middle" align="center">D3</th>
<th valign="middle" align="center">D4</th>
<th valign="middle" align="center">D5</th>
<th valign="middle" align="center">D6</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">Variety</td>
<td valign="middle" align="center">Qihuang34</td>
<td valign="middle" align="center">Qingnong<break/>Bean 2312</td>
<td valign="middle" align="center">AFA1</td>
<td valign="middle" align="center">AFA1</td>
<td valign="middle" align="center">AFA2</td>
<td valign="middle" align="center">Qingnong Bean 2306</td>
</tr>
<tr>
<td valign="middle" align="center">MAE</td>
<td valign="middle" align="center">3.60</td>
<td valign="middle" align="center">3.20</td>
<td valign="middle" align="center">4.10</td>
<td valign="middle" align="center">3.90</td>
<td valign="middle" align="center">4.65</td>
<td valign="middle" align="center">4.40</td>
</tr>
<tr>
<td valign="middle" align="center">RMSE</td>
<td valign="middle" align="center">4.48</td>
<td valign="middle" align="center">3.92</td>
<td valign="middle" align="center">4.64</td>
<td valign="middle" align="center">4.77</td>
<td valign="middle" align="center">5.73</td>
<td valign="middle" align="center">5.39</td>
</tr>
<tr>
<td valign="middle" align="center">R<sup>2</sup></td>
<td valign="middle" align="center">0.90</td>
<td valign="middle" align="center">0.92</td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">0.90</td>
<td valign="middle" align="center">0.87</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The visualization results in <xref ref-type="fig" rid="f11"><bold>Figure&#xa0;11</bold></xref> further support these findings. Overall, the predicted values exhibit a strong linear correlation with ground-truth values, and most scatter points are closely aligned along the diagonal, indicating high consistency between model predictions and manual annotations. D3 and D4 displayed the most compact scatter distributions with minimal deviation, demonstrating superior stability and agreement, while D1 and D2 were similarly well-aligned, indicating accurate counting for samples with regular pod morphology and clear background contrast. By comparison, some points for D5 and D6 fall slightly below the diagonal, suggesting minor underestimation, likely due to pod clustering or complex illumination conditions reducing local feature responses.</p>
<p>As shown in <xref ref-type="fig" rid="f11"><bold>Figure&#xa0;11</bold></xref>, bar chart comparisons show that although MAE and RMSE vary slightly among different varieties, the overall range is limited (MAE variation within 1.5), indicating consistent performance in cross-varietal transfer. This suggests that the deep semantic features learned by SoyCountNet are highly transferable and not restricted to specific morphological patterns. The feature extraction module (VGG19_BN + SViT) and channel attention mechanism (ECA) effectively focus on seed regions while suppressing background interference, enabling reliable performance under diverse phenotypic conditions. In summary, SoyCountNet demonstrates robust feature representation and generalization ability across multiple soybean varieties, maintaining high-precision seed counting under varying morphological and environmental conditions. This indicates its potential for large-scale, multi-variety field phenotyping and provides reliable methodological support for intelligent yield estimation and varietal evaluation.</p>
<fig id="f11" position="float">
<label>Figure&#xa0;11</label>
<caption>
<p>SoyCountNet detection performance across different soybean varieties.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1743104-g011.tif">
<alt-text content-type="machine-generated">Set of nine panels displaying model performance across six varieties. Top row shows horizontal bar charts comparing MAE, RMSE, and R squared across varieties D1 to D6. Bottom two rows feature scatter plots for each variety, presenting predicted versus actual values with ideal and fitted lines, MAE, RMSE, and R squared annotations per panel.</alt-text>
</graphic></fig>
<p>As shown in <xref ref-type="fig" rid="f12"><bold>Figure&#xa0;12</bold></xref>, SoyCountNet still exhibits certain counting errors under complex field conditions, primarily in the form of missed detections (highlighted by blue boxes). These missed detections generally occur in regions with dense pod overlap or low contrast between pods and the background, where foreground features are difficult to distinguish. Additionally, uneven illumination, shadow occlusion, and surface reflections may lead to information loss during feature extraction, affecting counting completeness. Future work could focus on integrating adaptive feature enhancement modules, cross-scale feature alignment mechanisms, or illumination-invariant enhancement strategies to further improve the model&#x2019;s robustness and generalization in complex field environments.</p>
<fig id="f12" position="float">
<label>Figure&#xa0;12</label>
<caption>
<p>SoyCountNet detection results and missed detections across different soybean varieties (blue boxes indicate missed detections).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1743104-g012.tif">
<alt-text content-type="machine-generated">Six soybean plant images labeled D1 to D6 are shown in vertical panels, with blue boxes highlighting undetected soybeans on the plants; red dots mark detected soybeans, and a key explains the blue boxes.</alt-text>
</graphic></fig>
<p>Additionally, the dataset of 120 samples from six cultivars was stratified into three groups based on seed density: Sparse, Medium, and Dense. <xref ref-type="fig" rid="f13"><bold>Figure&#xa0;13</bold></xref> presents scatter plots of the ground-truth versus predicted values for each group, with overlaid fitted lines and the global ideal reference line (y = x). Quantitative analysis reveals that the model performs best on sparse samples (MAE = 0.85, RMSE = 1.07, R&#xb2; = 0.941), shows moderate accuracy on medium-density samples (MAE = 2.48, RMSE = 3.08, R&#xb2; = 0.823), and exhibits larger deviations on dense samples (MAE = 5.36, RMSE = 6.95, R&#xb2; = 0.742). The fitted lines indicate a slight underestimation as seed density increases, but overall, the model remains well-aligned with the ideal line, demonstrating its robustness across different density levels. The results suggest that SoyCountNet achieves high-precision counting under sparse to medium-density conditions, while its performance deteriorates under high-density conditions with heavy occlusion, highlighting areas for future improvement.</p>
<fig id="f13" position="float">
<label>Figure&#xa0;13</label>
<caption>
<p>Scatter plots of ground-truth vs. predicted seed counts for different density groups (Sparse, Medium, and Dense).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1743104-g013.tif">
<alt-text content-type="machine-generated">Scatter plot comparing ground truth versus predicted seed counts is grouped by density: green circles for sparse, yellow squares for medium, red triangles for dense, with corresponding normal and outlier groups. Three fitted lines and an ideal dashed line are shown alongside a legend. Error metrics MAE, RMSE, and R squared for each group are provided in a text box at the bottom right.</alt-text>
</graphic></fig>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<sec id="s4_1">
<label>4.1</label>
<title>Architectural advantages and performance gains</title>
<p>Through the synergistic integration of the VGG19_BN, SViT, and ECA modules with a spatial distribution&#x2013;constrained loss function, SoyCountNet achieves notable improvements in soybean seed counting accuracy under complex field conditions. Quantitative evaluations show that SoyCountNet reduces MAE by approximately 49.9% and 30.9% compared with P2PNet and DM-Count, respectively. The non-overlapping 95% confidence intervals of MAE between SoyCountNet and the best baseline indicate a statistically significant improvement (p&lt; 0.05). At the architectural level, the three core modules play complementary roles: VGG19_BN provides multi-scale, fine-grained feature representations as a high-resolution basis for semantic modeling; SViT captures long-range dependencies to alleviate feature confusion in dense and occluded regions; and ECA adaptively reweights channel responses to enhance target discrimination and suppress background interference. Additionally, the proposed Near-Point and Overlap Penalties explicitly constrain the spatial distribution of predicted points, reducing redundancy and aggregation, thereby improving both counting accuracy and spatial consistency. Unlike conventional object detection frameworks (<xref ref-type="bibr" rid="B25">Vijayakumar and Vairavasundaram, 2024</xref>; <xref ref-type="bibr" rid="B20">Muzammul and Li, 2025</xref>), SoyCountNet employs a point-level supervision paradigm that enables simultaneous counting and localization without bounding-box annotations (<xref ref-type="bibr" rid="B42">Zheng et&#xa0;al., 2023</xref>). This paradigm significantly reduces annotation cost and training complexity, while promoting finer spatial alignment between predicted density peaks and seed centroids. Its end-to-end architecture eliminates post-processing steps such as segmentation, filtering, and non-maximum suppression (NMS), thereby enhancing efficiency and facilitating real-time, high-throughput field phenotyping. 
In terms of generalization, SoyCountNet maintains stable performance across six soybean cultivars, with MAE variation within 1.5, demonstrating robust cross-varietal adaptability and environmental resilience. Overall, SoyCountNet achieves a well-balanced improvement in counting accuracy, localization precision, and computational efficiency through coordinated architectural and loss design, providing a reliable and scalable framework for intelligent breeding and precision agriculture. Its architecture and training paradigm can further be extended to other crop phenotyping applications requiring dense object localization.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Occlusion problem and mitigation strategies</title>
<p>In field conditions, soybean pods, stems, and leaves are often densely intertwined, resulting in varying degrees of pod and stem occlusion. Such occlusion obscures seed texture and boundary features, thereby degrading the model&#x2019;s counting accuracy and stability (<xref ref-type="bibr" rid="B32">Wu et&#xa0;al., 2025</xref>). To address these challenges, this study introduces a series of targeted optimization strategies from both architectural and data perspectives. First, inspired by the point-based density estimation paradigm of P2PNet, the soybean seed counting task is reformulated from an explicit detection problem into a density regression problem. By modeling each seed center as a Gaussian kernel, the network achieves a continuous representation of the spatial target distribution. This formulation enables the model to infer potential targets in occluded regions through neighborhood feature aggregation, thereby maintaining high counting accuracy and effectively reducing missed detections even under severe pod overlap or partial occlusion (<xref ref-type="bibr" rid="B29">Wang et&#xa0;al., 2025</xref>). Second, at the feature modeling level, the SViT module is introduced to strengthen long-range semantic dependencies across spatial dimensions. Through its self-attention mechanism, SViT adaptively redistributes spatial feature weights, allowing the network to more effectively perceive contextual cues in occluded regions and recover the semantic integrity of partially hidden pods. Meanwhile, the ECA module enhances feature discriminability in dense seed regions by emphasizing informative channels and suppressing redundant background noise. Finally, at the data level, an occlusion-aware data augmentation strategy is designed to generate training samples with varying occlusion ratios and spatial configurations, thereby improving the model&#x2019;s generalization and robustness under real-world field conditions. 
In summary, the proposed multi-level occlusion mitigation framework&#x2014;encompassing structural design, feature enhancement, and data augmentation&#x2014;forms a synergistic optimization mechanism. This framework enables SoyCountNet to maintain high counting accuracy and stability even in densely occluded scenarios, providing a reliable technical foundation for high-throughput and automated soybean phenotyping.</p>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Limitations and mitigation strategies</title>
<p>Maintaining consistent cultivation and management practices&#x2014;such as planting density, fertilization, and irrigation&#x2014;is critical for data comparability and reproducibility (<xref ref-type="bibr" rid="B1">Angidi et&#xa0;al., 2025</xref>). The relatively low metrics observed here largely reflect the stress imposed by saline-alkali soil on soybean physiology, which reduces root uptake and photosynthetic efficiency, thereby decreasing pod number and yield (<xref ref-type="bibr" rid="B6">Hasanuzzaman et&#xa0;al., 2022</xref>). Although SoyCountNet performed robustly under these conditions, its accuracy may still be affected by soil salinity fluctuations. For image acquisition, a ground-based platform enabled close-range photography with ease of deployment and operational efficiency, suitable for small-scale field experiments. However, imaging is constrained by plant height and canopy density, often requiring manual adjustment of camera angles or plant manipulation to expose pods, limiting data collection flexibility and continuity (<xref ref-type="bibr" rid="B33">Xu et&#xa0;al., 2023</xref>). The current platform offers only three degrees of freedom and relies on manual assistance. Future improvements may involve multi-degree-of-freedom systems, such as adjustable 3D gimbals or robotic arms, to enable multi-angle, non-destructive imaging and enhance model transferability. For dataset construction, point-level annotations were used instead of polygonal or bounding-box labels, reducing labeling time and cost by roughly 80% while providing supervision suited for dense counting tasks (<xref ref-type="bibr" rid="B34">Xu et&#xa0;al., 2025</xref>). Nonetheless, minor human errors, such as missed or slightly shifted points, may subtly affect density learning and spatial modeling. Semi-automatic annotation combined with active learning could iteratively refine labels, improving consistency and accuracy. 
Overall, SoyCountNet achieves high-precision, robust soybean seed counting under <italic>in situ</italic> conditions, though data acquisition flexibility, environmental adaptability, and annotation consistency remain areas for improvement. Future work will explore multimodal data fusion, high-throughput dynamic phenotyping, and weakly supervised learning to advance intelligent, automated, and generalizable crop phenotyping systems.</p>
</sec>
<sec id="s4_4">
<label>4.4</label>
<title>Implications for future applications and extensions</title>
<p>Building on the identified limitations, SoyCountNet provides a novel technical framework and practical approach for addressing the challenges of automatic crop counting in complex field environments. Its lightweight architecture and strong generalization capability enable real-time deployment across diverse intelligent agricultural platforms, including mobile imaging systems, ground robots, and automated phenotyping stations, thereby significantly enhancing the efficiency and timeliness of field data acquisition and phenotypic analysis. Future improvements can be pursued along three main directions: data acquisition, feature modeling, and cross-domain transfer. First, in data acquisition, multi-degree-of-freedom imaging systems can enable multi-angle, non-destructive <italic>in situ</italic> capture, mitigating occlusion and incomplete visual information while enhancing model adaptability and robustness under complex natural conditions. Second, in challenging environments such as saline-alkali fields, where data distributions are uneven and visual features are degraded, integrating multimodal data sources (e.g., multispectral, infrared, or depth images) is recommended to improve environmental robustness and yield estimation reliability (<xref ref-type="bibr" rid="B23">Singh et&#xa0;al., 2024</xref>). In terms of annotation and model training, semi-supervised, self-supervised, or active learning strategies can reduce reliance on manual labeling through iterative human&#x2013;model interaction. These approaches can mitigate annotation bias, improve generalization, and facilitate the construction of larger and more diverse datasets (<xref ref-type="bibr" rid="B21">Nivetha et&#xa0;al., 2025</xref>). Furthermore, incorporating additional soybean cultivars in future studies could provide a more comprehensive evaluation of SoyCountNet&#x2019;s generalization ability. 
In addition, cross-crop transfer learning&#x2014;from soybean to other crops such as rice, maize, and wheat&#x2014;could also be explored for applicability beyond soybean. Finally, systematic research into real-time optimization and multimodal fusion mechanisms may lay the foundation for a unified intelligent phenotyping framework. Through large-scale validation across diverse ecological conditions, SoyCountNet has the potential to evolve into a core technological tool linking field observation with intelligent decision-making, thereby advancing precision agriculture, digital breeding, and sustainable crop production (<xref ref-type="bibr" rid="B26">Visakh et&#xa0;al., 2024</xref>).</p>
</sec>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusion</title>
<p>Accurate counting of soybean seeds per plant is critical for yield estimation and cultivar evaluation. This study presents SoyCountNet, a deep learning framework for non-destructive, <italic>in situ</italic> counting and localization of individual soybean seeds under field conditions, built upon a self-developed high-throughput phenotyping platform. The model integrates VGG19_BN backbone, SViT, and ECA modules, enabling effective capture of both local and global features while mitigating occlusion from stems and pods and feature loss due to incomplete seed development. An improved loss function incorporating nearest-neighbor and target-overlap penalties further enhances counting accuracy and spatial localization consistency. Experimental results demonstrate that SoyCountNet significantly outperforms existing methods on the in-house field soybean dataset, achieving a mean absolute error of 4.61 (95% CI: 3.63&#x2013;5.59) and a coefficient of determination of 0.94 (95% CI: 0.92&#x2013;0.96), with robust performance across different cultivars, providing reliable support for yield prediction and precision breeding. The lightweight architecture and strong generalization ability of SoyCountNet enable deployment across diverse intelligent agricultural platforms. Future integration with multi-view imaging, multimodal sensing, and cross-crop transfer learning could further improve robustness, adaptability, and real-time performance, supporting the development of unified intelligent phenotyping systems, automated yield estimation, and sustainable precision agriculture.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The data analyzed in this study is subject to the following licenses/restrictions: The dataset generated and analyzed during the current study is not publicly available due to institutional policy and ongoing related research, but it can be made available from the corresponding author upon reasonable request. Requests to access these datasets should be directed to FL, <email xlink:href="mailto:dbkh@qau.edu.cn">dbkh@qau.edu.cn</email>.</p></sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>FL: Writing &#x2013; original draft. QW: Writing &#x2013; original draft. HW: Data curation, Writing &#x2013; review &amp; editing. ZH: Writing &#x2013; review &amp; editing. SW: Writing &#x2013; review &amp; editing. LZ: Writing &#x2013; review &amp; editing. ZW: Writing &#x2013; review &amp; editing. HL: Writing &#x2013; review &amp; editing.</p></sec>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s10" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Angidi</surname> <given-names>S.</given-names></name>
<name><surname>Madankar</surname> <given-names>K.</given-names></name>
<name><surname>Tehseen</surname> <given-names>M. M.</given-names></name>
<name><surname>Bhatla</surname> <given-names>A.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Advanced high-throughput phenotyping techniques for managing abiotic stress in agricultural crops&#x2014;A comprehensive review</article-title>. <source>Crops</source> <volume>5</volume>, <fpage>8</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/crops5020008</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Behera</surname> <given-names>T. K.</given-names></name>
<name><surname>Bakshi</surname> <given-names>S.</given-names></name>
<name><surname>Nappi</surname> <given-names>M.</given-names></name>
<name><surname>Sa</surname> <given-names>P. K.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Superpixel-based multiscale CNN approach toward multiclass object segmentation from UAV-captured aerial images</article-title>. <source>IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing</source>. <volume>16</volume>, <fpage>1771</fpage>&#x2013;<lpage>1784</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JSTARS.2023.3239119</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chen</surname> <given-names>Z.</given-names></name>
<name><surname>Wang</surname> <given-names>J.</given-names></name>
<name><surname>Jin</surname> <given-names>J.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Fully automated proximal hyperspectral imaging system for high-resolution and high-quality <italic>in vivo</italic> soybean phenotyping</article-title>. <source>Precis. Agric.</source> <volume>24</volume>, <fpage>2395</fpage>&#x2013;<lpage>2415</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11119-023-10045-5</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Fan</surname> <given-names>J.</given-names></name>
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Wen</surname> <given-names>W.</given-names></name>
<name><surname>Gu</surname> <given-names>S.</given-names></name>
<name><surname>Lu</surname> <given-names>X.</given-names></name>
<name><surname>Guo</surname> <given-names>X.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>The future of Internet of Things in agriculture: Plant high-throughput phenotypic platform</article-title>. <source>J. Cleaner Production</source> <volume>280</volume>, <fpage>123651</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jclepro.2020.123651</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Han</surname> <given-names>K.</given-names></name>
<name><surname>Wang</surname> <given-names>Y.</given-names></name>
<name><surname>Chen</surname> <given-names>H.</given-names></name>
<name><surname>Chen</surname> <given-names>X.</given-names></name>
<name><surname>Guo</surname> <given-names>J.</given-names></name>
<name><surname>Liu</surname> <given-names>Z.</given-names></name>
<etal/>
</person-group>. (<year>2022</year>). 
<article-title>A survey on vision transformer</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell.</source> <volume>45</volume>, <fpage>87</fpage>&#x2013;<lpage>110</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TPAMI.2022.3152247</pub-id>, PMID: <pub-id pub-id-type="pmid">35180075</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Hasanuzzaman</surname> <given-names>M.</given-names></name>
<name><surname>Parvin</surname> <given-names>K.</given-names></name>
<name><surname>Anee</surname> <given-names>T. I.</given-names></name>
<name><surname>Masud</surname> <given-names>A. A. C.</given-names></name>
<name><surname>Nowroz</surname> <given-names>F.</given-names></name>
</person-group> (<year>2022</year>). &#x201c;
<article-title>Salt stress responses and tolerance in soybean</article-title>,&#x201d; in <source>Plant Stress Physiology &#x2013; Perspectives in Agriculture</source>. (<publisher-loc>London, UK</publisher-loc>: 
<publisher-name>IntechOpen</publisher-name>).
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jin</surname> <given-names>C.</given-names></name>
<name><surname>Zhou</surname> <given-names>L.</given-names></name>
<name><surname>Pu</surname> <given-names>Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>C.</given-names></name>
<name><surname>Qi</surname> <given-names>H.</given-names></name>
<name><surname>Zhao</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Application of deep learning for high-throughput phenotyping of seed: a review</article-title>. <source>Artif. Intell. Rev.</source> <volume>58</volume>, <fpage>76</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10462-024-11079-5</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kwon</surname> <given-names>H.</given-names></name>
<name><surname>Lee</surname> <given-names>S. H.</given-names></name>
<name><surname>Kim</surname> <given-names>M. Y.</given-names></name>
<name><surname>Ha</surname> <given-names>J.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Development of an automated phenotyping platform and identification of a novel QTL for drought tolerance in soybean</article-title>. <source>Plant Phenomics</source>. <fpage>100102</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.plaphe.2025.100102</pub-id>, PMID: <pub-id pub-id-type="pmid">41416200</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>M.</given-names></name>
<name><surname>Liu</surname> <given-names>Y.</given-names></name>
<name><surname>Wang</surname> <given-names>C.</given-names></name>
<name><surname>Yang</surname> <given-names>X.</given-names></name>
<name><surname>Li</surname> <given-names>D.</given-names></name>
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
<etal/>
</person-group>. (<year>2020</year>). 
<article-title>Identification of traits contributing to high and stable yields in different soybean varieties across three Chinese latitudes</article-title>. <source>Front. Plant Sci.</source> <volume>10</volume>, <elocation-id>1642</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpls.2019.01642</pub-id>, PMID: <pub-id pub-id-type="pmid">32038668</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>J.</given-names></name>
<name><surname>Magar</surname> <given-names>R. T.</given-names></name>
<name><surname>Chen</surname> <given-names>D.</given-names></name>
<name><surname>Lin</surname> <given-names>F.</given-names></name>
<name><surname>Wang</surname> <given-names>D.</given-names></name>
<name><surname>Yin</surname> <given-names>X.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>SoybeanNet: Transformer-based convolutional neural network for soybean pod counting from Unmanned Aerial Vehicle (UAV) images</article-title>. <source>Comput. Electron. Agric.</source> <volume>220</volume>, <fpage>108861</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2024.108861</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
<name><surname>Chen</surname> <given-names>D.</given-names></name>
</person-group> (<year>2018</year>). &#x201c;
<article-title>Csrnet: Dilated convolutional neural networks for understanding the highly congested scenes</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. <fpage>1091</fpage>&#x2013;<lpage>1100</lpage>.
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liang</surname> <given-names>D.</given-names></name>
<name><surname>Xu</surname> <given-names>W.</given-names></name>
<name><surname>Zhu</surname> <given-names>Y.</given-names></name>
<name><surname>Zhou</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Focal inverse distance transform maps for crowd localization</article-title>. <source>IEEE Trans. Multimedia</source> <volume>25</volume>, <fpage>6040</fpage>&#x2013;<lpage>6052</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TMM.2022.3203870</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>W.</given-names></name>
<name><surname>Salzmann</surname> <given-names>M.</given-names></name>
<name><surname>Fua</surname> <given-names>P.</given-names></name>
</person-group> (<year>2019</year>). &#x201c;
<article-title>Context-aware crowd counting</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>. <fpage>5099</fpage>&#x2013;<lpage>5108</lpage>.
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>F.</given-names></name>
<name><surname>Wang</surname> <given-names>S.</given-names></name>
<name><surname>Pang</surname> <given-names>S.</given-names></name>
<name><surname>Han</surname> <given-names>Z.</given-names></name>
<name><surname>Zhao</surname> <given-names>L.</given-names></name>
</person-group> (<year>2025</year>a). 
<article-title>SmartPod: an automated framework for high-precision soybean pod counting in field phenotyping</article-title>. <source>Agronomy</source> <volume>15</volume>, <fpage>791</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy15040791</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>F.</given-names></name>
<name><surname>Wang</surname> <given-names>S.</given-names></name>
<name><surname>Zhao</surname> <given-names>L.</given-names></name>
</person-group> (<year>2025</year>b). 
<article-title>Research progress and prospect of intelligent high-throughput crop phenotyping platform</article-title>. <source>J. Crop Health</source> <volume>77</volume>, <fpage>156</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10343-025-01228-3</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Ma</surname> <given-names>Z.</given-names></name>
<name><surname>Wei</surname> <given-names>X.</given-names></name>
<name><surname>Hong</surname> <given-names>X.</given-names></name>
<name><surname>Gong</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2019</year>). &#x201c;
<article-title>Bayesian loss for crowd count estimation with point supervision</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF international conference on computer vision</conf-name>. <fpage>6142</fpage>&#x2013;<lpage>6151</lpage>.
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mishra</surname> <given-names>R.</given-names></name>
<name><surname>Tripathi</surname> <given-names>M. K.</given-names></name>
<name><surname>Sikarwar</surname> <given-names>R. S.</given-names></name>
<name><surname>Singh</surname> <given-names>Y.</given-names></name>
<name><surname>Tripathi</surname> <given-names>N.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Soybean (Glycine max L. Merrill): A multipurpose legume shaping our world</article-title>. <source>Plant Cell Biotechnol. Mol. Biol.</source> <volume>25</volume>, <fpage>17</fpage>&#x2013;<lpage>37</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.56557/pcbmb/2024/v25i3-48643</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Moeinizade</surname> <given-names>S.</given-names></name>
<name><surname>Pham</surname> <given-names>H.</given-names></name>
<name><surname>Han</surname> <given-names>Y.</given-names></name>
<name><surname>Dobbels</surname> <given-names>A.</given-names></name>
<name><surname>Hu</surname> <given-names>G.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>An applied deep learning approach for estimating soybean relative maturity from UAV imagery to aid plant breeding decisions</article-title>. <source>Mach. Learn. Appl.</source> <volume>7</volume>, <fpage>100233</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.mlwa.2021.100233</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Murphy</surname> <given-names>K. M.</given-names></name>
<name><surname>Ludwig</surname> <given-names>E.</given-names></name>
<name><surname>Gutierrez</surname> <given-names>J.</given-names></name>
<name><surname>Gehan</surname> <given-names>M. A.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Deep learning in image-based plant phenotyping</article-title>. <source>Annu. Rev. Plant Biol.</source> <volume>75</volume>, <fpage>771</fpage>&#x2013;<lpage>795</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1146/annurev-arplant-070523-042828</pub-id>, PMID: <pub-id pub-id-type="pmid">38382904</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Muzammul</surname> <given-names>M.</given-names></name>
<name><surname>Li</surname> <given-names>X.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Comprehensive review of deep learning-based tiny object detection: challenges, strategies, and future directions</article-title>. <source>Knowledge Inf. Syst.</source> <volume>67</volume>, <fpage>3825</fpage>&#x2013;<lpage>3913</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10115-025-02375-9</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Nivetha</surname> <given-names>R.</given-names></name>
<name><surname>Sriharipriya</surname> <given-names>K. C.</given-names></name>
<name><surname>Balusamy</surname> <given-names>B.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Self-supervised learning graphical neural network driven prediction model for path-planning and navigation in smart sustainable agriculture</article-title>. <source>IEEE Access</source>. <volume>13</volume>, <fpage>151235</fpage>&#x2013;<lpage>151257</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ACCESS.2025.3602476</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Okada</surname> <given-names>M.</given-names></name>
<name><surname>Barras</surname> <given-names>C.</given-names></name>
<name><surname>Toda</surname> <given-names>Y.</given-names></name>
<name><surname>Hamazaki</surname> <given-names>K.</given-names></name>
<name><surname>Ohmori</surname> <given-names>Y.</given-names></name>
<name><surname>Yamasaki</surname> <given-names>Y.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>High-throughput phenotyping of soybean biomass: conventional trait estimation and novel latent feature extraction using UAV remote sensing and deep learning models</article-title>. <source>Plant Phenomics</source>. <volume>6</volume>, <fpage>0244</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.34133/plantphenomics.0244</pub-id>, PMID: <pub-id pub-id-type="pmid">39252878</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Singh</surname> <given-names>R.</given-names></name>
<name><surname>Nisha</surname> <given-names>R.</given-names></name>
<name><surname>Naik</surname> <given-names>R.</given-names></name>
<name><surname>Upendar</surname> <given-names>K.</given-names></name>
<name><surname>Nickhil</surname> <given-names>C.</given-names></name>
<name><surname>Deka</surname> <given-names>S. C.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Sensor fusion techniques in deep learning for multimodal fruit and vegetable quality assessment: A comprehensive review</article-title>. <source>J. Food Meas. Charact.</source> <volume>18</volume>, <fpage>8088</fpage>&#x2013;<lpage>8109</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11694-024-02789-z</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Tian</surname> <given-names>C.</given-names></name>
<name><surname>Wang</surname> <given-names>J.</given-names></name>
<name><surname>Zheng</surname> <given-names>D.</given-names></name>
<name><surname>Li</surname> <given-names>Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Oat ears detection and counting model in natural environment based on improved Faster R-CNN</article-title>. <source>Agronomy</source>. <volume>15</volume>, <issue>3</issue>, <fpage>536</fpage>.
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Vijayakumar</surname> <given-names>A.</given-names></name>
<name><surname>Vairavasundaram</surname> <given-names>S.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Yolo-based object detection models: A review and its applications</article-title>. <source>Multimedia Tools Appl.</source> <volume>83</volume>, <fpage>83535</fpage>&#x2013;<lpage>83574</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11042-024-18872-y</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Visakh</surname> <given-names>R. L.</given-names></name>
<name><surname>Anand</surname> <given-names>S.</given-names></name>
<name><surname>Reddy</surname> <given-names>S. B.</given-names></name>
<name><surname>Jha</surname> <given-names>U. C.</given-names></name>
<name><surname>Sah</surname> <given-names>R. P.</given-names></name>
<name><surname>Beena</surname> <given-names>R.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Precision phenotyping in crop science: from plant traits to gene discovery for climate-smart agriculture</article-title>. <source>Plant Breed</source>. <volume>0</volume>, <fpage>1</fpage>&#x2013;<lpage>29</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/pbr.13228</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>B.</given-names></name>
<name><surname>Liu</surname> <given-names>H.</given-names></name>
<name><surname>Samaras</surname> <given-names>D.</given-names></name>
<name><surname>Nguyen</surname> <given-names>M. H.</given-names></name>
</person-group> (<year>2020</year>a). 
<article-title>Distribution matching for crowd counting</article-title>. <source>Adv. Neural Inf. Process. Syst.</source> <volume>33</volume>, <fpage>1595</fpage>&#x2013;<lpage>1607</lpage>.
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>Q.</given-names></name>
<name><surname>Wu</surname> <given-names>B.</given-names></name>
<name><surname>Zhu</surname> <given-names>P.</given-names></name>
<name><surname>Li</surname> <given-names>P.</given-names></name>
<name><surname>Zuo</surname> <given-names>W.</given-names></name>
<name><surname>Hu</surname> <given-names>Q.</given-names></name>
</person-group> (<year>2020</year>b). &#x201c;
<article-title>ECA-Net: Efficient channel attention for deep convolutional neural networks</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>. <fpage>11534</fpage>&#x2013;<lpage>11542</lpage>.
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>M.</given-names></name>
<name><surname>Zhou</surname> <given-names>X.</given-names></name>
<name><surname>Chen</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>A comprehensive survey of crowd density estimation and counting</article-title>. <source>IET Image Process.</source> <volume>19</volume>, <fpage>e13328</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1049/ipr2.13328</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wattana</surname> <given-names>M.</given-names></name>
<name><surname>Siriluk</surname> <given-names>B.</given-names></name>
<name><surname>Khotwit</surname> <given-names>S.</given-names></name>
</person-group> (<year>2018</year>). 
<article-title>Counting and separating damaged seeds of soybean seeds using image processing</article-title>. <source>Int. J. Adv. Sci. Eng. Inf. Technol.</source> <volume>8</volume>, <fpage>1366</fpage>&#x2013;<lpage>1371</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.18517/ijaseit.8.4.6513</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wei</surname> <given-names>B.</given-names></name>
<name><surname>Ma</surname> <given-names>X.</given-names></name>
<name><surname>Guan</surname> <given-names>H.</given-names></name>
<name><surname>Yu</surname> <given-names>M.</given-names></name>
<name><surname>Yang</surname> <given-names>C.</given-names></name>
<name><surname>He</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Dynamic simulation of leaf area index for the soybean canopy based on 3D reconstruction</article-title>. <source>Ecol. Inf.</source> <volume>75</volume>, <fpage>102070</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ecoinf.2023.102070</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wu</surname> <given-names>Q.</given-names></name>
<name><surname>Liu</surname> <given-names>F.</given-names></name>
<name><surname>Han</surname> <given-names>Z.</given-names></name>
<name><surname>Wang</surname> <given-names>H.</given-names></name>
<name><surname>Liu</surname> <given-names>H.</given-names></name>
<name><surname>Xin</surname> <given-names>N.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>SPCNet: an intelligent field-based soybean seed counting algorithm for salinity stress response evaluation</article-title>. <source>J. Crop Health</source> <volume>77</volume>, <fpage>145</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10343-025-01215-8</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Xu</surname> <given-names>X.</given-names></name>
<name><surname>Geng</surname> <given-names>Q.</given-names></name>
<name><surname>Gao</surname> <given-names>F.</given-names></name>
<name><surname>Xiong</surname> <given-names>D.</given-names></name>
<name><surname>Qiao</surname> <given-names>H.</given-names></name>
<name><surname>Ma</surname> <given-names>X.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Segmentation and counting of wheat spike grains based on deep learning and textural feature</article-title>. <source>Plant Methods</source> <volume>19</volume>, <fpage>77</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13007-023-01062-6</pub-id>, PMID: <pub-id pub-id-type="pmid">37528413</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Xu</surname> <given-names>B.</given-names></name>
<name><surname>Zhang</surname> <given-names>J.</given-names></name>
<name><surname>Tang</surname> <given-names>Z.</given-names></name>
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Xu</surname> <given-names>L.</given-names></name>
<name><surname>Lu</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Nighttime environment enables robust field-based high-throughput plant phenotyping: A system platform and a case study on rice</article-title>. <source>Comput. Electron. Agric.</source> <volume>235</volume>, <fpage>110337</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2025.110337</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yang</surname> <given-names>S.</given-names></name>
<name><surname>Zheng</surname> <given-names>L.</given-names></name>
<name><surname>Wu</surname> <given-names>T.</given-names></name>
<name><surname>Sun</surname> <given-names>S.</given-names></name>
<name><surname>Zhang</surname> <given-names>M.</given-names></name>
<name><surname>Li</surname> <given-names>M.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>High-throughput soybean pods high-quality segmentation and seed-per-pod estimation for soybean plant breeding</article-title>. <source>Eng. Appl. Artif. Intell.</source> <volume>129</volume>, <fpage>107580</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.engappai.2023.107580</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yu</surname> <given-names>C.</given-names></name>
<name><surname>Feng</surname> <given-names>J.</given-names></name>
<name><surname>Zheng</surname> <given-names>Z.</given-names></name>
<name><surname>Guo</surname> <given-names>J.</given-names></name>
<name><surname>Hu</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>A lightweight SOD-YOLOv5n model-based winter jujube detection and counting method deployed on Android</article-title>. <source>Comput. Electron. Agric.</source> <volume>218</volume>, <fpage>108701</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2024.108701</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zavafer</surname> <given-names>A.</given-names></name>
<name><surname>Bates</surname> <given-names>H.</given-names></name>
<name><surname>Mancilla</surname> <given-names>C.</given-names></name>
<name><surname>Ralph</surname> <given-names>P. J.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Phenomics: conceptualization and importance for plant physiology</article-title>. <source>Trends Plant Sci.</source> <volume>28</volume>, <fpage>1004</fpage>&#x2013;<lpage>1013</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.tplants.2023.03.023</pub-id>, PMID: <pub-id pub-id-type="pmid">37137749</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>Q. Y.</given-names></name>
<name><surname>Fan</surname> <given-names>K. J.</given-names></name>
<name><surname>Tian</surname> <given-names>Z.</given-names></name>
<name><surname>Guo</surname> <given-names>K.</given-names></name>
<name><surname>Su</surname> <given-names>W. H.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>High-precision automated soybean phenotypic feature extraction based on deep learning and computer vision</article-title>. <source>Plants</source> <volume>13</volume>, <fpage>2613</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/plants13182613</pub-id>, PMID: <pub-id pub-id-type="pmid">39339587</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>Z.</given-names></name>
<name><surname>Jin</surname> <given-names>X.</given-names></name>
<name><surname>Rao</surname> <given-names>Y.</given-names></name>
<name><surname>Wan</surname> <given-names>T.</given-names></name>
<name><surname>Wang</surname> <given-names>X.</given-names></name>
<name><surname>Li</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>DSBEAN: An innovative framework for intelligent soybean breeding phenotype analysis based on various main stem structures and deep learning methods</article-title>. <source>Comput. Electron. Agric.</source> <volume>224</volume>, <fpage>109135</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2024.109135</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>C.</given-names></name>
<name><surname>Zha</surname> <given-names>B.</given-names></name>
<name><surname>Yuan</surname> <given-names>R.</given-names></name>
<name><surname>Zhao</surname> <given-names>K.</given-names></name>
<name><surname>Sun</surname> <given-names>J.</given-names></name>
<name><surname>Liu</surname> <given-names>X.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Identification of quantitative trait loci for node number, pod number, and seed number in soybean</article-title>. <source>Int. J. Mol. Sci.</source> <volume>26</volume>, <fpage>2300</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/ijms26052300</pub-id>, PMID: <pub-id pub-id-type="pmid">40076921</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhao</surname> <given-names>J.</given-names></name>
<name><surname>Kaga</surname> <given-names>A.</given-names></name>
<name><surname>Yamada</surname> <given-names>T.</given-names></name>
<name><surname>Komatsu</surname> <given-names>K.</given-names></name>
<name><surname>Hirata</surname> <given-names>K.</given-names></name>
<name><surname>Kikuchi</surname> <given-names>A.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Improved field-based soybean seed counting and localization with feature level considered</article-title>. <source>Plant Phenomics</source> <volume>5</volume>, <fpage>0026</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.34133/plantphenomics.0026</pub-id>, PMID: <pub-id pub-id-type="pmid">36939414</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zheng</surname> <given-names>H.</given-names></name>
<name><surname>Fan</surname> <given-names>X.</given-names></name>
<name><surname>Bo</surname> <given-names>W.</given-names></name>
<name><surname>Yang</surname> <given-names>X.</given-names></name>
<name><surname>Tjahjadi</surname> <given-names>T.</given-names></name>
<name><surname>Jin</surname> <given-names>S.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>A multiscale point-supervised network for counting maize tassels in the wild</article-title>. <source>Plant Phenomics</source> <volume>5</volume>, <fpage>0100</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.34133/plantphenomics.0100</pub-id>, PMID: <pub-id pub-id-type="pmid">37791249</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhou</surname> <given-names>L.</given-names></name>
<name><surname>Han</surname> <given-names>D.</given-names></name>
<name><surname>Sun</surname> <given-names>G.</given-names></name>
<name><surname>Liu</surname> <given-names>Y.</given-names></name>
<name><surname>Yan</surname> <given-names>X.</given-names></name>
<name><surname>Jia</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Soybean yield estimation and lodging discrimination based on lightweight UAV and point cloud deep learning</article-title>. <source>Plant Phenomics</source> <volume>7</volume>, <fpage>100028</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.plaphe.2025.100028</pub-id>, PMID: <pub-id pub-id-type="pmid">41415158</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2091313">Qingwei Zhuang</ext-link>, Wuhan University, China</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2960497">Jiale Feng</ext-link>, Northernvue Corporation, United States</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3086797">Anto Lourdu Xavier Raj Arockia Selvarathinam</ext-link>, Grand Valley State University, United States</p></fn>
</fn-group>
</back>
</article>