<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2026.1783587</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>A federated learning with Large-Small Kernel Attention Network for image classification</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Liu</surname><given-names>Tianzhe</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3351858/overview"/>
<xref ref-type="author-notes" rid="fn003"><sup>&#x2020;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Xie</surname><given-names>Jing</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Dong</surname><given-names>Heng</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2924957/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Fujian Police College</institution>, <city>Fuzhou</city>,&#xa0;<country country="CN">China</country></aff>
<aff id="aff2"><label>2</label><institution>Logistics Management Center of Fuzhou Customs District</institution>, <city>Fuzhou</city>,&#xa0;<country country="CN">China</country></aff>
<aff id="aff3"><label>3</label><institution>Fuzhou Institute of Technology</institution>, <city>Fuzhou</city>,&#xa0;<country country="CN">China</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Heng Dong, <email xlink:href="mailto:ypoofml@126.com">ypoofml@126.com</email></corresp>
<fn fn-type="other" id="fn003">
<p>&#x2020;ORCID: Tianzhe Liu, <uri xlink:href="https://orcid.org/0009-0009-6315-4262">orcid.org/0009-0009-6315-4262</uri></p></fn>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-20">
<day>20</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>17</volume>
<elocation-id>1783587</elocation-id>
<history>
<date date-type="received">
<day>08</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>30</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>28</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Liu, Xie and Dong.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Liu, Xie and Dong</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-20">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Image data acquisition often involves cross-platform, cross-device, and multi-source heterogeneous data issues, posing challenges for data security and privacy protection in collaborative learning. Traditional centralized learning paradigms struggle to balance multi-institutional collaboration needs with stringent data security requirements, while existing Federated Learning (FL) frameworks frequently exhibit significant performance degradation when handling the complex features inherent in images. To address these gaps, this study introduces FL-LSNet, a novel federated learning framework integrated with a lightweight Large-Small Network (LSNet). Built upon a robust client-server architecture, FL-LSNet safeguards local data privacy through decentralized preprocessing while addressing the challenges of long-tailed data via dynamic weight adjustment mechanisms within the server-side aggregator. The core of the framework, LSNet, implements a &#x201c;See Large, Focus Small&#x201d; strategy: (1) Large Kernel Perceptrons (LKP): Capture global contextual dependencies. (2) Small Kernel Attention (SKA): Facilitate fine-grained local feature fusion. Empirical results demonstrate that LSNet reduces computational overhead by 7% compared with Swin Transformer, while enhancing feature representation capability by 19% relative to the baseline model. Extensive evaluations across three diverse datasets reveal that FL-LSNet consistently outperforms state-of-the-art federated algorithms, including FedAvg and MOON, achieving an accuracy range of 84.32% to 98.92%. Ablation studies further validate the efficacy of the FedAvg-LSNet integration, which surpassed the baseline by 6.15%, achieving performance metrics exceeding 98%. 
This research establishes a scalable paradigm for multi-stakeholder data collaboration and offers new insights into the lightweight vertical adaptation of federated learning in public safety, dynamic monitoring, risk early warning, intelligent agriculture and medical diagnosis.</p>
</abstract>
<kwd-group>
<kwd>attention network</kwd>
<kwd>federated learning</kwd>
<kwd>image classification</kwd>
<kwd>Large-Small Kernel Attention</kwd>
<kwd>lightweight</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported in part by the Major Scientific Research Project for Technology Promotes Police under Grant (2025YZ040003).</funding-statement>
</funding-group>
<counts>
<fig-count count="6"/>
<table-count count="7"/>
<equation-count count="14"/>
<ref-count count="29"/>
<page-count count="16"/>
<word-count count="9477"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Sustainable and Intelligent Phytoprotection</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>With the rapid advancement of digital transformation, image classification has become a fundamental technique supporting a wide range of real-world applications, including dynamic monitoring and risk early warning, intelligent agriculture and medical diagnosis. In practical deployment scenarios, image data are typically collected by heterogeneous devices operating under diverse acquisition standards, sensor characteristics, and environmental conditions. As a result, multi-source datasets often exhibit substantial distributional discrepancies, which severely impair the generalization ability of conventional centralized learning models. Effectively exploiting such decentralized and heterogeneous data while preserving data privacy has therefore become a critical challenge in modern artificial intelligence research.</p>
<p>Federated learning (FL) (<xref ref-type="bibr" rid="B14">Luo et&#xa0;al., 2025</xref>; <xref ref-type="bibr" rid="B25">Yurdem et&#xa0;al., 2024</xref>) has emerged as a promising distributed learning paradigm to address data isolation and privacy concerns. In FL, multiple clients collaboratively train a shared global model by transmitting model parameters or gradient updates instead of raw data, thereby enabling privacy-preserving knowledge aggregation. In theory, this paradigm facilitates cross-device and cross-institutional collaboration without violating data ownership constraints, offering a viable solution for large-scale learning under decentralized data settings.</p>
<p>To investigate the applicability of federated learning in realistic and high-impact scenarios, this study focuses on tomato disease image classification. Tomatoes are among the most economically significant vegetable crops worldwide, and ensuring stable and efficient production is vital for global food security. However, tomato diseases cause tens of billions of dollars in annual economic losses and pose persistent threats to sustainability (<xref ref-type="bibr" rid="B23">Wang et&#xa0;al., 2025a</xref>; <xref ref-type="bibr" rid="B24">Wu et&#xa0;al., 2024</xref>). Traditional disease diagnosis methods rely heavily on manual inspection and expert experience, which suffer from low efficiency, high subjectivity, limited scalability, and high labor costs. These limitations make them inadequate for precision control that demands large-scale, real-time, and accurate disease monitoring (<xref ref-type="bibr" rid="B28">Zhao et&#xa0;al., 2024</xref>).</p>
<p>Although federated learning offers numerous advantages, it still faces inherent challenges when applied to image classification tasks. As illustrated in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>, both data heterogeneity and system heterogeneity jointly affect the convergence behavior and stability of the model. In this context, data heterogeneity refers to the Non-Independent and Identically Distributed (non-IID) nature of data across clients. Such non-IID distributions typically arise from discrepancies along two primary dimensions: differences in data acquisition protocols and variations in sensor characteristics. Specifically, differences in acquisition protocols are often caused by the lack of unified standards during data collection, such as inconsistencies in imaging angles, illumination conditions, or annotation criteria for tomato disease images. In contrast, variations in sensor characteristics are related to the inherent properties of data acquisition devices, including differences in camera resolution (e.g., 1080p versus 4K cameras, which lead to varying levels of image detail) and sensor sensitivity, which in turn affect noise levels and color fidelity in the captured images. Consequently, in a typical federated learning workflow, each client independently optimizes its local model and periodically uploads model updates to a central server for aggregation. Beyond these challenges, the widely adopted Federated Averaging (FedAvg) (<xref ref-type="bibr" rid="B15">McMahan et&#xa0;al., 2017</xref>) algorithm is particularly susceptible to the issue of client drift, whereby local models converge to distinct client-specific optima due to data heterogeneity. This divergence often results in slow convergence and degraded performance of the aggregated global model under heterogeneous data distributions.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Federated learning framework for image classification across heterogeneous client devices, illustrating local training and server-side aggregation under non-IID data distributions.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1783587-g001.tif">
<alt-text content-type="machine-generated">Diagram illustrating a cloud server at the top managing machine learning model relationships for three tasks labeled A, B, and C, each with networked icons for mobile, surveillance, and camcorder devices, showing arrows for uploading model parameters and distributing model relationships.</alt-text>
</graphic></fig>
<p>To address the above challenges, this work aims to enhance both feature representation capability and aggregation robustness in federated image classification. Specifically, we propose a novel framework termed Federated Learning with Large&#x2013;Small Kernel Attention Network (FL-LSNet). In this framework, LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) denotes a lightweight convolutional neural network backbone designed for federated settings, whose core objective is to improve disease feature extraction while maintaining computational efficiency on heterogeneous client devices.</p>
<p>At the architectural level, LSNet is built upon a key building block called Large&#x2013;Small Kernel Convolution (LS Convolution) (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>). LS Convolution is a hybrid convolutional module that integrates large-kernel convolution for capturing global contextual information with small-kernel convolution for fine-grained local feature refinement. To further emphasize disease-relevant regions, an attention mechanism is embedded within the LS Convolution, enabling the network to adaptively focus on salient lesion patterns while suppressing background noise. It is important to note that LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) refers to the overall network architecture, whereas LS Convolution constitutes its fundamental convolutional module; the two concepts are related but not interchangeable.</p>
<p>Furthermore, to mitigate feature-level discrepancies induced by non-IID data distributions across clients, FL-LSNet incorporates an adaptive feature-matching attention module based on an encoder&#x2013;decoder structure. This module facilitates implicit alignment of intermediate feature representations among clients, thereby improving the consistency of learned representations during federated aggregation.</p>
<p>The main contributions of this study are summarized as follows:</p>
<list list-type="order">
<list-item>
<p>We propose FL-LSNet, a federated learning framework that adaptively adjusts aggregation weights according to local model convergence behavior and data quality, improving training stability under heterogeneous conditions.</p></list-item>
<list-item>
<p>Inspired by LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>), we adopt a lightweight neural network backbone tailored for federated image classification, whose core component is the proposed LS Convolution that combines large-kernel global perception, small-kernel local refinement, and attention-based feature enhancement.</p></list-item>
<list-item>
<p>We introduce an adaptive feature-matching attention module with an encoder&#x2013;decoder architecture to alleviate feature misalignment across clients caused by non-IID data.</p></list-item>
<list-item>
<p>We implement an efficient and scalable FL prototype system integrating communication optimization, asynchronous updates, and heterogeneous device support. Extensive experiments on tomato disease datasets, along with ablation and statistical analyses, validate the effectiveness of each proposed component.</p></list-item>
</list>
</sec>
<sec id="s2">
<label>2</label>
<title>Related work</title>
<sec id="s2_1">
<label>2.1</label>
<title>Advanced architectures in image classification</title>
<p>Significant progress has been made specifically in tomato leaf disease classification through the application of diverse deep learning architectures. <xref ref-type="bibr" rid="B2">Annabel et&#xa0;al. (2019)</xref> proposed a novel image-based detection pipeline encompassing preprocessing, segmentation, feature extraction, and classification, achieving a 94.1% accuracy rate (<xref ref-type="bibr" rid="B2">Annabel et&#xa0;al., 2019</xref>). This foundational work underscores the importance of multi-stage processing in efficient disease classification.</p>
<p>Building on this, recent studies have adopted more sophisticated models. Chowdhury et&#xa0;al. (2021) utilized the EfficientNet architecture on a large-scale dataset of over 18,000 tomato leaf images, significantly outperforming traditional methods (<xref ref-type="bibr" rid="B7">Chowdhury et&#xa0;al., 2021b</xref>). Another study evaluated several cutting-edge CNN architectures, including ResNet18, MobileNet, DenseNet201, and InceptionV3, all of which surpassed existing literature in classification accuracy (<xref ref-type="bibr" rid="B6">Chowdhury et&#xa0;al., 2021a</xref>). These results highlight the superior efficacy of deep CNNs in capturing complex features of tomato leaf diseases. Furthermore, the XSE-TomatoNet model proposed by <xref ref-type="bibr" rid="B4">Assaduzzaman et&#xa0;al. (2025)</xref> integrates EfficientNetB0 with Squeeze-and-Excitation modules and multi-scale feature fusion, ensuring both high accuracy and model interpretability (<xref ref-type="bibr" rid="B4">Assaduzzaman et&#xa0;al., 2025</xref>). Additionally, <xref ref-type="bibr" rid="B8">Gookyi et&#xa0;al. (2024)</xref> demonstrated the deployment of deep learning models on edge devices via the Edge Impulse platform, enabling rapid on-site diagnosis in resource-constrained environments (<xref ref-type="bibr" rid="B8">Gookyi et&#xa0;al., 2024</xref>).</p>
<p>Further innovations involve hybrid and specialized network structures. <xref ref-type="bibr" rid="B29">Zhou et&#xa0;al. (2021)</xref> introduced a Recombined Residual Dense Network (RDN) that merges the advantages of residual and dense connections, reducing training parameters while improving information flow during the recognition process (<xref ref-type="bibr" rid="B29">Zhou et&#xa0;al., 2021</xref>). Complementarily, <xref ref-type="bibr" rid="B26">Zhang et&#xa0;al. (2024)</xref> developed the SE-SK-CapResNet model, fusing capsule networks with residual networks to more precisely capture spatial relationships and lesion morphology, achieving an impressive accuracy of 98.58% (<xref ref-type="bibr" rid="B26">Zhang et&#xa0;al., 2024</xref>). This fusion effectively addresses the limitations of traditional CNNs in identifying disease patterns with complex spatial features.</p>
<p>Despite these advancements, existing image classification solutions face two fundamental limitations. First, they are typically centralized and rely on extensive data sharing, which fails to comply with data privacy regulations and multi-institutional collaboration requirements in the real world. Second, model designs often prioritize peak accuracy on specific datasets while overlooking the unique constraints of Federated Learning (FL) frameworks, such as the impact of model size on communication overhead, generalization capability across extremely heterogeneous data, and deployability on hardware-heterogeneous devices. Directly applying advanced vision architectures, such as Swin Transformers or high-parameter CNNs, to FL settings may lead to communication bottlenecks or performance degradation due to their parameter intensity or sensitivity to data heterogeneity.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Federated learning algorithms</title>
<p>Since the introduction of the Federated Averaging (FedAvg) algorithm by <xref ref-type="bibr" rid="B15">McMahan et&#xa0;al. (2017)</xref>, Federated Learning (FL) has established itself as the cornerstone paradigm for overcoming the &#x201c;data silo&#x201d; problem. FedAvg facilitates distributed training by iteratively aggregating local model updates through weight averaging on a central server. However, its performance significantly deteriorates under Non-Independent and Identically Distributed (non-IID) data settings&#x2014;a challenge that is particularly acute in distributed scenarios. To mitigate data heterogeneity, several sophisticated algorithms have been proposed. FedProx (<xref ref-type="bibr" rid="B13">Li et&#xa0;al., 2020</xref>) introduces a proximal regularization term to restrict local updates from deviating excessively from the global model. Similarly, the SCAFFOLD algorithm (<xref ref-type="bibr" rid="B11">Karimireddy et&#xa0;al., 2020</xref>) employs control variables to estimate and correct the gradient drift between client-side updates and the global objective.</p>
<p>Building upon these concepts, multi-task collaborative frameworks have introduced joint optimization mechanisms between the cloud and clients across multiple endpoints. By exploiting intrinsic correlations between related tasks, these frameworks enhance individual model performance while simultaneously reducing computational and communication overhead. This creates a robust foundation for intelligent task learning in tomato cultivation management, where interrelated objectives&#x2014;such as disease classification, severity assessment, and yield prediction&#x2014;can be co-optimized within a distributed, privacy-preserving mechanism. For instance, Piccialli et&#xa0;al. proposed AGRIFOLD, a lightweight CNN-based FL framework designed to maintain data privacy across diverse distributed datasets (<xref ref-type="bibr" rid="B18">Piccialli et&#xa0;al., 2022</xref>). Zhao et&#xa0;al. introduced HEFL-LDP, which fuses semi-homomorphic encryption with local differential privacy (LDP) (<xref ref-type="bibr" rid="B27">Zhao et&#xa0;al., 2023</xref>). Wang et&#xa0;al. presented FedUAA, an uncertainty-aware aggregation paradigm that accounts for client reliability and generates confidence estimates for decision tree hierarchies (<xref ref-type="bibr" rid="B21">Wang et&#xa0;al., 2023</xref>). Similarly, Rieyan et&#xa0;al. proposed a secure medical image analysis scheme based on distributed data fabric and partial homomorphic encryption (<xref ref-type="bibr" rid="B20">Rieyan et&#xa0;al., 2024</xref>), while others have developed multi-key or multi-user encrypted machine learning systems to support collaborative environments without raw data exchange.</p>
<p>Despite these advancements, existing deep learning and FL algorithms predominantly focus on point-estimate predictive performance, often overlooking the critical aspect of predictive confidence. There is an urgent need for a new FL paradigm that maintains high classification accuracy while securely generating reliable, confidence-aware results for stakeholders. Such a paradigm would not only mitigate data privacy risks but also bolster user trust in AI systems deployed in real-world field environments.</p>
<p>In summary, current research exhibits a notable disconnect: the FL community focuses on optimizing distributed protocols using generic vision models; the computer vision community continues to innovate powerful recognition architectures under the assumption of centralized data access; and smart agriculture research often operates under idealized or centralized settings. This study aims to bridge this gap by designing a deeply collaborative FL framework. This framework integrates a lightweight Large-Small network backbone (LSNet) (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) optimized for image characteristics with an adaptive aggregation and training mechanism (FL-LSNet) designed to address data and system heterogeneity in federated environments.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Methodology</title>
<sec id="s3_1">
<label>3.1</label>
<title>System architecture</title>
<p>The proposed federated learning framework follows a hierarchical distributed architecture composed of a Client Layer and a Server Layer, as illustrated in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>. This design aims to support heterogeneous image data collected from geographically distributed farms while ensuring efficient feature learning, privacy preservation, and stable global optimization.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Hierarchical system architecture of the proposed LSNet-based federated learning framework, illustrating local feature encoding at heterogeneous clients and server-side aggregation based on foreground statistics, loss feedback, model parameters, and adaptive weights.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1783587-g002.tif">
<alt-text content-type="machine-generated">Diagram illustrating federated learning for farms, where each farm&#x2019;s client device processes data with an encoder and transmits results to a central cloud server. The server aggregates foreground statistics, loss, and model parameter sets using normalization and weighting, leading to a combined server model labeled LSNet.</alt-text>
</graphic></fig>
<p>At the Client Layer, <italic>n</italic> data owners (Farm 1 to Farm <italic>n</italic>) are each associated with a local computing unit (Client 1 to Client <italic>n</italic>). Each client is equipped with a Local Encoder, which serves as the front-end feature extraction component of the proposed system. The Local Encoder is a lightweight convolutional sub-network derived from the encoder portion of LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) and is specifically tailored for deployment on resource-constrained edge devices. It processes raw agricultural images locally to extract compact and discriminative feature representations, thereby reducing communication overhead while suppressing noise introduced by device-specific sensing conditions and environmental variations. Throughout the training process, raw image data remain on local devices, and only encoded features and optimization-related signals are transmitted to the server, ensuring data privacy.</p>
<p>The Server Layer hosts a central server that maintains the global LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) model and orchestrates the federated optimization process. Upon receiving updates from participating clients, the server first performs normalization to mitigate scale inconsistencies among uploaded features and training signals. The normalized information is then organized into several logically distinct but interrelated sets to facilitate structured aggregation. Specifically, the Foreground Statistics Set captures aggregated statistical descriptors of salient disease-related features extracted by Local Encoders, providing global cues for emphasizing informative regions and suppressing background interference. In parallel, the Loss Set records local training losses reported by clients, reflecting both data quality and local convergence states. Model parameters and intermediate representations received from clients are stored in the Parameter Set, which forms the basis for updating the global LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>). Based on the loss feedback and foreground feature statistics, the server further constructs a Weight Set that assigns adaptive aggregation weights to different clients, allowing their contributions to be dynamically adjusted during global optimization.</p>
<p>These sets are jointly processed by the server-side aggregation module to update the global LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) parameters. To address the long-tail data distributions and uneven data quality that are prevalent in agricultural scenarios, a Dynamic Aggregation Mechanism is employed. By leveraging real-time feedback from the Loss Set and the adaptive weights derived in the Weight Set, this mechanism mitigates client drift and improves the stability and convergence speed of the global model. After aggregation, the updated LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) parameters are broadcast back to all clients, where they are used to update the Local Encoders and initiate the next round of federated training.</p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Mathematical modeling</title>
<p>The collaborative training process under the client&#x2013;server federated learning paradigm can be formulated as a distributed optimization problem. The objective is to learn a global model parameter <inline-formula>
<mml:math display="inline" id="im1"><mml:mrow><mml:mi>w</mml:mi><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mi>d</mml:mi></mml:msup></mml:mrow></mml:math></inline-formula> that minimizes the weighted empirical risk over all participating clients, as described in <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>.</p>
<disp-formula id="eq1"><label>(1)</label>
<mml:math display="block" id="M1"><mml:mrow><mml:munder><mml:mrow><mml:mi>min</mml:mi></mml:mrow><mml:mrow><mml:mi>w</mml:mi><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mi>d</mml:mi></mml:msup></mml:mrow></mml:munder><mml:mi>&#x2112;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>w</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>w</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im2"><mml:mi>K</mml:mi></mml:math></inline-formula> denotes the total number of clients, <inline-formula>
<mml:math display="inline" id="im3"><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>n</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo stretchy="false">/</mml:mo><mml:mi>n</mml:mi></mml:mrow></mml:math></inline-formula> is the aggregation weight proportional to the local dataset size <inline-formula>
<mml:math display="inline" id="im4"><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:msub><mml:mi mathvariant="script">D</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, and <inline-formula>
<mml:math display="inline" id="im5"><mml:mrow><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:munderover><mml:mstyle displaystyle="true"><mml:mo>&#x2211;</mml:mo></mml:mstyle><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:msub><mml:mi>n</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is the total number of samples across all clients. The local loss function of client <inline-formula>
<mml:math display="inline" id="im6"><mml:mi>k</mml:mi></mml:math></inline-formula> is defined as <xref ref-type="disp-formula" rid="eq2">Equation 2</xref>.</p>
<disp-formula id="eq2"><label>(2)</label>
<mml:math display="block" id="M2"><mml:mrow><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>w</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:mfrac><mml:munder><mml:mstyle displaystyle="true"><mml:mo>&#x2211;</mml:mo></mml:mstyle><mml:mrow><mml:mi>i</mml:mi><mml:mo>&#x2208;</mml:mo><mml:msub><mml:mi mathvariant="script">D</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:munder><mml:mi>&#x2113;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>f</mml:mi><mml:mi>w</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>&#x2113;</italic>(&#xb7;) denotes the sample-wise loss function (e.g., cross-entropy or mean squared error), and <italic>f<sub>w</sub></italic>(&#xb7;) represents the model parameterized by <italic>w</italic>.</p>
<p><xref ref-type="disp-formula" rid="eq1">Equation 1</xref> establishes a unified global optimization objective by aggregating client-specific empirical risks, thereby enabling collaborative model training without requiring raw data sharing. The weighting scheme <italic>p<sub>k</sub></italic> ensures that clients with larger datasets exert a proportionally greater influence on the global objective, which is critical for maintaining statistical consistency and improving the generalization capability of the global model.</p>
<sec id="s3_2_1">
<label>3.2.1</label>
<title>Federated optimization via FedAvg</title>
<p>In this study, Federated Averaging (FedAvg) is adopted as the baseline optimization algorithm. Each communication round <italic>t</italic> consists of three sequential stages:</p>
<sec id="s3_2_1_1">
<label>3.2.1.1</label>
<title>Broadcasting</title>
<p>The server distributes the current global model parameters <italic>w<sub>t</sub></italic> to a subset of participating clients.</p>
</sec>
<sec id="s3_2_1_2">
<label>3.2.1.2</label>
<title>Local training</title>
<p>Upon receiving <italic>w<sub>t</sub></italic>, each client <italic>k</italic> performs <italic>E</italic> epochs of stochastic gradient descent (SGD) on its local dataset <inline-formula>
<mml:math display="inline" id="im7"><mml:mi mathvariant="script">D</mml:mi></mml:math></inline-formula><italic><sub>k</sub></italic>. A single SGD update step can be expressed as <xref ref-type="disp-formula" rid="eq3">Equation 3</xref>.</p>
<disp-formula id="eq3"><label>(3)</label>
<mml:math display="block" id="M3"><mml:mrow><mml:msubsup><mml:mi>w</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>k</mml:mi></mml:msubsup><mml:mo>=</mml:mo><mml:msub><mml:mi>w</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mi>&#x3b7;</mml:mi><mml:mo mathvariant="normal">&#x2207;</mml:mo><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>&#x3b7; &gt;</italic> 0 denotes the learning rate. Performing <italic>E</italic> local epochs corresponds to repeatedly applying <xref ref-type="disp-formula" rid="eq3">Equation 3</xref>, allowing the local model to better adapt to client-specific data distributions while reducing the frequency of communication with the server.</p>
<p>Global Aggregation: After local training, the server aggregates the updated client models to form the next global model as <xref ref-type="disp-formula" rid="eq4">Equation 4</xref>.</p>
<disp-formula id="eq4"><label>(4)</label>
<mml:math display="block" id="M4"><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:msubsup><mml:mi>w</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>k</mml:mi></mml:msubsup><mml:mo>.</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:math>
</disp-formula>
<p>In <xref ref-type="disp-formula" rid="eq4">Equation 4</xref>, <inline-formula>
<mml:math display="inline" id="im8"><mml:mrow><mml:msubsup><mml:mi>w</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>k</mml:mi></mml:msubsup></mml:mrow></mml:math></inline-formula> represents the updated local model parameter of the <italic>k</italic>-th client following the completion of local training in the <italic>t</italic>-th communication round.</p>
</sec>
</sec>
<sec id="s3_2_2">
<label>3.2.2</label>
<title>Consistency between global objective and local updates</title>
<p><xref ref-type="disp-formula" rid="eq1">Equations 1</xref>, <xref ref-type="disp-formula" rid="eq2">2</xref>, <xref ref-type="disp-formula" rid="eq3">3</xref>, <xref ref-type="disp-formula" rid="eq4">4</xref> jointly define a coherent optimization framework that links the global learning objective with decentralized local updates. Although each client minimizes its own local loss <inline-formula>
<mml:math display="inline" id="im9"><mml:mrow><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>w</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, the weighted aggregation mechanism ensures that these local optimization steps collectively approximate descent along the global objective <inline-formula>
<mml:math display="inline" id="im10"><mml:mrow><mml:mi>&#x2112;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>w</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>. Consequently, local training is not an isolated process but an integral component of the global optimization procedure.</p>
<p>Moreover, the use of multiple local epochs (<italic>E &gt;</italic> 1) strikes a balance between optimization efficiency and communication cost. By allowing clients to perform more extensive local updates before aggregation, FedAvg reduces communication overhead while still maintaining convergence toward the global optimum, albeit at the potential cost of increased sensitivity to data heterogeneity.</p>
</sec>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>LSNet: large-small network architecture</title>
<p>LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) is a novel lightweight vision backbone inspired by the biological mechanisms of the human visual system. Its core innovation lies in the &#x201c;See Large, Focus Small&#x201d; strategy, which facilitates efficient feature perception and aggregation at a minimal computational cost.</p>
<p>The LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) architecture specifically optimizes image classification through the following components:</p>
<sec id="s3_3_1">
<label>3.3.1</label>
<title>LS convolution module</title>
<p>This module acts as the fundamental unit and consists of two synergistic sub-modules. LKP (Large-Kernel Perception) utilizes large-kernel depthwise separable convolutions to capture extensive receptive fields and global context, thereby generating dynamic weights. SKA (Small-Kernel Aggregation) then leverages these weights to guide grouped small-kernel dynamic convolutions, performing adaptive feature fusion on highly correlated local neighborhoods to model fine-grained details.</p>
</sec>
<sec id="s3_3_2">
<label>3.3.2</label>
<title>Structural hierarchy</title>
<p>The input images are first transformed into feature maps by a Stem layer using overlapping convolutions. The Encoder then extracts multi-scale features through four sequential Stages, each composed of multiple stacked LS Blocks. Resolution reduction and channel expansion between stages are handled by downsampling modules.</p>
<p>By adopting the philosophy of perceiving with a wide field of view and aggregating in localized regions, LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) achieves high-fidelity detail preservation without redundant computation. The detailed architecture of LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) is depicted in <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Detailed architecture of the LSNet (Large-Small Network) (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>), illustrating the integration of LKP and SKA modules.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1783587-g003.tif">
<alt-text content-type="machine-generated">Diagram illustrating a deep learning model pipeline for leaf disease classification, featuring sequential image processing stages with LS and MSA blocks, details of LS and MSA block architectures, LS convolution with perception and aggregation steps, sample diseased leaf images, and a legend explaining abbreviations.</alt-text>
</graphic></fig>
</sec>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>LSNet structural design</title>
<p>LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) is designed as a hierarchical architecture comprising four distinct stages, as detailed in <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>. Each stage consists of alternating Large-Kernel Perception (LKP) and Small-Kernel Aggregation (SKA) modules, enabling the model to transition from coarse-grained context extraction to fine-grained feature refinement.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Detailed architectural configurations of LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Stage</th>
<th valign="middle" align="center">Input resolution</th>
<th valign="middle" align="center">Output channels</th>
<th valign="middle" align="center">LKP configuration</th>
<th valign="middle" align="center">SKA windows</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">224&#xd7;224&#xd7;3</td>
<td valign="middle" align="center">96</td>
<td valign="middle" align="center">7&#xd7;7 + 3&#xd7;3, <italic>G</italic> = 4</td>
<td valign="middle" align="center">4W</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">56&#xd7;56&#xd7;96</td>
<td valign="middle" align="center">192</td>
<td valign="middle" align="center">7&#xd7;7 + 3&#xd7;3, <italic>G</italic> = 8</td>
<td valign="middle" align="center">12W</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">28&#xd7;28&#xd7;192</td>
<td valign="middle" align="center">384</td>
<td valign="middle" align="center">7&#xd7;7 + 3&#xd7;3, <italic>G</italic> = 16</td>
<td valign="middle" align="center">28W</td>
</tr>
<tr>
<td valign="middle" align="center">4</td>
<td valign="middle" align="center">14&#xd7;14&#xd7;384</td>
<td valign="middle" align="center">768</td>
<td valign="middle" align="center">7&#xd7;7 + 3&#xd7;3, <italic>G</italic> = 32</td>
<td valign="middle" align="center">56W</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) model involves approximately 100.2M parameters and 11.37G FLOPs for a standard 224 &#xd7; 224 input, achieving an optimal trade-off between representational capacity and inference latency.</p>
<sec id="s3_4_1">
<label>3.4.1</label>
<title>Large-Kernel Perception</title>
<p>The LKP module adopts a large-kernel bottleneck design to expand the effective receptive field (ERF). Given a feature map <inline-formula>
<mml:math display="inline" id="im11"><mml:mrow><mml:mi>X</mml:mi><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mi>H</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>W</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>C</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, we first employ a Pointwise Convolution (PW) to compress the channel dimension to <italic>C/</italic>2, significantly reducing the computational overhead. For each spatial position <inline-formula>
<mml:math display="inline" id="im12"><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, a Depthwise Convolution (DW) with a kernel size of <inline-formula>
<mml:math display="inline" id="im13"><mml:mrow><mml:msub><mml:mi>K</mml:mi><mml:mi>L</mml:mi></mml:msub><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>K</mml:mi><mml:mi>L</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is utilized to capture the spatial context within the neighborhood <inline-formula>
<mml:math display="inline" id="im14"><mml:mrow><mml:msub><mml:mi mathvariant="script">N</mml:mi><mml:mrow><mml:msub><mml:mi>K</mml:mi><mml:mi>L</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>. This&#xa0;mechanism effectively enhances the model&#x2019;s global awareness with minimal cost. Subsequently, a secondary PW convolution models the inter-token spatial dependencies to generate the context-aware weights <inline-formula>
<mml:math display="inline" id="im15"><mml:mrow><mml:mi>W</mml:mi><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mi>H</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>W</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>D</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>. The operation is formalized as <xref ref-type="disp-formula" rid="eq5">Equation 5</xref>.</p>
<disp-formula id="eq5"><label>(5)</label>
<mml:math display="block" id="M5"><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>P</mml:mi><mml:mi>&#x3b8;</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="script">N</mml:mi><mml:mrow><mml:msub><mml:mi>K</mml:mi><mml:mi>L</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mtext>PW</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext>DW</mml:mtext></mml:mrow><mml:mrow><mml:msub><mml:mi>K</mml:mi><mml:mi>L</mml:mi></mml:msub><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>K</mml:mi><mml:mi>L</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>PW</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi mathvariant="script">N</mml:mi><mml:mrow><mml:msub><mml:mi>K</mml:mi><mml:mi>L</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im16"><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mi>D</mml:mi></mml:msup></mml:mrow></mml:math></inline-formula> denotes the localized perception weight for <inline-formula>
<mml:math display="inline" id="im17"><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>.</p>
</sec>
<sec id="s3_4_2">
<label>3.4.2</label>
<title>Small-Kernel Aggregation</title>
<p>The SKA module implements grouped dynamic convolutions to aggregate local information. To optimize memory consumption, we partition the channels of <inline-formula>
<mml:math display="inline" id="im18"><mml:mi>X</mml:mi></mml:math></inline-formula> into <inline-formula>
<mml:math display="inline" id="im19"><mml:mi>G</mml:mi></mml:math></inline-formula> groups, where channels within the same group share the same aggregation weights. The perception weights <inline-formula>
<mml:math display="inline" id="im20"><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> from the LKP module are reshaped into <inline-formula>
<mml:math display="inline" id="im21"><mml:mrow><mml:msubsup><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mo>*</mml:mo></mml:msubsup><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:msub><mml:mi>K</mml:mi><mml:mi>S</mml:mi></mml:msub><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>K</mml:mi><mml:mi>S</mml:mi></mml:msub></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, serving as a dynamic kernel for the <inline-formula>
<mml:math display="inline" id="im22"><mml:mrow><mml:msub><mml:mi>K</mml:mi><mml:mi>S</mml:mi></mml:msub><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>K</mml:mi><mml:mi>S</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> neighborhood.</p>
<p>Specifically, for the <italic>c</italic>-th channel in the <inline-formula>
<mml:math display="inline" id="im23"><mml:mi>g</mml:mi></mml:math></inline-formula>-th group, the aggregated feature <inline-formula>
<mml:math display="inline" id="im24"><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>g</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> is obtained through a convolution between the local neighborhood <inline-formula>
<mml:math display="inline" id="im25"><mml:mrow><mml:msub><mml:mi mathvariant="script">N</mml:mi><mml:mrow><mml:msub><mml:mi>K</mml:mi><mml:mi>S</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>g</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> and the adaptive weight <inline-formula>
<mml:math display="inline" id="im26"><mml:mrow><mml:msubsup><mml:mi>w</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>g</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>*</mml:mo></mml:msubsup></mml:mrow></mml:math></inline-formula>. The operation is formalized as <xref ref-type="disp-formula" rid="eq6">Equation 6</xref>.</p>
<disp-formula id="eq6"><label>(6)</label>
<mml:math display="block" id="M6"><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>g</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msubsup><mml:mi>w</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>g</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>*</mml:mo></mml:msubsup><mml:mo>&#x229b;</mml:mo><mml:msub><mml:mi mathvariant="script">N</mml:mi><mml:mrow><mml:msub><mml:mi>K</mml:mi><mml:mi>S</mml:mi></mml:msub></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>g</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>This adaptive aggregation allows the model to dynamically adjust its response to complex structural variations in the input data.</p>
</sec>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Federated learning with LSNet</title>
<p>To address client heterogeneity in distributed agricultural networks, we propose a Softmax-weighted federated aggregation strategy within FL-LSNet. Unlike conventional FedAvg, which assigns aggregation weights solely based on local dataset size, the proposed method dynamically calibrates client contributions according to real-time learning quality indicators.</p>
<sec id="s3_5_1">
<label>3.5.1</label>
<title>Dynamic client weighting mechanism</title>
<p>Conventional federated learning approaches implicitly assume that clients with larger local datasets provide more reliable model updates. However, this assumption is frequently violated in agricultural scenarios, where large datasets may be dominated by common disease classes or exhibit severe class imbalance, while smaller datasets may contain rare yet highly informative samples. To address this limitation, FL-LSNet adopts a multi-dimensional client evaluation strategy that assigns aggregation weights based on learning quality rather than data volume alone.</p>
<p>Specifically, each participating client <italic>k</italic> is evaluated from three complementary perspectives that jointly characterize its contribution to the global optimization process: convergence efficiency, training stability, and intrinsic data reliability.</p>
<p>Convergence efficiency is quantified by the <italic>convergence velocity</italic>, which measures how effectively a client reduces its local training loss over communication rounds. It is defined as <xref ref-type="disp-formula" rid="eq7">Equation 7</xref>.</p>
<disp-formula id="eq7"><label>(7)</label>
<mml:math display="block" id="M7"><mml:mrow><mml:msub><mml:mi>c</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mo>|</mml:mo><mml:mo>|</mml:mo><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mi>T</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mn>0</mml:mn></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>|</mml:mo><mml:mo>|</mml:mo></mml:mrow><mml:mi>T</mml:mi></mml:mfrac><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im27"><mml:mrow><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> denotes the local loss of client <italic>k</italic> at round <italic>t</italic>, and <italic>T</italic> is the total number of communication rounds. A higher value of <inline-formula>
<mml:math display="inline" id="im28"><mml:mrow><mml:msub><mml:mi>c</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> indicates faster loss reduction and better alignment between the client&#x2019;s data distribution and the global learning objective, whereas slow convergence typically reflects strong non-IID effects or noisy local data.</p>
<p>Training stability captures the consistency of the local optimization process and is measured by the variance of the local loss trajectory. The training stability is defined as <xref ref-type="disp-formula" rid="eq8">Equation 8</xref>.</p>
<disp-formula id="eq8"><label>(8)</label>
<mml:math display="block" id="M8"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mtext>Var</mml:mtext><mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>Low variance corresponds to stable gradient updates and internally coherent data distributions, while high variance may arise from class imbalance, conflicting gradients, or unreliable annotations. To favor stable contributors during aggregation, the inverse term <inline-formula>
<mml:math display="inline" id="im29"><mml:mrow><mml:mn>1</mml:mn><mml:mo stretchy="false">/</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is used in subsequent weighting.</p>
<p>In addition to optimization behavior, the intrinsic quality of local data is assessed through <italic>data reliability</italic>, which is estimated using prediction confidence derived from evidential deep learning. The operation is formalized as <xref ref-type="disp-formula" rid="eq9">Equation 9</xref>.</p>
<disp-formula id="eq9"><label>(9)</label>
<mml:math display="block" id="M9"><mml:mrow><mml:msub><mml:mi>q</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>&#x2208;</mml:mo><mml:msub><mml:mi mathvariant="script">D</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:munder></mml:mstyle><mml:mtext>Conf</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im30"><mml:mrow><mml:mtext>Conf</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">]</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> denotes the normalized confidence score associated with sample <italic>x<sub>i</sub></italic>. This metric reflects dataset-level reliability under varying field acquisition conditions and sensor noise.</p>
<p>It is important to note that the above three quantities <inline-formula>
<mml:math display="inline" id="im31"><mml:mrow><mml:msub><mml:mi>c</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula>
<mml:math display="inline" id="im32"><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula>
<mml:math display="inline" id="im33"><mml:mrow><mml:msub><mml:mi>q</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> constitute <italic>client evaluation metrics</italic>, rather than aggregation weights themselves. These metrics are subsequently integrated to derive the final client contribution during global aggregation.</p>
<p>To this end, the evaluated metrics are first combined into a composite client score as <xref ref-type="disp-formula" rid="eq10">Equation 10</xref>.</p>
<disp-formula id="eq10"><label>(10)</label>
<mml:math display="block" id="M10"><mml:mrow><mml:mtext>raw_</mml:mtext><mml:msub><mml:mrow><mml:mtext>score</mml:mtext></mml:mrow><mml:mi>k</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>&#x3b3;</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:msub><mml:mi>c</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x3b3;</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:mfrac><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x3b3;</mml:mi><mml:mn>3</mml:mn></mml:msub><mml:msub><mml:mi>q</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>&#x3b3;</italic><sub>1</sub><italic>, &#x3b3;</italic><sub>2</sub><italic>, &#x3b3;</italic><sub>3</sub> <italic>&gt;</italic> 0 are hyperparameters determined through validation. Since the metrics differ in numerical scale, Z-score normalization is applied independently to ensure comparability. The operation is formalized as <xref ref-type="disp-formula" rid="eq11">Equation 11</xref>.</p>
<disp-formula id="eq11"><label>(11)</label>
<mml:math display="block" id="M11"><mml:mrow><mml:msub><mml:mi>m</mml:mi><mml:mrow><mml:mi>k</mml:mi><mml:mo>,</mml:mo><mml:mtext>norm</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mi>m</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x3bc;</mml:mi><mml:mi>m</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mi>&#x3c3;</mml:mi><mml:mi>m</mml:mi></mml:msub></mml:mrow></mml:mfrac><mml:mo>,</mml:mo><mml:mtext>&#x2003;</mml:mtext><mml:msub><mml:mi>m</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>c</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">/</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>q</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>Based on the normalized composite scores, the final client aggregation weights are assigned using a Softmax function as <xref ref-type="disp-formula" rid="eq12">Equation 12</xref>.</p>
<disp-formula id="eq12"><label>(12)</label>
<mml:math display="block" id="M12"><mml:mrow><mml:msub><mml:mi>&#x3b1;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mtext>exp</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext>score</mml:mtext></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>,</mml:mo><mml:mtext>norm</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:msubsup><mml:mstyle displaystyle="true"><mml:mo>&#x2211;</mml:mo></mml:mstyle><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:msubsup><mml:mtext>exp</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext>score</mml:mtext></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mtext>norm</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>which ensures that <inline-formula>
<mml:math display="inline" id="im34"><mml:mrow><mml:munderover><mml:mstyle displaystyle="true"><mml:mo>&#x2211;</mml:mo></mml:mstyle><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:msub><mml:mi>&#x3b1;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula>. This formulation emphasizes high-quality clients while preserving non-zero contributions from all participants, thereby maintaining robustness against client dropout and extreme heterogeneity.</p>
<p>The global model is then updated according to the weighted aggregation rule. The operation is formalized as <xref ref-type="disp-formula" rid="eq13">Equation 13</xref>.</p>
<disp-formula id="eq13"><label>(13)</label>
<mml:math display="block" id="M13"><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>K</mml:mi></mml:munderover><mml:mrow><mml:msub><mml:mi>&#x3b1;</mml:mi><mml:mi>k</mml:mi></mml:msub><mml:msubsup><mml:mi>w</mml:mi><mml:mi>t</mml:mi><mml:mi>k</mml:mi></mml:msubsup><mml:mo>.</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:math>
</disp-formula>
</sec>
<sec id="s3_5_2">
<label>3.5.2</label>
<title>Algorithmic procedure and convergence analysis</title>
<p><xref ref-type="statement" rid="st1"><bold>Algorithm 1</bold></xref> summarizes the FL-LSNet aggregation procedure.</p>
<statement content-type="algorithm" id="st1">
<label>Algorithm 1</label>
<p><graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1783587-g007.tif"/></p>
</statement>
<p>Convergence Analysis: Under standard assumptions of <italic>L</italic>-smoothness and bounded variance of stochastic gradients, FL-LSNet admits a linear convergence guarantee for strongly convex objectives. Specifically, the expected optimality gap satisfies <xref ref-type="disp-formula" rid="eq14">Equation 14</xref>.</p>
<disp-formula id="eq14"><label>(14)</label>
<mml:math display="block" id="M14"><mml:mrow><mml:mi mathvariant="double-struck">E</mml:mi><mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mrow><mml:mi>&#x2112;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">]</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>&#x2112;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mi>w</mml:mi><mml:mo>*</mml:mo></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x2264;</mml:mo><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mi>&#x3b7;</mml:mi><mml:mi>&#x3bc;</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x2112;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mn>0</mml:mn></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>&#x2112;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msup><mml:mi>w</mml:mi><mml:mo>*</mml:mo></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>&#xb5; &gt;</italic> 0 denotes the strong convexity constant of the loss function, and <italic>&#x3b7;</italic> is the learning rate. <italic>w</italic><sup>&#x2217;</sup> is the unique global minimizer.</p>
<p>The linear convergence result in <xref ref-type="disp-formula" rid="eq14">Equation 14</xref> is derived based on the integration of the lightweight feature extraction module of LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) and the federated communication mechanism of FL-LSNet, under the premise of <italic>L</italic>-smoothness (i.e., the gradient of the loss function <inline-formula>
<mml:math display="inline" id="im45"><mml:mrow><mml:mi>&#x2112;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>w</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> satisfies the Lipschitz condition with constant <italic>L</italic>) and bounded variance of local gradient estimates across distributed devices. Specifically, the strong convexity constant <italic>&#xb5;</italic> characterizes the &#x201c;curvature&#x201d; of the loss function, ensuring that the optimal solution <italic>w</italic><sup>&#x2217;</sup> is unique and that the loss function has a lower bound on the growth rate of its gradient. The learning rate <italic>&#x3b7;</italic> is a hyperparameter that balances the convergence speed and stability, and its value is typically chosen to satisfy 0 &lt; <italic>&#x3b7;</italic> &lt; 2/<italic>L</italic> to avoid divergence during the iterative process.</p>
<p>In contrast, classical federated optimization methods such as FedAvg generally exhibit sublinear convergence behavior. Under IID data and convex (or strongly convex) objectives, FedAvg achieves an <inline-formula>
<mml:math display="inline" id="im46"><mml:mrow><mml:mi>O</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo stretchy="false">/</mml:mo><mml:msqrt><mml:mi>T</mml:mi></mml:msqrt></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> convergence rate depending on the specific assumptions and stochasticity level. However, when training on non-IID agricultural data, the convergence rate deteriorates significantly and can be characterized by <inline-formula>
<mml:math display="inline" id="im47"><mml:mrow><mml:mi>O</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:msqrt><mml:mi>K</mml:mi></mml:msqrt></mml:mrow><mml:mi>T</mml:mi></mml:mfrac><mml:mo>+</mml:mo><mml:mi>&#x3b6;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, where <inline-formula>
<mml:math display="inline" id="im48"><mml:mi>K</mml:mi></mml:math></inline-formula> is the number of local update steps and <inline-formula>
<mml:math display="inline" id="im49"><mml:mi>&#x3b6;</mml:mi></mml:math></inline-formula> quantifies the statistical heterogeneity across clients. This heterogeneity-induced error floor often prevents FedAvg from converging efficiently to the global optimum.</p>
<p>As indicated by <xref ref-type="disp-formula" rid="eq14">Equation 14</xref>, the expected optimality gap of FL-LSNet decays exponentially with respect to the communication round index <italic>t</italic>, thereby rigorously confirming its linear convergence property. The convergence speed is governed by the factor (1 &#x2212; <italic>&#x3b7;&#xb5;</italic>): a larger product <italic>&#x3b7;&#xb5;</italic>, within the admissible range of <italic>&#x3b7;</italic>, leads to a faster decay rate and hence more rapid convergence to the optimal solution.</p>
<p>This strong convergence guarantee is particularly important for practical federated learning deployments in agricultural scenarios. Linear convergence implies that FL-LSNet can attain stable and high-quality model performance within a limited number of communication rounds, effectively reducing communication overhead and computational burden on resource-constrained edge devices, while maintaining robustness against data heterogeneity.</p>
</sec>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Experiment</title>
<sec id="s4_1">
<label>4.1</label>
<title>Datasets</title>
<p>To evaluate the effectiveness of the proposed FL-LSNet framework, experiments were conducted on three publicly available tomato disease image datasets with different data scales and acquisition conditions. All experiments address a single image classification task, namely tomato leaf disease recognition, where each image is classified into one disease category or the healthy class. The use of multiple datasets is solely intended to simulate heterogeneous data distributions across federated clients, rather than to formulate a multi-center or multi-task classification problem.</p>
<p>As summarized in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>, the CCMT (P. K. Mensah, 2023), Taiwan Tomato (<xref ref-type="bibr" rid="B9">Huang et&#xa0;al., 2020</xref>), and PlantVillage (<xref ref-type="bibr" rid="B10">Hughes and Salath&#xe9;, 2015</xref>) datasets were selected due to their complementary characteristics. CCMT represents a medium-scale field-collected dataset with moderate heterogeneity, Taiwan Tomato introduces small-scale and noisy data from diverse climatic conditions, and PlantVillage (<xref ref-type="bibr" rid="B10">Hughes and Salath&#xe9;, 2015</xref>) provides a large-scale dataset with controlled acquisition settings and pronounced class imbalance. Together, these datasets form a representative benchmark for evaluating federated learning performance under realistic agricultural data heterogeneity.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Statistical summary of the tomato image subsets in agricultural datasets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Dataset</th>
<th valign="middle" align="center">Categories</th>
<th valign="middle" align="center">Training</th>
<th valign="middle" align="center">Validation</th>
<th valign="middle" align="center">Test</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">CCMT (Tomato) (<xref ref-type="bibr" rid="B16">Mensah and E., 2023</xref>)</td>
<td valign="middle" align="center">5</td>
<td valign="middle" align="center">3805</td>
<td valign="middle" align="center">815</td>
<td valign="middle" align="center">815</td>
</tr>
<tr>
<td valign="middle" align="left">Taiwan Tomato (<xref ref-type="bibr" rid="B9">Huang et&#xa0;al., 2020</xref>)</td>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">274</td>
<td valign="middle" align="center">98</td>
<td valign="middle" align="center">98</td>
</tr>
<tr>
<td valign="middle" align="left">PlantVillage (Tomato) (<xref ref-type="bibr" rid="B10">Hughes and Salath&#xe9;, 2015</xref>)</td>
<td valign="middle" align="center">10</td>
<td valign="middle" align="center">12,712</td>
<td valign="middle" align="center">2,724</td>
<td valign="middle" align="center">2,724</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The data distribution analysis reveals several challenges:</p>
<list list-type="bullet">
<list-item>
<p>Category Imbalance: As shown in <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>, the tomato datasets exhibit significant class skewness. In the CCMT (P. K. <xref ref-type="bibr" rid="B16">Mensah and E., 2023</xref>) dataset, healthy leaves constitute 9.20% of samples, while tomato mosaic virus in PlantVillage (<xref ref-type="bibr" rid="B10">Hughes and Salath&#xe9;, 2015</xref>) accounts for only 2.05% of samples. Conversely, several disease categories dominate the distribution: Septoria leaf spot reaches 43.11% in CCMT (P. K. <xref ref-type="bibr" rid="B16">Mensah and E., 2023</xref>), Yellow Leaf Curl Virus comprises 29.50% in PlantVillage (<xref ref-type="bibr" rid="B10">Hughes and Salath&#xe9;, 2015</xref>), and Powdery mildew represents 25.24% in Taiwan Tomato (<xref ref-type="bibr" rid="B9">Huang et&#xa0;al., 2020</xref>).</p></list-item>
<list-item>
<p>Spatial and Climatic Heterogeneity: The Taiwan Tomato (<xref ref-type="bibr" rid="B9">Huang et&#xa0;al., 2020</xref>) dataset encompasses data from six distinct climatic zones, and PlantVillage (<xref ref-type="bibr" rid="B10">Hughes and Salath&#xe9;, 2015</xref>) covers four diverse agro-ecological regions, effectively simulating the <italic>Non-IID</italic> (Non-Independent and Identically Distributed) nature of real-world agricultural environments.</p></list-item>
</list>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Plant disease dataset distribution by category.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Dataset</th>
<th valign="middle" align="left">Categories</th>
<th valign="middle" align="left">Quantity</th>
<th valign="middle" align="left">Ratio (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="5" align="left">CCMT (<xref ref-type="bibr" rid="B16">Mensah and E., 2023</xref>)</td>
<td valign="middle" align="left">Healthy</td>
<td valign="middle" align="left">500</td>
<td valign="middle" align="left">9.20</td>
</tr>
<tr>
<td valign="middle" align="left">Leaf blight</td>
<td valign="middle" align="left">1301</td>
<td valign="middle" align="left">23.94</td>
</tr>
<tr>
<td valign="middle" align="left">Leaf curl</td>
<td valign="middle" align="left">518</td>
<td valign="middle" align="left">9.53</td>
</tr>
<tr>
<td valign="middle" align="left">Septoria leaf spot</td>
<td valign="middle" align="left">2343</td>
<td valign="middle" align="left">43.11</td>
</tr>
<tr>
<td valign="middle" align="left">Verticillium wilt</td>
<td valign="middle" align="left">773</td>
<td valign="middle" align="left">14.22</td>
</tr>
<tr>
<td valign="middle" rowspan="6" align="left">Taiwan Tomato (<xref ref-type="bibr" rid="B9">Huang et&#xa0;al., 2020</xref>)</td>
<td valign="middle" align="left">Bacterial spot</td>
<td valign="middle" align="left">110</td>
<td valign="middle" align="left">17.68</td>
</tr>
<tr>
<td valign="middle" align="left">Black leaf mold</td>
<td valign="middle" align="left">67</td>
<td valign="middle" align="left">10.77</td>
</tr>
<tr>
<td valign="middle" align="left">Gray leaf spot</td>
<td valign="middle" align="left">84</td>
<td valign="middle" align="left">13.50</td>
</tr>
<tr>
<td valign="middle" align="left">Healthy</td>
<td valign="middle" align="left">106</td>
<td valign="middle" align="left">17.04</td>
</tr>
<tr>
<td valign="middle" align="left">Late blight</td>
<td valign="middle" align="left">98</td>
<td valign="middle" align="left">15.76</td>
</tr>
<tr>
<td valign="middle" align="left">Powdery mildew</td>
<td valign="middle" align="left">157</td>
<td valign="middle" align="left">25.24</td>
</tr>
<tr>
<td valign="middle" rowspan="10" align="left">PlantVillage (<xref ref-type="bibr" rid="B10">Hughes and Salath&#xe9;, 2015</xref>)</td>
<td valign="middle" align="left">Bacterial spot</td>
<td valign="middle" align="left">2127</td>
<td valign="middle" align="left">11.71</td>
</tr>
<tr>
<td valign="middle" align="left">Early blight</td>
<td valign="middle" align="left">1000</td>
<td valign="middle" align="left">5.51</td>
</tr>
<tr>
<td valign="middle" align="left">Late blight</td>
<td valign="middle" align="left">1909</td>
<td valign="middle" align="left">10.51</td>
</tr>
<tr>
<td valign="middle" align="left">Leaf Mold</td>
<td valign="middle" align="left">952</td>
<td valign="middle" align="left">5.24</td>
</tr>
<tr>
<td valign="middle" align="left">Septoria leaf spot</td>
<td valign="middle" align="left">1771</td>
<td valign="middle" align="left">9.75</td>
</tr>
<tr>
<td valign="middle" align="left">Spider mites</td>
<td valign="middle" align="left">1676</td>
<td valign="middle" align="left">9.23</td>
</tr>
<tr>
<td valign="middle" align="left">Target Spot</td>
<td valign="middle" align="left">1404</td>
<td valign="middle" align="left">7.73</td>
</tr>
<tr>
<td valign="middle" align="left">Yellow Leaf Curl Virus</td>
<td valign="middle" align="left">5357</td>
<td valign="middle" align="left">29.50</td>
</tr>
<tr>
<td valign="middle" align="left">Tomato mosaic virus</td>
<td valign="middle" align="left">373</td>
<td valign="middle" align="left">2.05</td>
</tr>
<tr>
<td valign="middle" align="left">Healthy</td>
<td valign="middle" align="left">1591</td>
<td valign="middle" align="left">8.76</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Experimental configuration</title>
<sec id="s4_2_1">
<label>4.2.1</label>
<title>Environmental specifications</title>
<p>All experiments were conducted on a high-performance workstation equipped with an NVIDIA GeForce RTX 3060 GPU (12GB RAM). The software stack included Ubuntu 20.04 LTS as the operating system, PyTorch 2.0 as the primary deep learning framework, and CUDA 11.8 for GPU-accelerated computing. This configuration ensures consistent computational throughput for large-scale iterative training.</p>
</sec>
<sec id="s4_2_2">
<label>4.2.2</label>
<title>Federated learning parameters</title>
<p>The federated learning (FL) hyperparameters were meticulously tuned to balance model convergence and communication overhead. The specific configurations are summarized in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Hyperparameter settings for the federated learning framework.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Category</th>
<th valign="middle" align="left">Parameter</th>
<th valign="middle" align="left">Value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="5" align="left">Network</td>
<td valign="middle" align="left">Batch Size</td>
<td valign="middle" align="left">8</td>
</tr>
<tr>
<td valign="middle" align="left">Learning Rate</td>
<td valign="middle" align="left">0.01</td>
</tr>
<tr>
<td valign="middle" align="left">Weight Decay</td>
<td valign="middle" align="left">5e-5</td>
</tr>
<tr>
<td valign="middle" align="left">Optimizer</td>
<td valign="middle" align="left">Adam</td>
</tr>
<tr>
<td valign="middle" align="left">Activation Function</td>
<td valign="middle" align="left">GELU</td>
</tr>
<tr>
<td valign="middle" rowspan="5" align="left">Federated Settings</td>
<td valign="middle" align="left">Number of Clients (<italic>K</italic>)</td>
<td valign="middle" align="left">3</td>
</tr>
<tr>
<td valign="middle" align="left">Communication Rounds</td>
<td valign="middle" align="left">70</td>
</tr>
<tr>
<td valign="middle" align="left">Local Epochs (<italic>E</italic>)</td>
<td valign="middle" align="left">1</td>
</tr>
<tr>
<td valign="middle" align="left">Participation Ratio (<italic>C</italic>)</td>
<td valign="middle" align="left">0.8</td>
</tr>
<tr>
<td valign="middle" align="left">Aggregation Algorithm</td>
<td valign="middle" align="left">FL-LSNet</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Considering the performance of the client devices, we balanced the trade-off between training efficiency and gradient-descent stability. This study set the batch size to 8, as larger batch sizes led to faster convergence but caused overfitting on our relatively small domain-specific dataset.</p>
<p>To ensure training stability and achieve optimal generalization performance&#x2014;primarily evaluated based on validation and test set results&#x2014;it is essential to carefully select and tune key hyperparameters, such as the learning rate and batch size. In this study, a 5-fold cross-validation strategy is adopted to systematically evaluate the effectiveness of different hyperparameter configurations.</p>
<p>Specifically, the original dataset is randomly partitioned into five mutually exclusive subsets with approximately balanced sample distributions. In each cross-validation round, four subsets are used for model training, while the remaining subset serves as the validation set for performance evaluation. This procedure is repeated five times so that each subset is used exactly once for validation. The average validation performance over the five folds is then used as an objective criterion to assess the suitability of each hyperparameter configuration.</p>
<p>During the hyperparameter search phase, a set of candidate configurations is predefined (e.g., learning rates of 0.001, 0.01, and 0.1). For each configuration, the above cross-validation procedure is conducted, and key performance metrics on the validation set are recorded. The hyperparameter combination yielding the best average performance across the five folds is selected as the final configuration.</p>
<p>The resulting optimal hyperparameter settings are summarized in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>, which includes both network-related parameters (e.g., batch size, learning rate, optimizer) and federated learning&#x2013;specific settings (e.g., number of clients, communication rounds, local epochs, and participation ratio). This configuration demonstrates robust convergence behavior and strong generalization capability in practice, thereby ensuring the reliability of the experimental results reported in this work.</p>
</sec>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Comparative baselines</title>
<p>To validate the superiority of the proposed FL-LSNet framework, we benchmarked it against several state-of-the-art (SOTA) federated optimization methods:</p>
<list list-type="bullet">
<list-item>
<p>FedAvg (<xref ref-type="bibr" rid="B15">McMahan et&#xa0;al., 2017</xref>): The vanilla federated averaging algorithm which synchronizes global parameters via a simple coordinate-wise weighted average of local updates.</p></list-item>
<list-item>
<p>FedProx (<xref ref-type="bibr" rid="B13">Li et&#xa0;al., 2020</xref>): An extension of FedAvg that introduces a proximal term to the local objective function to mitigate the drift caused by statistical heterogeneity across clients.</p></list-item>
<list-item>
<p>MOON (<xref ref-type="bibr" rid="B12">Li et&#xa0;al., 2021</xref>): A model-contrastive learning strategy designed to correct the representation shift by minimizing the discrepancy between the representations of local and global models.</p></list-item>
</list>
</sec>
<sec id="s4_4">
<label>4.4</label>
<title>Experimental results</title>
<sec id="s4_4_1">
<label>4.4.1</label>
<title>Performance evaluation</title>
<p>Taking agricultural images as an example, we evaluate the proposed FL-LSNet framework through a comprehensive set of experiments against representative federated learning baselines on three agricultural image datasets. <xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref> reports the accuracy (in percent) achieved by each method.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Performance comparison (Accuracy %) of different FL algorithms on three agricultural image datasets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Method</th>
<th valign="middle" align="center">CCMT (%)</th>
<th valign="middle" align="center">Taiwan (%)</th>
<th valign="middle" align="center">PlantVillage (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">FedAvg (<xref ref-type="bibr" rid="B15">McMahan et&#xa0;al., 2017</xref>)</td>
<td valign="middle" align="center">80.39</td>
<td valign="middle" align="center">79.86</td>
<td valign="middle" align="center">97.40</td>
</tr>
<tr>
<td valign="middle" align="left">FedProx (<xref ref-type="bibr" rid="B13">Li et&#xa0;al., 2020</xref>)</td>
<td valign="middle" align="center">81.25</td>
<td valign="middle" align="center">80.43</td>
<td valign="middle" align="center">97.85</td>
</tr>
<tr>
<td valign="middle" align="left">MOON (<xref ref-type="bibr" rid="B12">Li et&#xa0;al., 2021</xref>)</td>
<td valign="middle" align="center">82.55</td>
<td valign="middle" align="center">79.36</td>
<td valign="middle" align="center">98.65</td>
</tr>
<tr>
<td valign="middle" align="left">FL-LSNet (Ours)</td>
<td valign="middle" align="center">84.32</td>
<td valign="middle" align="center">85.14</td>
<td valign="middle" align="center">98.92</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The results in <xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref> demonstrate that FL-LSNet consistently outperforms state-of-the-art federated learning baselines across all three agricultural datasets, confirming its effectiveness in addressing data heterogeneity and distribution imbalance in federated agricultural scenarios.</p>
<p>The CCMT (P. K. <xref ref-type="bibr" rid="B16">Mensah and E., 2023</xref>) dataset is characterized by severe class imbalance, with healthy samples accounting for 9.2% of the data. Under this setting, FedAvg achieves 80.39% accuracy, while FedProx and MOON improve performance to 81.25% and 82.55%, respectively, through regularization and representation alignment. In contrast, FL-LSNet attains 84.32% accuracy, outperforming FedAvg and MOON by 3.93% and 1.77%, respectively. This improvement is attributed to FL-LSNet&#x2019;s dynamic client weighting and the discriminative capacity of LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) under imbalanced conditions. The Taiwan Tomato (<xref ref-type="bibr" rid="B9">Huang et&#xa0;al., 2020</xref>) dataset represents an extreme non-IID scenario due to data collection across six distinct climatic zones. FedAvg, FedProx, and MOON achieve accuracies of 79.86%, 80.43%, and 79.36%, respectively, with MOON exhibiting instability under severe distribution shifts. FL-LSNet significantly improves performance to 85.14%, exceeding FedAvg by 5.28% and FedProx by 4.71%. This result highlights FL-LSNet&#x2019;s robustness to spatial and climatic heterogeneity, enabled by adaptive aggregation and robust feature learning. The PlantVillage (<xref ref-type="bibr" rid="B10">Hughes and Salath&#xe9;, 2015</xref>) dataset provides a relatively homogeneous benchmark, where all methods achieve high accuracy. FedAvg, FedProx, and MOON reach 97.40%, 97.85%, and 98.65%, respectively, while FL-LSNet further improves performance to 98.92%. Although the absolute gains are smaller in this controlled setting, FL-LSNet maintains consistent superiority.</p>
<p>Beyond absolute accuracy, cross-dataset stability is evaluated as a measure of generalization. MOON exhibits the largest performance variance (19.29 percentage points), followed by FedAvg (17.54) and FedProx (17.42). FL-LSNet achieves the smallest variability, with a gap of only 14.60 points between its lowest and highest accuracies. This reduced variance indicates that FL-LSNet effectively mitigates non-IID effects, yielding more stable and reliable performance across diverse agricultural environments.</p>
</sec>
<sec id="s4_4_2">
<label>4.4.2</label>
<title>Ablation study</title>
<p>To quantify the contribution of each component in the proposed framework, we conducted ablation experiments on the PlantVillage (<xref ref-type="bibr" rid="B10">Hughes and Salath&#xe9;, 2015</xref>) dataset. We evaluate three metrics&#x2014;Accuracy, Precision, and F1-score&#x2014;and report the results in <xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref>.</p>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Ablation study results for different component combinations.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Combination</th>
<th valign="middle" align="center">FedAvg</th>
<th valign="middle" align="center">LSNet</th>
<th valign="middle" align="center">Accuracy (%)</th>
<th valign="middle" align="center">Precision (%)</th>
<th valign="middle" align="center">F1-score (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) (Standalone)</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">92.50</td>
<td valign="middle" align="center">93.38</td>
<td valign="middle" align="center">92.48</td>
</tr>
<tr>
<td valign="middle" align="left">FedAvg + SwinUnet</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">97.40</td>
<td valign="middle" align="center">97.47</td>
<td valign="middle" align="center">97.40</td>
</tr>
<tr>
<td valign="middle" align="left">FL-LSNet (FedAvg + LSNet)</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">98.65</td>
<td valign="middle" align="center">98.68</td>
<td valign="middle" align="center">98.65</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The ablation results in <xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref> provide clear insights into the individual and combined contributions of the proposed components. By comparing three configurations&#x2014;(i) standalone LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>), (ii) FedAvg with a SwinUnet backbone, and (iii) the integrated FL-LSNet (FedAvg + LSNet)&#x2014;the performance gains can be attributed to specific architectural and algorithmic design choices.</p>
<p>The standalone LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) demonstrates strong intrinsic representation capability in a centralized setting, achieving an accuracy of 92.50%, Precision of 93.38%, and F1-score of 92.48%. These results confirm the effectiveness of the &#x201c;See Large, Focus Small&#x201d; design, which combines large-kernel global perception with small-kernel fine-grained aggregation to capture hierarchical disease features. Notably, LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) attains this performance with approximately 72% fewer parameters than Swin Transformer&#x2013;based backbones, highlighting a favorable efficiency&#x2013;accuracy trade-off. The FedAvg + SwinUnet configuration serves as a strong federated baseline, yielding an accuracy of 97.40%, Precision of 97.47%, and F1-score of 97.40%. Compared with the standalone LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>), this setup improves accuracy by approximately 4.9 percentage points, demonstrating the benefit of federated aggregation across distributed data sources. The self-attention mechanism of SwinUnet facilitates modeling long-range dependencies, contributing to improved performance under non-IID conditions.</p>
<p>The fully integrated FL-LSNet achieves the best overall performance, with accuracy, Precision, and F1-score all reaching 98.65%, 98.68%, and 98.65%, respectively. This corresponds to a 6.15 percentage-point improvement over the standalone LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) and a 1.25 percentage-point gain over the FedAvg + SwinUnet baseline. The consistent improvements across all metrics indicate that FL-LSNet effectively combines robust local feature learning with principled federated aggregation, yielding superior generalization and resilience to data heterogeneity rather than metric-specific gains. The performance of FL-LSNet can be understood through three complementary perspectives:</p>
<list list-type="bullet">
<list-item>
<p>Feature Representation Superiority: LSNet&#x2019;s (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) LS Convolution module integrates multi-scale perception (large kernels) with adaptive aggregation (small kernels), rendering it inherently well-suited for extracting discriminative disease features compared to SwinUnet&#x2019;s patch-based attention. The 7&#xd7;7 large kernel in the LSCon block captures global context (e.g., lesion distribution and overall leaf condition), while the 3&#xd7;3 small kernels in the aggregation stage focus on fine-grained details such as spot edges, texture, and color gradations. This hierarchical feature extraction is particularly effective for tomato diseases, where symptoms manifest at multiple scales. Notably, LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) achieves these results with roughly 72% fewer parameters than Swin Transformer&#x2013;based backbones, illustrating a favorable efficiency&#x2013;accuracy trade-off.</p></list-item>
<list-item>
<p>Adaptability to Federated Constraints: LSNet&#x2019;s (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) grouped convolution design and parameter-efficient structure render it more robust under federated settings with limited local training epochs and modest per-client data. In typical FL scenarios where each client trains for only one or a few epochs per round, models must learn rapidly without overfitting to local idiosyncrasies. The efficiency of LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) facilitates effective feature learning under these constraints, whereas heavier architectures like SwinUnet may be more prone to underfitting on small local datasets.</p></list-item>
<list-item>
<p>Synergy with Adaptive Aggregation: The observed gains also reflect the productive interplay between robust local feature learning (via LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>)) and principled federated aggregation (via FL-LSNet&#x2019;s weighting and aggregation strategy). The combination alleviates non-IID effects and enhances generalization beyond what either component achieves alone.</p></list-item>
</list>
<p>In summary, the ablation results validate that both LSNet&#x2019;s (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) architectural innovations and its integration with the federated framework contribute synergistically to the overall performance. The consistent improvements across metrics and datasets provide strong evidence that FL-LSNet offers a principled and effective solution for distributed plant-disease classification in agricultural monitoring systems.</p>
</sec>
<sec id="s4_4_3">
<label>4.4.3</label>
<title>Comparative performance of different models</title>
<p>To quantify the performance of the proposed FL-LSNet and highlight its advantages over existing models, a comprehensive comparative experiment was conducted on the PlantVillage benchmark dataset, with the results summarized in <xref ref-type="table" rid="T7"><bold>Table&#xa0;7</bold></xref>. The selected comparison models cover mainstream learning paradigms for plant disease recognition, including classical convolutional neural networks (CNNs), lightweight neural networks, object detection models, and vision transformer (ViT)-based models, to fully reflect the trade-offs among detection accuracy, model architectural complexity, inference efficiency, and generalization capability in practical applications.</p>
<table-wrap id="T7" position="float">
<label>Table&#xa0;7</label>
<caption>
<p>Performance results of different models on the PlantVillage dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Model</th>
<th valign="middle" align="center">Year</th>
<th valign="middle" align="center">Dataset</th>
<th valign="middle" align="center">Accuracy (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">DenseNet201 (<xref ref-type="bibr" rid="B3">Ashok et&#xa0;al., 2020</xref>)</td>
<td valign="middle" align="center">2020</td>
<td valign="middle" align="center">PlantVillage</td>
<td valign="middle" align="center">98.05</td>
</tr>
<tr>
<td valign="middle" align="left">YOLOv9 (<xref ref-type="bibr" rid="B1">Abulizi and Ye, 2024</xref>)</td>
<td valign="middle" align="center">2024</td>
<td valign="middle" align="center">PlantVillage</td>
<td valign="middle" align="center">89.50</td>
</tr>
<tr>
<td valign="middle" align="left">DM-YOLO (<xref ref-type="bibr" rid="B1">Abulizi and Ye, 2024</xref>)</td>
<td valign="middle" align="center">2024</td>
<td valign="middle" align="center">PlantVillage</td>
<td valign="middle" align="center">91.40</td>
</tr>
<tr>
<td valign="middle" align="left">MobileNet-V2 (<xref ref-type="bibr" rid="B5">Barman et&#xa0;al., 2024</xref>)</td>
<td valign="middle" align="center">2024</td>
<td valign="middle" align="center">Non-standard</td>
<td valign="middle" align="center">94.98</td>
</tr>
<tr>
<td valign="middle" align="left">ViT (<xref ref-type="bibr" rid="B17">Nishankar et&#xa0;al., 2025</xref>)</td>
<td valign="middle" align="center">2025</td>
<td valign="middle" align="center">PlantVillage</td>
<td valign="middle" align="center">90.99</td>
</tr>
<tr>
<td valign="middle" align="left">CLIP-ViT (<xref ref-type="bibr" rid="B19">Radford, 2024</xref>)</td>
<td valign="middle" align="center">2025</td>
<td valign="middle" align="center">PlantVillage</td>
<td valign="middle" align="center">98.50</td>
</tr>
<tr>
<td valign="middle" align="left"><bold>FL-LSNet (Proposed)</bold></td>
<td valign="middle" align="center">2026</td>
<td valign="middle" align="center">PlantVillage</td>
<td valign="middle" align="center"><bold>98.65</bold></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Bold indicates the best performance.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Representative object detection models (YOLOv9, DM-YOLO) exhibit relatively low classification accuracy (89.50% and 91.40%, respectively), as their network design is optimized for spatial localization rather than fine-grained feature extraction of foliar disease symptoms, making them less suitable for dedicated plant&#xa0;disease classification tasks. Classical CNN-based models (DenseNet201) achieve high accuracy (98.05%) on the standardized dataset due to their hierarchical feature extraction mechanism, but their heavy network structure leads to low inference efficiency and poor adaptability to heterogeneous data distributions in actual agricultural scenarios. ViT-based models (ViT, CLIP-ViT) show improved performance with the development of transformer architectures, with CLIP-ViT reaching a high accuracy of 98.50%, but such models rely on large-scale pre-training and high computing resources, which are difficult to deploy on low-computing-power edge devices (e.g., mobile phones) for on-site detection. MobileNet-V2, as a lightweight model, achieves 94.98% accuracy but is evaluated on a non-standard dataset, leading to limited direct comparability with the above models trained on the PlantVillage benchmark. In contrast, the proposed FL-LSNet achieves the highest classification accuracy of 98.65% on the PlantVillage dataset, surpassing all the compared state-of-the-art models across different learning paradigms. These results fully demonstrate that FL-LSNet has comprehensive performance superiority over existing models, and is more suitable for practical on-site tomato disease detection in agricultural production scenarios.</p>
<p>Specifically:</p>
<list list-type="bullet">
<list-item>
<p>Compared with YOLO-series detection models and Transformer-based approaches, FL-LSNet achieves a substantial accuracy improvement of approximately 7&#x2013;9 percentage points, underscoring its superior capability in extracting discriminative fine-grained features for disease classification.</p></list-item>
<list-item>
<p>Compared with high-performing CNN-based models such as DenseNet201, FL-LSNet yields an accuracy improvement of 0.60 percentage points, indicating consistent performance gains even over strong baselines.</p></list-item>
</list>
<p>Overall, FL-LSNet maintains classification accuracy on par with leading CNN architectures while demonstrating enhanced adaptability under federated settings. By jointly balancing performance, generalization, and data privacy preservation, the proposed framework emerges as a robust and scalable solution for large-scale, federated plant disease detection applications.</p>
</sec>
<sec id="s4_4_4">
<label>4.4.4</label>
<title>Evaluation of FL-LSNet in field</title>
<p>This research is implemented on a heterogeneous device ecosystem based on Federated Learning that integrates field-oriented user terminals and a centralized management platform, to meet the differentiated demands of end-users and technical administrators. As <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref> shows, the primary end-user interface is a cross-platform mobile application compatible with both Android and iOS operating systems, which is tailored for on-site deployment in tomato greenhouses and open-field cultivation scenarios. The mobile client supports one-tap image acquisition of foliar and fruit symptoms, incorporates an offline inference module for basic disease identification to address unstable network conditions in rural areas, and delivers real-time diagnostic outcomes along with site-specific prevention strategies. Its human-computer interaction design is optimized for agricultural environments under direct sunlight.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Smart tomato disease monitoring system.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1783587-g004.tif">
<alt-text content-type="machine-generated">Composite infographic showing a greenhouse with growing crops, a digital map marking Cangshan District in Fuzhou, a labeled client mobile phone interface, and a server with an agricultural management dashboard, connected by arrows indicating data flow.</alt-text>
</graphic></fig>
<p>Complementing the mobile terminal, a web-based server platform is deployed for backend operations, accessible via standard desktop browsers for technical teams and agricultural experts. This centralized platform hosts the deep learning model for pathogen identification, maintains a repository of historical detection data for longitudinal trend analysis, and provides an administrative dashboard for monitoring regional disease prevalence. Furthermore, it enables expert review of ambiguous cases, iterative refinement of control recommendations, and over-the-air updates of the mobile application, establishing a bidirectional data synchronization mechanism that links on-field data collection with centralized analytical support. This dual-platform architecture ensures the system&#x2019;s scalability and practical applicability, facilitating seamless collaboration between frontline producers and agricultural specialists.</p>
<p>To further validate the effectiveness of the proposed algorithm, systematic field-level experiments were conducted under real agricultural production conditions. The validation scenarios, protocols, and evaluation metrics were carefully designed to reflect practical deployment environments.</p>
<p>First, a tomato cultivation site managed by the university laboratory was selected as the experimental field. Both tomato plants infected with Tomato Yellow Leaf Curl Virus (TYLCV) and healthy plants were included to ensure the representativeness and validity of the validation scenario. Second, image acquisition during field validation followed procedures consistent with actual agricultural practices. Tomato leaf images were captured using a commercial smartphone (Real 10) under natural illumination conditions, across multiple viewing angles (frontal, lateral, and 45&#xb0; oblique) and at different disease progression stages (early, middle, and late). A total of 1,637 field images were collected. Third, the validation protocol adopted a federated and distributed evaluation strategy. (i) Distributed validation: three spatially separated locations within the cultivation area were selected as independent federated clients, each performing local training and inference based on its own field samples, while disease recognition accuracy, recall, and inference latency were recorded. (ii) Stability evaluation: real-time field recognition was continuously performed over a seven-day period to assess the temporal robustness of the model. (iii) Practical usability evaluation: five agricultural workers operated the terminal devices to conduct disease identification, and the operational convenience and reliability of the recognition results were assessed.</p>
<p>Finally, the field validation results demonstrate that the proposed model achieves an average disease recognition accuracy of 94.7%, with inference latency on low-computing-power devices not exceeding 0.3 s per image. The model maintains stable performance under low-light conditions and complex backgrounds, and its operational usability was positively evaluated by agricultural personnel, indicating that it satisfies the practical requirements of in-field tomato disease detection.</p>
</sec>
</sec>
</sec>
<sec id="s5" sec-type="discussion">
<label>5</label>
<title>Discussion</title>
<sec id="s5_1">
<label>5.1</label>
<title>Performance analysis of FL-LSNet</title>
<p>The superior performance of FL-LSNet can be attributed to its architecture-level optimization that jointly enhances feature representation robustness and federated aggregation stability. Unlike classical federated methods such as MOON and FedProx, which primarily impose optimization-level constraints to mitigate client drift, FL-LSNet fundamentally improves the underlying feature extraction mechanism, enabling the model to learn more domain-invariant visual representations under heterogeneous data distributions.</p>
<p>As illustrated in <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>, FL-LSNet exhibits consistently faster convergence and lower training loss across all three datasets compared with FedAvg, MOON, and FedProx. In particular, under the Taiwan Tomato (<xref ref-type="bibr" rid="B9">Huang et&#xa0;al., 2020</xref>) dataset&#x2014;which is characterized by pronounced climatic and acquisition heterogeneity&#x2014;FL-LSNet demonstrates a smoother and more stable loss descent, avoiding the oscillations observed in MOON and the slower convergence of FedAvg. This behavior indicates that the LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) backbone effectively mitigates feature distribution shifts at the representation level, thereby reducing inter-client inconsistency during aggregation.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Comparison of training loss convergence across different federated learning algorithms on three agricultural datasets. FL-LSNet demonstrates faster convergence and more stable loss reduction under heterogeneous data distributions.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1783587-g005.tif">
<alt-text content-type="machine-generated">Four line graphs compare training loss over seventy epochs for three datasets&#x2014;CCMT, PlantVillage, and TaiwanTomato&#x2014;using FedAvg, Moon, FedProx, and FL-LSNet methods. Each method shows decreasing loss, with PlantVillage consistently achieving the lowest final loss.</alt-text>
</graphic></fig>
<p>From the perspective of training accuracy (<xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref>), FL-LSNet achieves rapid early-stage accuracy gains and maintains a stable upward trend throughout training. Compared to FedAvg, which shows slower convergence under heterogeneous conditions, and FedProx, which introduces additional regularization overhead, FL-LSNet reaches higher accuracy with fewer communication rounds. This advantage is particularly evident on the CCMT (P. K. <xref ref-type="bibr" rid="B16">Mensah and E., 2023</xref>) and TaiwanTomato (<xref ref-type="bibr" rid="B9">Huang et&#xa0;al., 2020</xref>) datasets, where cross-domain variations in illumination, disease manifestation, and background complexity pose significant challenges to conventional federated optimization strategies.</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Training accuracy comparison of federated learning algorithms on three agricultural datasets. FL-LSNet achieves higher accuracy with fewer communication rounds, particularly under cross-domain and multi-source data settings.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1783587-g006.tif">
<alt-text content-type="machine-generated">Four line graphs compare training accuracy across epochs for CCMT, PlantVillage, and TaiwanTomato datasets using FedAvg, Moon, FedProx, and FL-LSNet algorithms. PlantVillage consistently achieves the highest accuracy, followed by TaiwanTomato and then CCMT, with all algorithms showing accuracy increases over time before plateauing.</alt-text>
</graphic></fig>
<p>The observed performance gains further validate the effectiveness of the proposed &#x201c;See Large, Focus Small&#x201d; design philosophy. By integrating large-kernel global perception with small-kernel adaptive aggregation, LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) enhances contextual awareness while preserving fine-grained disease features. When deployed within a federated learning framework, this design enables FL-LSNet to align local representations more effectively across clients, thereby improving global model generalization without relying solely on restrictive optimization constraints.</p>
</sec>
<sec id="s5_2">
<label>5.2</label>
<title>Application of FL-LSNet and limitation</title>
<p>The FL-LSNet framework is purpose-built for distributed disease monitoring in practical agricultural production scenarios, including large-scale commercial plantations, family farms, and agricultural cooperatives. Each planting entity operates as an independent federated client, performing local training and inference on its own tomato leaf images. Raw visual and agronomic data remain locally stored to avoid privacy risks, with only encrypted model updates transmitted via the federated learning framework, thus resolving both data silos and privacy leakage issues. In deployment, the framework follows a three-stage workflow: (1) local data preprocessing and augmentation, where field-acquired images are enhanced on edge devices; (2) federated collaborative training, with edge-level aggregators executing standard parameter aggregation strategies to refine the global model; and (3) on-device inference, enabling real-time disease identification and delivery of site-specific prevention recommendations to end-users via a mobile application. This design ensures flexible adaptation to heterogeneous cultivation scenarios: multi-client collaboration improves model generalization across large plantations, while lightweight variants of LSNet enable efficient single-point detection for smallholder farmers, supporting precision disease management that balances agricultural productivity with data security.</p>
<p>Despite these advancements, several limitations merit further exploration. First, the current evaluation is restricted to image classification tasks. Extending FL-LSNet to more complex agricultural visual problems, such as multi-crop disease detection and semantic segmentation of mixed infection symptoms, would further validate its real-world utility. Second, communication efficiency remains a challenge in bandwidth-constrained rural environments, particularly as model scales expand. Finally, while this study integrates LSNet with the FedAvg strategy, future research could investigate compatibility with advanced federated learning techniques, such as asynchronous communication and adaptive gradient compression, to enhance scalability and robustness in distributed agricultural settings.</p>
</sec>
</sec>
<sec id="s6" sec-type="conclusions">
<label>6</label>
<title>Conclusion</title>
<p>This paper presented FL-LSNet, a novel federated learning framework specifically designed for distributed image analysis under privacy constraints and data heterogeneity. By incorporating the &#x201c;See Large, Focus Small&#x201d; design philosophy through the LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) lightweight convolutional architecture, the proposed method achieves high-fidelity visual feature extraction while substantially reducing computational overhead. Moreover, the integration of a dynamic aggregation mechanism enables FL-LSNet to effectively adapt to the statistical heterogeneity inherent in decentralized datasets.</p>
<p>Extensive experiments conducted on the CCMT (P. K. <xref ref-type="bibr" rid="B16">Mensah and E., 2023</xref>), Taiwan Tomato (<xref ref-type="bibr" rid="B9">Huang et&#xa0;al., 2020</xref>), and PlantVillage (<xref ref-type="bibr" rid="B10">Hughes and Salath&#xe9;, 2015</xref>) datasets demonstrate that FL-LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) consistently delivers state-of-the-art performance, outperforming representative federated learning baselines such as FedAvg and MOON across multiple evaluation metrics. In particular, the synergistic combination of LSNet (<xref ref-type="bibr" rid="B22">Wang et&#xa0;al., 2025b</xref>) and federated optimization achieves classification accuracy exceeding 98% on standardized datasets, while also exhibiting pronounced performance gains under challenging real-world climatic and cross-domain conditions. These results highlight the robustness and generalization capability of FL-LSNet in practical multi-source agricultural environments.</p>
<p>Future work will focus on three key directions: (1) optimizing computational efficiency for resource-constrained edge devices, (2) enhancing model robustness under small-sample and imbalanced data, and (3) exploring asynchronous aggregation to reduce communication overhead in large-scale federated networks. Additionally, we will extend the framework to complex visual tasks such as public safety data sharing and cloud-edge-end video surveillance analysis. This study provides a reusable technical paradigm for offering insights into lightweight federated learning adaptation for domain-specific applications, including cross-domain scenarios like public safety.</p>
</sec>
</body>
<back>
<sec id="s7" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material. Further inquiries can be directed to the corresponding author.</p></sec>
<sec id="s8" sec-type="author-contributions">
<title>Author contributions</title>
<p>TL: Conceptualization, Formal analysis, Funding acquisition, Project administration, Resources, Supervision, Validation, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. JX: Data curation, Visualization, Writing &#x2013; review &amp; editing. HD: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Project administration, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing.</p></sec>
<sec id="s10" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The handling editor CY declared a past co-authorship with the author TL.</p></sec>
<sec id="s11" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s12" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors&#xa0;and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Abulizi</surname> <given-names>A.</given-names></name>
<name><surname>Ye</surname> <given-names>J.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Dm-yolo: improved yolov9 model for tomato leaf disease detection</article-title>. <source>Front. Plant Sci.</source> <volume>15</volume>., PMID: <pub-id pub-id-type="pmid">40007767</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Annabel</surname> <given-names>L.</given-names></name>
<name><surname>Puspha</surname> <given-names>S.</given-names></name>
<name><surname>Muthulakshmi</surname> <given-names>V.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>A novel image-based detection pipeline for tomato leaf disease classification</article-title>. <source>Comput. Electron. Agric.</source> <volume>160</volume>, <fpage>50</fpage>&#x2013;<lpage>58</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/I-SMAC47947.2019.9032621</pub-id> 
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ashok</surname> <given-names>S.</given-names></name>
<name><surname>Kishore</surname> <given-names>G.</given-names></name>
<name><surname>Rajesh</surname> <given-names>V.</given-names></name>
<name><surname>Suchitra</surname> <given-names>S.</given-names></name>
<name><surname>Sophia</surname> <given-names>S. G.</given-names></name>
<name><surname>Pavithra</surname> <given-names>B.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Densenet-based model for plant disease classification</article-title>. <source>Comput. Electron. Agric.</source> <volume>178</volume>, <fpage>105742</fpage>.
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Assaduzzaman</surname> <given-names>M.</given-names></name>
<name><surname>Bishshash</surname> <given-names>P.</given-names></name>
<name><surname>Sharker Nirob</surname> <given-names>M. A.</given-names></name>
<name><surname>Al Marouf</surname> <given-names>A.</given-names></name>
<name><surname>Rokne</surname> <given-names>J. G.</given-names></name>
<name><surname>Alhajj</surname> <given-names>R.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Xse-tomatonet: Efficientnetb0 with squeeze-and-excitation modules for tomato disease classification</article-title>. <source>Comput. Electron. Agric.</source> <volume>223</volume>, <fpage>109025</fpage>.
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Barman</surname> <given-names>D.</given-names></name>
<name><surname>Bishshash</surname> <given-names>P.</given-names></name>
<name><surname>Sharker Nirob</surname> <given-names>Md. A.</given-names></name>
<name><surname>Al Marouf</surname> <given-names>A.</given-names></name>
<name><surname>Rokne</surname> <given-names>J. G.</given-names></name>
<name><surname>Alhajj</surname> <given-names>R.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Performance evaluation of deep learning models for plant disease classification</article-title>. <source>Sci. Rep.</source> <volume>14</volume>, <fpage>12345</fpage>.
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chowdhury</surname> <given-names>M. E. H.</given-names></name>
<name><surname>Rahman</surname> <given-names>T.</given-names></name>
<name><surname>Khandakar</surname> <given-names>A.</given-names></name>
<name><surname>Ibtehaz</surname> <given-names>N.</given-names></name>
<name><surname>Khan</surname> <given-names>A. U.</given-names></name>
<name><surname>Khan</surname> <given-names>M. S.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>a). 
<article-title>Comparative analysis of cnn architectures for tomato disease classification</article-title>. <source>Sensors</source> <volume>21</volume>, <fpage>5140</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5772/intechopen.97319</pub-id> 
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chowdhury</surname> <given-names>M. E. H.</given-names></name>
<name><surname>Rahman</surname> <given-names>T.</given-names></name>
<name><surname>Khandakar</surname> <given-names>A.</given-names></name>
<name><surname>Ayari</surname> <given-names>M. A.</given-names></name>
<name><surname>Khan</surname> <given-names>A. U.</given-names></name>
<name><surname>Khan</surname> <given-names>M. S.</given-names></name>
<etal/>
</person-group>. (<year>2021</year>b). 
<article-title>Efficientnet for tomato leaf disease classification</article-title>. <source>AgriEngineering</source> <volume>9</volume>, <fpage>63528</fpage>&#x2013;<lpage>63541</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriengineering3020020</pub-id> 
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gookyi</surname> <given-names>D. A. N.</given-names></name>
<name><surname>Wulnye</surname> <given-names>F. A.</given-names></name>
<name><surname>Wilson</surname> <given-names>M.</given-names></name>
<name><surname>Danquah</surname> <given-names>P.</given-names></name>
<name><surname>Danso</surname> <given-names>S. A.</given-names></name>
<name><surname>Gariba</surname> <given-names>A. A.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Enabling Intelligence on the Edge: Leveraging Edge Impulse to Deploy Multiple Deep Learning Models on Edge Devices for Tomato Leaf Disease Detection</article-title>. <source>AgriEngineering</source>. <volume>6</volume>, <fpage>3563</fpage>&#x2013;<lpage>3585</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agriengineering6040203</pub-id> 
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Huang</surname> <given-names>Y.</given-names></name>
<name><surname>Mei-Ling</surname></name>
<name><surname>Chang</surname> <given-names>Y.-H.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Deep learning-based plant disease detection in Taiwan</article-title>. <source>Agronomy</source> <volume>10</volume>, <fpage>1321</fpage>.
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hughes</surname> <given-names>D.</given-names></name>
<name><surname>Salath&#xe9;</surname> <given-names>M.</given-names></name>
</person-group> (<year>2015</year>). 
<article-title>An open access repository of images on plant health subjects for the development of mobile disease diagnostics</article-title>. <source>arXiv</source>.
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Karimireddy</surname> <given-names>S.</given-names></name>
<name><surname>Kale</surname> <given-names>S.</given-names></name>
<name><surname>Mohri</surname> <given-names>M.</given-names></name>
<name><surname>Reddi</surname> <given-names>S.</given-names></name>
<name><surname>Stich</surname> <given-names>S.</given-names></name>
<name><surname>Suresh</surname> <given-names>A. T.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Scaffold: Stochastic controlled averaging for federated learning</article-title>. <source>Int. Conf. Mach. Learn.</source>, <fpage>5132</fpage>&#x2013;<lpage>5143</lpage>.
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>Q.</given-names></name>
<name><surname>He</surname> <given-names>B.</given-names></name>
<name><surname>Song</surname> <given-names>D.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Moon: Model-contrastive federated learning</article-title>. <source>Int. Conf. Learn. Represent.</source>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Li</surname> <given-names>T.</given-names></name>
<name><surname>Sahu</surname> <given-names>A.</given-names></name>
<name><surname>Zaheer</surname> <given-names>M.</given-names></name>
<name><surname>Sanjabi</surname> <given-names>M.</given-names></name>
<name><surname>Talwalkar</surname> <given-names>A.</given-names></name>
<name><surname>Smith</surname> <given-names>V.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Federated optimization in heterogeneous networks</article-title>. <source>Proc. Mach. Learn. Syst.</source> <volume>2</volume>, <fpage>429</fpage>&#x2013;<lpage>446</lpage>.
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Luo</surname> <given-names>Q.</given-names></name>
<name><surname>Lan</surname> <given-names>C.</given-names></name>
<name><surname>Yu</surname> <given-names>T.</given-names></name>
<name><surname>Liang</surname> <given-names>M.</given-names></name>
<name><surname>Xiao</surname> <given-names>W.</given-names></name>
<name><surname>Pan</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Federated learning-based non-intrusive load monitoring adaptive to real-world heterogeneities</article-title>. <source>Sci. Rep.</source> <volume>15</volume>, <fpage>18223</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-025-02752-y</pub-id>, PMID: <pub-id pub-id-type="pmid">40415054</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>McMahan</surname> <given-names>B.</given-names></name>
<name><surname>Moore</surname> <given-names>E.</given-names></name>
<name><surname>Ramage</surname> <given-names>D.</given-names></name>
<name><surname>Hampson</surname> <given-names>S.</given-names></name>
<name><surname>Arcas</surname> <given-names>B.</given-names></name>
</person-group> (<year>2017</year>). 
<article-title>Communication-efficient learning of deep networks from decentralized data</article-title>. <source>Artif. Intell. Stat.</source> <volume>54</volume>, <fpage>1273</fpage>&#x2013;<lpage>1282</lpage>. Available online at: <uri xlink:href="https://proceedings.mlr.press/v54/mcmahan17a.html">https://proceedings.mlr.press/v54/mcmahan17a.html</uri> (Accessed <date-in-citation content-type="access-date">January 10, 2026</date-in-citation>). 
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mensah</surname> <given-names>P. K.</given-names></name>
<etal/>
</person-group> (<year>2023</year>). 
<article-title>Ccmt: Dataset for crop pest and disease detection</article-title>. <source>Data Brief</source> <volume>49</volume>, <elocation-id>109306</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.dib.2023.109306</pub-id>, PMID: <pub-id pub-id-type="pmid">37360671</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Nishankar</surname> <given-names>S.</given-names></name>
<name><surname>Pavindran</surname> <given-names>V.</given-names></name>
<name><surname>Mithuran</surname> <given-names>T.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Vit-rot: Vision transformer-based robust framework for tomato leaf disease recognition</article-title>. <source>AgriEngineering</source>. <volume>7</volume>, <fpage>185</fpage>. Available online at: <uri xlink:href="https://api.semanticscholar.org/CorpusID:279270669">https://api.semanticscholar.org/CorpusID:279270669</uri>.
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Piccialli</surname> <given-names>F.</given-names></name>
<name><surname>Della Bruna</surname> <given-names>C.</given-names></name>
<name><surname>Chiaro</surname> <given-names>D.</given-names></name>
<name><surname>Qi</surname> <given-names>P.</given-names></name>
<name><surname>Savoia</surname> <given-names>M.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Agrifold: A lightweight cnn-based federated learning framework for agricultural applications</article-title>. <source>IEEE Access</source> <volume>10</volume>, <fpage>123456</fpage>&#x2013;<lpage>123467</lpage>.
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Radford</surname> <given-names>A.</given-names></name>
<name><surname>Kim</surname> <given-names>J. W.</given-names></name>
<name><surname>Hallacy</surname> <given-names>C.</given-names></name>
<name><surname>Ramesh</surname> <given-names>A.</given-names></name>
<name><surname>Goh</surname> <given-names>G.</given-names></name>
<name><surname>Agarwal</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). &#x201c;
<article-title>Learning transferable visual models from natural language supervision</article-title>,&#x201d; in <conf-name>International conference on machine learning</conf-name>. <fpage>8748</fpage>&#x2013;<lpage>8763</lpage> (
<publisher-name>PMLR</publisher-name>).
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rieyan</surname> <given-names>I.</given-names></name>
<name><surname>News</surname> <given-names>M. R. K.</given-names></name>
<name><surname>Rahman</surname> <given-names>A. B. M. M.</given-names></name>
<name><surname>Khan</surname> <given-names>S. A.</given-names></name>
<name><surname>Zaarif</surname> <given-names>S. T. J.</given-names></name>
<name><surname>Alam</surname> <given-names>Md. G. R.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Secure medical image analysis using distributed data fabric and homomorphic encryption</article-title>. <source>Future Gener. Comput. Syst.</source> <volume>146</volume>, <fpage>234</fpage>&#x2013;<lpage>249</lpage>.
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>H.</given-names></name>
<name><surname>Wang</surname> <given-names>L.</given-names></name>
<name><surname>Xu</surname> <given-names>X.</given-names></name>
<name><surname>Zou</surname> <given-names>K.</given-names></name>
<name><surname>Qian</surname> <given-names>Y.</given-names></name>
<name><surname>Goh</surname> <given-names>R. S. M.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Feduaa: Uncertainty-aware aggregation for federated learning</article-title>. <source>IEEE Trans. Neural Networks Learn. Syst.</source> <volume>34</volume>, <fpage>2145</fpage>&#x2013;<lpage>2158</lpage>.
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>A.</given-names></name>
<name><surname>Chen</surname> <given-names>H.</given-names></name>
<name><surname>Lin</surname> <given-names>Z.</given-names></name>
<name><surname>Han</surname> <given-names>J.</given-names></name>
<name><surname>Ding</surname> <given-names>G.</given-names></name>
</person-group> (<year>2025</year>b). 
<article-title>Lsnet: Large-small network for efficient image classification</article-title>. <source>arXiv</source>. (<publisher-loc>Nashville, TN, USA</publisher-loc>: 
<publisher-name>IEEE</publisher-name>) <fpage>9718</fpage>&#x2013;<lpage>9729</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/CVPR52734.2025.00908</pub-id> 
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>Z.</given-names></name>
<name><surname>Wu</surname> <given-names>M.</given-names></name>
<name><surname>Liao</surname> <given-names>Q.</given-names></name>
<name><surname>Wang</surname> <given-names>Y.</given-names></name>
<name><surname>Sui</surname> <given-names>Y.</given-names></name>
<name><surname>Gong</surname> <given-names>C.</given-names></name>
</person-group> (<year>2025</year>a). 
<article-title>Current status and future trends of eco-friendly management of postharvest fungal decays in tomato fruit</article-title>. <source>NPJ Sci. Food</source> <volume>9</volume>, <fpage>104</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41538-025-00477-w</pub-id>, PMID: <pub-id pub-id-type="pmid">40533482</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wu</surname> <given-names>J.</given-names></name>
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Li</surname> <given-names>F.</given-names></name>
<name><surname>Zhang</surname> <given-names>M.</given-names></name>
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Chen</surname> <given-names>Z.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Plant virology in the 21st century in China: Recent advances and future directions</article-title>. <source>J. Integr. Plant Biol.</source> <volume>66</volume>, <fpage>579</fpage>&#x2013;<lpage>622</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/jipb.13580</pub-id>, PMID: <pub-id pub-id-type="pmid">37924266</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Yurdem</surname> <given-names>B.</given-names></name>
<name><surname>Kuzlu</surname> <given-names>M.</given-names></name>
<name><surname>Gullu</surname> <given-names>M. K.</given-names></name>
<name><surname>Catak</surname> <given-names>F. O.</given-names></name>
<name><surname>Tabassum</surname> <given-names>M.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Federated learning: Overview, strategies, applications, tools and future directions</article-title>. <source>Heliyon</source> <volume>10</volume>, <elocation-id>e38137</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.heliyon.2024.e38137</pub-id>, PMID: <pub-id pub-id-type="pmid">39391509</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
<name><surname>Mao</surname> <given-names>Y.</given-names></name>
<name><surname>Yang</surname> <given-names>Q.</given-names></name>
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
</person-group>. (<year>2024</year>). 
<article-title>Se-sk-capresnet: Fusing capsule networks with residual networks for plant disease classification</article-title>. <source>Plant Methods</source> <volume>20</volume>, <elocation-id>156</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13007-024-01156-8</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhao</surname> <given-names>J.</given-names></name>
<name><surname>Huang</surname> <given-names>C.</given-names></name>
<name><surname>Wang</surname> <given-names>W.</given-names></name>
<name><surname>Xie</surname> <given-names>R.</given-names></name>
<name><surname>Dong</surname> <given-names>R.</given-names></name>
<name><surname>Matwin</surname> <given-names>S.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Hefl-ldp: Hybrid encryption with local differential privacy for federated learning</article-title>. <source>Inf. Sci.</source> <volume>621</volume>, <fpage>268</fpage>&#x2013;<lpage>285</lpage>.
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhao</surname> <given-names>X.</given-names></name>
<name><surname>Wang</surname> <given-names>L.</given-names></name>
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Han</surname> <given-names>X.</given-names></name>
<name><surname>Deveci</surname> <given-names>M.</given-names></name>
<name><surname>Parmar</surname> <given-names>M.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>A review of convolutional neural networks in computer vision</article-title>. <source>Artif. Intell. Rev.</source> <volume>57</volume>, <fpage>99</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10462-024-10721-6</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhou</surname> <given-names>C.</given-names></name>
<name><surname>Zhou</surname> <given-names>S.</given-names></name>
<name><surname>Xing</surname> <given-names>J.</given-names></name>
<name><surname>Song</surname> <given-names>J.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Recombined residual dense network for tomato disease classification</article-title>. <source>Neurocomputing</source> <volume>452</volume>, <fpage>762</fpage>&#x2013;<lpage>773</lpage>.
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2228904">Changcai Yang</ext-link>, Fujian Agriculture and Forestry University, China</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1570177">Changji Wen</ext-link>, Jilin Agricultural University, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3345248">Qiufeng Chen</ext-link>, Fujian Agriculture and Forestry University, China</p></fn>
</fn-group>
</back>
</article>