<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2026.1747863</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Real-time on-device weed identification using a hardware-efficient lightweight CNN</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zhang</surname><given-names>Yuxuan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3003690/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Lu</surname><given-names>Yuchen</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Martinez-Rau</surname><given-names>Luciano Sebastian</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2750602/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Qiu</surname><given-names>Quan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/384127/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Bader</surname><given-names>Sebastian</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1135666/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>College of Intelligent Science and Engineering, Beijing University of Agriculture</institution>, <city>Beijing</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>Department of Computer and Electrical Engineering, Mid Sweden University</institution>, <city>Sundsvall</city>,&#xa0;<country country="se">Sweden</country></aff>
<aff id="aff3"><label>3</label><institution>Yantai Research Institute, Harbin Engineering University</institution>, <city>Yantai</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff4"><label>4</label><institution>Instituto de Investigaci&#xf3;n en Se&#xf1;ales, Sistemas e Inteligencia Computacional, sinc(i), FICH-UNL/CONICET</institution>, <city>Santa Fe</city>,&#xa0;<country country="ar">Argentina</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Yuxuan Zhang, <email xlink:href="mailto:yuxuan.zhang@miun.se">yuxuan.zhang@miun.se</email>; Quan Qiu, <email xlink:href="mailto:qiuquan0110@ustc.edu">qiuquan0110@ustc.edu</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-16">
<day>16</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>17</volume>
<elocation-id>1747863</elocation-id>
<history>
<date date-type="received">
<day>17</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>29</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>29</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Zhang, Lu, Martinez-Rau, Qiu and Bader.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Zhang, Lu, Martinez-Rau, Qiu and Bader</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-16">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Accurate and timely weed identification is fundamental to sustainable crop management, particularly for autonomous agricultural systems operating under strict energy and hardware constraints. While deep learning has significantly advanced image-based weed recognition, most existing models rely on GPU-based inference and therefore cannot be deployed directly on low-power field devices. In this study, we propose a hardware-efficient lightweight convolutional neural network (CNN), named TinyWeedNet, designed specifically for real-time on-device weed identification in precision agriculture. The model integrates multi-scale feature extraction, depthwise separable inverted residual blocks, and compact channel attention to enhance discriminative ability while maintaining a minimal computational footprint. To evaluate its suitability for field deployment, TinyWeedNet was trained and tested on the public DeepWeeds dataset and implemented on an STM32H7 microcontroller via a TinyML workflow. Experimental results show that the model achieves 97.26% classification accuracy with only 0.48 M parameters, supporting sub-90 ms inference and low energy consumption during fully embedded execution. A comprehensive analysis, including benchmark comparisons, hyperparameter sensitivity tests, and ablation studies, confirms that TinyWeedNet offers a favorable balance of accuracy, speed, and energy efficiency for resource-constrained agricultural platforms. Overall, this work demonstrates a practical pathway for integrating real-time, low-power weed identification into field robots, UAVs, and distributed sensing nodes, contributing to more autonomous and energy-aware weed management strategies in precision agriculture.</p>
</abstract>
<kwd-group>
<kwd>embedded systems</kwd>
<kwd>energy-efficient computing</kwd>
<kwd>lightweight convolutional neural network (CNN)</kwd>
<kwd>on-device inference</kwd>
<kwd>precision agriculture</kwd>
<kwd>TinyML</kwd>
<kwd>weed identification</kwd>
</kwd-group>
<funding-group>
<award-group id="gs1">
<funding-source id="sp1">
<institution-wrap>
<institution>Beijing University of Agriculture</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100003352</institution-id>
</institution-wrap>
</funding-source>
<award-id rid="sp1">QJKC-2025020</award-id>
</award-group>
<award-group id="gs2">
<funding-source id="sp2">
<institution-wrap>
<institution>Stiftelsen f&#xf6;r Kunskaps- och Kompetensutveckling</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100003170</institution-id>
</institution-wrap>
</funding-source>
<award-id rid="sp2">20180170, 20240029-H-02</award-id>
</award-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This research was financially supported by the Beijing University of Agriculture&#x2019;s Young Faculty Scientific Research and Innovation Capacity Enhancement Program (QJKC-2025020) and the Swedish Knowledge Foundation under grants 20180170 (NIIT) and 20240029-H-02 (TransTech2Horizon).</funding-statement>
</funding-group>
<counts>
<fig-count count="5"/>
<table-count count="6"/>
<equation-count count="22"/>
<ref-count count="38"/>
<page-count count="16"/>
<word-count count="10129"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Technical Advances in Plant Science</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Agricultural weed infestation poses a persistent threat to global crop production, competing with crops for nutrients, water, and sunlight while increasing reliance on herbicides <xref ref-type="bibr" rid="B6">Dadashzadeh et&#xa0;al. (2024)</xref>; <xref ref-type="bibr" rid="B24">Sathesh et&#xa0;al. (2025b)</xref>; <xref ref-type="bibr" rid="B13">Jim&#xe9;nez et&#xa0;al. (2024)</xref>; <xref ref-type="bibr" rid="B21">Sathesh and Maheswaran (2024)</xref>. Effective weed management is therefore essential for improving yield stability and supporting sustainable agricultural practices <xref ref-type="bibr" rid="B23">Sathesh et&#xa0;al. (2025a)</xref>; <xref ref-type="bibr" rid="B19">Parvis and Pirola (1999)</xref>; <xref ref-type="bibr" rid="B2">Anand et&#xa0;al. (2021)</xref>; <xref ref-type="bibr" rid="B9">Holmes et&#xa0;al. (2019)</xref>. Traditional methods, such as uniform herbicide spraying and manual removal, are increasingly unsuitable due to ecological risks, labor intensity, and rising operational costs. Consequently, real-time and accurate weed identification has become a central requirement for autonomous field robots, UAVs, and smart farming systems that aim to implement precise and environmentally responsible weed control <xref ref-type="bibr" rid="B15">Lottes et&#xa0;al. (2017)</xref>; <xref ref-type="bibr" rid="B32">Wu et&#xa0;al. (2020)</xref>.</p>
<p>Recent advances in agricultural computer vision have leveraged deep learning (DL) techniques to achieve high weed&#x2013;crop discrimination accuracy across diverse field conditions <xref ref-type="bibr" rid="B22">Sathesh et&#xa0;al. (2024)</xref>; <xref ref-type="bibr" rid="B30">Upadhyay et&#xa0;al. (2025)</xref>; <xref ref-type="bibr" rid="B16">Luo et&#xa0;al. (2023)</xref>; <xref ref-type="bibr" rid="B12">Ishak et&#xa0;al. (2009)</xref>; <xref ref-type="bibr" rid="B5">Czymmek et&#xa0;al. (2019)</xref>; <xref ref-type="bibr" rid="B10">Hussmann et&#xa0;al. (2019)</xref>; <xref ref-type="bibr" rid="B17">Maheswaran et&#xa0;al. (2024)</xref>. High-capacity neural models such as ResNet, Inception, and transformer-based architectures perform well on benchmark datasets including DeepWeeds <xref ref-type="bibr" rid="B18">Olsen et&#xa0;al. (2019)</xref>. For example, Sunil et&#xa0;al. employed VGG16 to classify four weed species (crabgrass, kochia, ragweed, and waterhemp) and six crop species (black bean, rapeseed, maize, flax, soybean, and sugar beet), achieving an accuracy of 93&#x2013;97.5% <xref ref-type="bibr" rid="B26">Sunil et&#xa0;al. (2022)</xref>. Liu et&#xa0;al. achieved a spraying completion rate of 93% for target weeds, including <italic>Senecio vulgaris</italic> and <italic>Capparis spinosa</italic>, in strawberry fields, thereby reducing pesticide use and associated environmental risks <xref ref-type="bibr" rid="B14">Liu et&#xa0;al. (2021)</xref>. Garibaldi-M&#xe1;rquez et&#xa0;al. reported that the Xception model achieved accuracies ranging from 83% to 97% for early-stage weed classification in maize fields under natural illumination conditions <xref ref-type="bibr" rid="B7">Garibaldi-M&#xe1;rquez et&#xa0;al. (2022)</xref>. However, the heavy computational and memory demands of such high-capacity models make them impractical for low-power devices that operate autonomously in the field. While lightweight CNNs (such as MobileNet, SqueezeNet, and ShuffleNet <xref ref-type="bibr" rid="B20">Sandler et&#xa0;al. (2018)</xref>; <xref ref-type="bibr" rid="B11">Iandola et&#xa0;al. (2016)</xref>; <xref ref-type="bibr" rid="B28">Tan and Le (2019)</xref>) reduce model complexity, most still exceed the strict limitations of microcontroller units (MCUs), which typically offer only a few hundred kilobytes of RAM and a limited energy budget. As a result, many current agricultural DL systems rely on cloud-based or offline processing <xref ref-type="bibr" rid="B38">Zhang et&#xa0;al. (2025c)</xref>; <xref ref-type="bibr" rid="B31">Wiafe et&#xa0;al. (2025)</xref>, limiting their ability to make immediate in-field decisions.</p>
<p>Tiny Machine Learning (TinyML) has recently emerged as a promising paradigm for bringing deep neural inference directly to small, energy-constrained devices <xref ref-type="bibr" rid="B34">Zhang (2025)</xref>; <xref ref-type="bibr" rid="B35">Zhang et&#xa0;al. (2023</xref>, <xref ref-type="bibr" rid="B37">2025b)</xref>. For example, Alshuhail et&#xa0;al. proposed a TinyML-based structural anomaly detection framework, achieving an anomaly detection accuracy of up to 92% while reducing energy consumption by 40% <xref ref-type="bibr" rid="B1">Alshuhail et&#xa0;al. (2025)</xref>. Zeynali et&#xa0;al. applied TinyML to non-invasive blood glucose monitoring based on photoplethysmography (PPG) signals; their method achieved 76.6% accuracy in Zone A and 23.4% in Zone B of the Clarke Error Grid Analysis (CEGA), indicating 100% clinical acceptability <xref ref-type="bibr" rid="B33">Zeynali et&#xa0;al. (2025)</xref>. Asante et&#xa0;al. introduced TinyEEGConformer, which achieved an average accuracy of 79.63% on the BCI Competition IV 2a dataset, outperforming the baseline EEG Conformer model by 0.97% while reducing the number of parameters by a factor of 21 <xref ref-type="bibr" rid="B3">Asante et&#xa0;al. (2025)</xref>. By executing models entirely on MCUs, TinyML enables low-latency, always-available, and energy-efficient weed detection without depending on external computation or network connectivity <xref ref-type="bibr" rid="B36">Zhang et&#xa0;al. (2025a)</xref>. However, deploying CNNs in such constrained environments remains challenging. The model must provide reliable recognition of weeds with diverse morphological traits and under varying illumination, while simultaneously adhering to strict limits on memory footprint, computational cost, and energy consumption. Achieving this balance requires hardware-aware architectural designs rather than directly scaling down conventional CNNs.</p>
<p>To address these limitations, we propose TinyWeedNet, a hardware-efficient lightweight CNN tailored for real-time on-device weed identification on STM32-class MCUs. The model integrates multi-scale feature extraction, depthwise separable inverted residual blocks, and compact channel attention to enhance discriminative capability while minimizing computational demands. In addition to model development, we conducted a comprehensive evaluation comprising benchmark comparisons, hyperparameter sensitivity analyses, and ablation studies to systematically assess how architectural parameters affect accuracy, latency, and power consumption during embedded execution.</p>
<p>The major contributions of this work are as follows:</p>
<list list-type="bullet">
<list-item>
<p>Hardware-efficient Lightweight Architecture: We design TinyWeedNet, a multi-scale, attention-enhanced lightweight CNN optimized for deployment on low-power MCUs commonly used in field robotics and embedded agricultural systems.</p></list-item>
<list-item>
<p>Comprehensive Evaluation for Embedded Environments: We quantitatively analyze the trade-offs among accuracy, latency, memory footprint, and energy consumption through benchmark comparisons, hyperparameter sensitivity experiments, and ablation studies.</p></list-item>
<list-item>
<p>Real-world Embedded Validation: We implement TinyWeedNet on an STM32H7 microcontroller, achieving 97.26% accuracy on DeepWeeds with sub-90 ms inference time and approximately 39 mJ of energy per inference, demonstrating its suitability for real-time field deployment.</p></list-item>
</list>
<p>Together, these contributions provide a practical and validated pathway for bringing real-time weed identification to resource-constrained agricultural platforms, offering new opportunities for intelligent, energy-aware, and autonomous weed management in precision agriculture.</p>
<p>The rest of the paper is structured as follows. Section 2 describes the dataset used in this study, the preprocessing procedures, the proposed TinyWeedNet architecture, the experimental design, the MCU deployment toolchain, and the configurations for the hyperparameter, ablation, and robustness studies. Section 3 presents the comparative experimental results, the hyperparameter sensitivity analysis, the ablation study, and the robustness analysis outcomes. Finally, Section 4 concludes the paper and outlines directions for future work.</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Methodology</title>
<sec id="s2_1">
<label>2.1</label>
<title>Dataset and preprocessing</title>
<p>The public DeepWeeds dataset comprises 17,509 RGB images representing eight Australian weed species along with a negative class consisting of non-target plants and background vegetation <xref ref-type="bibr" rid="B18">Olsen et&#xa0;al. (2019)</xref>. These images were collected from eight distinct regions across northern Australia between June 2017 and March 2018. Classification of these species is particularly challenging due to the geographic and seasonal variability of the plants, the complexity of background environments, and the wide dynamic range of illumination conditions.</p>
<p>The eight weed species are Chinee apple (<italic>Ziziphus mauritiana</italic>), Lantana (<italic>Lantana camara</italic>), Parkinsonia (<italic>Parkinsonia aculeata</italic>), Siam weed (<italic>Chromolaena odorata</italic>), Rubber vine (<italic>Cryptostegia grandiflora</italic>), Prickly acacia (<italic>Vachellia nilotica</italic>), Parthenium (<italic>Parthenium hysterophorus</italic>), and Snake weed (<italic>Stachytarpheta jamaicensis</italic>). The high inter-class similarity and significant intra-class variability further complicate the classification task. All images were captured using a FLIR Blackfly 23S6C camera and uniformly resized to 256 &#xd7; 256 pixels in the original dataset.</p>
<p>For this study, all images were resized to a uniform resolution of 224 &#xd7; 224 &#xd7; 3 pixels using bilinear interpolation in order to match the input specification of the proposed TinyWeedNet model. This resolution was selected as a compromise between preserving sufficient visual details for weed discrimination and maintaining computational efficiency suitable for MCU deployment. Pixel intensity values were converted from 8-bit integer format to floating-point and normalized to the range [0, 1] by dividing by 255. No additional color space transformation, histogram equalization, or manual filtering was applied, so as to preserve the original visual characteristics of the field-acquired images. The dataset was split into training and testing subsets following an 8:2 ratio using stratified sampling, ensuring that samples from all weed species were proportionally represented in both sets. Representative examples of the weed species from the DeepWeeds dataset are shown in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>.</p>
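<p>For concreteness, the following minimal sketch shows how this preprocessing pipeline could be implemented in PyTorch/torchvision; the dataset path, variable names, and random seed are illustrative assumptions rather than the exact code used in this study.</p>
<preformat preformat-type="code"># Illustrative preprocessing sketch (assumed PyTorch/torchvision pipeline).
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset
from torchvision import datasets, transforms

preprocess = transforms.Compose([
    transforms.Resize((224, 224)),  # bilinear interpolation by default
    transforms.ToTensor(),          # uint8 [0, 255] to float32 in [0, 1]
])

# DeepWeeds arranged as one folder per class (8 weed species + negatives)
dataset = datasets.ImageFolder("deepweeds/", transform=preprocess)

# Stratified 8:2 split so all classes are proportionally represented
labels = [label for _, label in dataset.samples]
train_idx, test_idx = train_test_split(
    list(range(len(dataset))), test_size=0.2, stratify=labels, random_state=0)
train_set, test_set = Subset(dataset, train_idx), Subset(dataset, test_idx)</preformat>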
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Weed image samples in the DeepWeeds dataset: <bold>(a)</bold> Chinee apple, <bold>(b)</bold> lantana, <bold>(c)</bold> parkinsonia, <bold>(d)</bold> parthenium, <bold>(e)</bold> prickly acacia, <bold>(f)</bold> rubber vine, <bold>(g)</bold> siam weed, <bold>(h)</bold> snake weed, and <bold>(i)</bold> negative class (non-target plants and background).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1747863-g001.tif">
<alt-text content-type="machine-generated">Nine-panel figure displaying different wild plant species in outdoor environments, each labeled from a to i. Panels show various leaf shapes, colors, and ground textures in natural light, illustrating plant diversity.</alt-text>
</graphic></fig>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>TinyWeedNet architecture</title>
<p>This section presents the architecture of TinyWeedNet, a lightweight CNN designed for real-time weed classification on low-power MCUs. As illustrated in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>, TinyWeedNet adopts a hierarchical design comprising four core modules: a compact <italic>Stem</italic> for early feature extraction, a <italic>Multi-Scale Convolution (MSC)</italic> block for receptive-field diversity, a stack of <italic>Inverted Residual (IR)</italic> blocks enhanced with <italic>Channel Attention (CA)</italic>, and a minimalist classification head. A central design principle of TinyWeedNet is to maximize computational efficiency under the strict memory and latency constraints of MCU-based platforms. To this end, all network components are constructed exclusively from operators natively supported by the STM32Cube.AI<xref ref-type="fn" rid="fn1"><sup>1</sup></xref> framework, including standard, pointwise, and depthwise convolutions, batch normalization, ReLU activation, pooling, and linear layers. This ensures full hardware compatibility, facilitates integer quantization, and enables memory-aware code generation for seamless embedded deployment.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Architecture of TinyWeedNet showing the stem, multi-scale convolution (MSC) block with four parallel branches, inverted residual (IR) block with expansion ratio <italic>E</italic> = 4 and depthwise separable convolution, channel attention (CA) module with reduction ratio <italic>R</italic> = 8, and the classification head.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1747863-g002.tif">
<alt-text content-type="machine-generated">Deep learning model architecture diagram for image classification, starting with a 224 by 224 by 3 input through a stem block, followed by parallel convolutions and pooling in an MSC block, five IR blocks with channel attention, and a head with convolution, global average pooling, dropout, and dense layers, producing a nine-class output. Channel attention uses adaptive pooling, pointwise convolutions, and a sigmoid operation for feature refinement.</alt-text>
</graphic></fig>
<p>Throughout this section, we denote the input image as <inline-formula>
<mml:math display="inline" id="im1"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>X</mml:mi></mml:mstyle><mml:mtext>&#xa0;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mtext>&#xa0;</mml:mtext><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mn>3</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mi>H</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>W</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> with <inline-formula>
<mml:math display="inline" id="im2"><mml:mrow><mml:mi>H</mml:mi><mml:mo>=</mml:mo><mml:mi>W</mml:mi><mml:mo>=</mml:mo><mml:mn>224</mml:mn></mml:mrow></mml:math></inline-formula>, and a feature map at any stage as <inline-formula>
<mml:math display="inline" id="im3"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi></mml:mstyle><mml:mtext>&#xa0;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mtext>&#xa0;</mml:mtext><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mi>C</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>H</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>W</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>. We use <inline-formula>
<mml:math display="inline" id="im4"><mml:mrow><mml:msubsup><mml:mrow><mml:mtext>Conv&#xa0;</mml:mtext></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mi>s</mml:mi></mml:msubsup></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im5"><mml:mrow><mml:msubsup><mml:mrow><mml:mtext>DWConv&#xa0;</mml:mtext></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>k</mml:mi></mml:mrow><mml:mi>s</mml:mi></mml:msubsup></mml:mrow></mml:math></inline-formula> to represent standard and depthwise convolutions with kernel size <inline-formula>
<mml:math display="inline" id="im6"><mml:mrow><mml:mi>k</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>k</mml:mi></mml:mrow></mml:math></inline-formula> and stride <inline-formula>
<mml:math display="inline" id="im7"><mml:mi>s</mml:mi></mml:math></inline-formula>, respectively; <inline-formula>
<mml:math display="inline" id="im8"><mml:mrow><mml:mtext>BN&#xa0;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mo>&#xb7;</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> denotes batch normalization, <inline-formula>
<mml:math display="inline" id="im9"><mml:mrow><mml:mi>&#x3c3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mo>&#xb7;</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> is the ReLU activation, and <inline-formula>
<mml:math display="inline" id="im10"><mml:mrow><mml:mtext>GAP&#xa0;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mo>&#xb7;</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> / <inline-formula>
<mml:math display="inline" id="im11"><mml:mrow><mml:mtext>GMP&#xa0;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mo>&#xb7;</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> represent global average and max pooling. These operations are consistently used across all modules described below. More details could be found in <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>TinyWeedNet layer schedule (input size 224&#xd7;224).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Stage</th>
<th valign="middle" align="center">Output size</th>
<th valign="middle" align="center">Operator/Channels</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Input</td>
<td valign="middle" align="center">224&#xd7;224</td>
<td valign="middle" align="center">RGB</td>
</tr>
<tr>
<td valign="middle" align="left">Stem</td>
<td valign="middle" align="center">112&#xd7;112</td>
<td valign="middle" align="center">Conv 3&#xd7;3, <italic>C</italic> = 24, <italic>s</italic> = 2</td>
</tr>
<tr>
<td valign="middle" align="left">MSC</td>
<td valign="middle" align="center">112&#xd7;112</td>
<td valign="middle" align="center">36 (concat of 1&#xd7;1, 3&#xd7;3, 5&#xd7;5, pool+1&#xd7;1)</td>
</tr>
<tr>
<td valign="middle" align="left">IR#1</td>
<td valign="middle" align="center">56&#xd7;56</td>
<td valign="middle" align="center">IR(<italic>E</italic> = 4, CA <italic>R</italic> = 8), 36&#x2192;48, <italic>s</italic> = 2</td>
</tr>
<tr>
<td valign="middle" align="left">IR#2</td>
<td valign="middle" align="center">28&#xd7;28</td>
<td valign="middle" align="center">IR(<italic>E</italic> = 4, CA <italic>R</italic> = 8), 48&#x2192;72, <italic>s</italic> = 2</td>
</tr>
<tr>
<td valign="middle" align="left">IR#3</td>
<td valign="middle" align="center">14&#xd7;14</td>
<td valign="middle" align="center">IR(<italic>E</italic> = 4, CA <italic>R</italic> = 8), 72&#x2192;96, <italic>s</italic> = 2</td>
</tr>
<tr>
<td valign="middle" align="left">IR#4</td>
<td valign="middle" align="center">7&#xd7;7</td>
<td valign="middle" align="center">IR(<italic>E</italic> = 4, CA <italic>R</italic> = 8), 96&#x2192;120, <italic>s</italic> = 2</td>
</tr>
<tr>
<td valign="middle" align="left">IR#5</td>
<td valign="middle" align="center">7&#xd7;7</td>
<td valign="middle" align="center">IR(<italic>E</italic> = 4, CA <italic>R</italic> = 8), 120&#x2192;120, <italic>s</italic> = 1</td>
</tr>
<tr>
<td valign="middle" align="left">Head</td>
<td valign="middle" align="center">1&#xd7;1</td>
<td valign="middle" align="center">Conv 1&#xd7;1: 120&#x2192;240; GAP; Dropout; FC(9)</td>
</tr>
</tbody>
</table>
</table-wrap>
<sec id="s2_2_1">
<label>2.2.1</label>
<title>Stem block</title>
<p>The Stem block (<xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>, green part) serves as the entry point of TinyWeedNet, responsible for low-level feature extraction and spatial downsampling of the input image. It applies a single <inline-formula>
<mml:math display="inline" id="im12"><mml:mrow><mml:mn>3</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:math></inline-formula> convolution layer with stride <inline-formula>
<mml:math display="inline" id="im13"><mml:mrow><mml:mi>s</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im14"><mml:mrow><mml:msub><mml:mi>C</mml:mi><mml:mn>0</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mn>24</mml:mn></mml:mrow></mml:math></inline-formula> output channels, followed by batch normalization and a ReLU activation in <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>:</p>
<disp-formula id="eq1"><label>(1)</label>
<mml:math display="block" id="M1"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi></mml:mstyle><mml:mn>0</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3c3;</mml:mi><mml:mtext>&#xa0;</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>BN</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mtext>Conv</mml:mtext></mml:mrow><mml:mrow><mml:mn>3</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>3</mml:mn></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>X</mml:mi></mml:mstyle><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mo>&#xa0;</mml:mo><mml:msub><mml:mi>F</mml:mi><mml:mn>0</mml:mn></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mn>24</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>112</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>112</mml:mn></mml:mrow></mml:msup><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>This operation reduces the spatial resolution by a factor of two. The relatively narrow channel width (<italic>C</italic><sub>0</sub> = 24) ensures a lightweight feature representation that minimizes computation in subsequent layers while maintaining sufficient expressive capacity for early-stage encoding. Such a compact stem design is particularly beneficial for TinyML deployment, as it balances representational richness and on-chip efficiency, enabling fast inference on low-power MCUs.</p>
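<p>As a minimal sketch, the Stem block of Equation 1 can be expressed with standard PyTorch layers; the layer and variable names below are illustrative, not the released implementation.</p>
<preformat preformat-type="code">import torch
import torch.nn as nn

# Stem block (Equation 1): a single 3x3 convolution with stride 2 and
# 24 output channels, followed by batch normalization and ReLU.
stem = nn.Sequential(
    nn.Conv2d(3, 24, kernel_size=3, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(24),
    nn.ReLU(inplace=True),
)

x = torch.randn(1, 3, 224, 224)  # input image X
f0 = stem(x)                     # F0 with shape (1, 24, 112, 112)</preformat>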
</sec>
<sec id="s2_2_2">
<label>2.2.2</label>
<title>Multi-Scale Convolution block</title>
<p>The MSC block (<xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>, blue part) aims to enhance receptive-field diversity while maintaining computational efficiency. Natural scenes in agricultural environments often exhibit significant intra-class variations and complex backgrounds&#x2014;e.g., overlapping leaves, varying weed sizes, and inconsistent lighting. To handle these spatial complexities effectively, the MSC employs four parallel branches, three convolutional pathways with distinct receptive fields and one pooling pathway, to capture features across multiple spatial scales.</p>
<p>Given the input feature map <inline-formula>
<mml:math display="inline" id="im15"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi></mml:mstyle><mml:mn>0</mml:mn></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mn>24</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>112</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>112</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> from the Stem block, the MSC block processes it through four concurrent branches shown in <xref ref-type="disp-formula" rid="eq2">Equations 2</xref>&#x2013;<xref ref-type="disp-formula" rid="eq5">5</xref>:</p>
<disp-formula id="eq2"><label>(2)</label>
<mml:math display="block" id="M2"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>B</mml:mi></mml:mstyle><mml:mn>1</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3c3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>BN</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext>Conv</mml:mtext></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi></mml:mstyle><mml:mn>0</mml:mn></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq3"><label>(3)</label>
<mml:math display="block" id="M3"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>B</mml:mi></mml:mstyle><mml:mn>2</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3c3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>BN</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mtext>Conv</mml:mtext></mml:mrow><mml:mrow><mml:mn>3</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>3</mml:mn></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi></mml:mstyle><mml:mn>0</mml:mn></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq4"><label>(4)</label>
<mml:math display="block" id="M4"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>B</mml:mi></mml:mstyle><mml:mn>3</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3c3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>BN</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mtext>Conv</mml:mtext></mml:mrow><mml:mrow><mml:mn>5</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>5</mml:mn></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi></mml:mstyle><mml:mn>0</mml:mn></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq5"><label>(5)</label>
<mml:math display="block" id="M5"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>B</mml:mi></mml:mstyle><mml:mn>4</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3c3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>BN</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext>Conv</mml:mtext></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mtext>MaxPool</mml:mtext></mml:mrow><mml:mrow><mml:mn>3</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>3</mml:mn></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi></mml:mstyle><mml:mn>0</mml:mn></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>Here, the first branch with 1 &#xd7; 1 kernels performs local feature compaction and inter-channel mixing, the 3 &#xd7; 3 and 5 &#xd7; 5 branches extract mid- and large-scale spatial structures, and the fourth branch incorporates a 3 &#xd7; 3 max-pooling operation to capture contextual cues such as background contrast before projection via a 1 &#xd7; 1 convolution. All convolutional outputs are batch-normalized and activated using ReLU to ensure stable training and non-linear feature transformation.</p>
<p>Finally, the outputs of the four branches are concatenated along the channel dimension, as shown in <xref ref-type="disp-formula" rid="eq6">Equation 6</xref>:</p>
<disp-formula id="eq6"><label>(6)</label>
<mml:math display="block" id="M6"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi></mml:mstyle><mml:mrow><mml:mtext>msc</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mtext>Concat</mml:mtext><mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>B</mml:mi></mml:mstyle><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>B</mml:mi></mml:mstyle><mml:mn>2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>B</mml:mi></mml:mstyle><mml:mn>3</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>B</mml:mi></mml:mstyle><mml:mn>4</mml:mn></mml:msub></mml:mrow><mml:mo stretchy="false">]</mml:mo></mml:mrow><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mn>36</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>112</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>112</mml:mn></mml:mrow></mml:msup><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>To maintain parameter uniformity across hardware deployments, the total output channels are fixed at 36, with each branch contributing roughly one quarter of the total (as determined using the _make_divisible function in the implementation). This design allows the MSC to capture both fine-grained edge patterns and coarse-scale weed morphology while preserving the same spatial resolution as the Stem output. These kernel scales were empirically selected to capture the diverse weed morphologies ranging from fine leaf veins to broad canopy textures. By aggregating multi-scale features early in the network, the subsequent layers can focus on high-level abstraction without sacrificing low-level texture fidelity&#x2014;an essential property for visual tasks executed on resource-constrained MCUs.</p>
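<p>The following PyTorch sketch illustrates how the MSC block of Equations 2&#x2013;6 could be assembled. The per-branch width of 9 channels is an illustrative choice consistent with the 36-channel concatenated output in <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>; the exact widths in the implementation are derived via _make_divisible.</p>
<preformat preformat-type="code">import torch
import torch.nn as nn

class MSCBlock(nn.Module):
    # Multi-scale convolution block (Equations 2-6): four parallel branches
    # whose outputs are concatenated along the channel dimension.
    def __init__(self, c_in=24, c_branch=9):
        super().__init__()
        def conv_bn(cin, cout, k):
            return nn.Sequential(
                nn.Conv2d(cin, cout, k, padding=k // 2, bias=False),
                nn.BatchNorm2d(cout),
                nn.ReLU(inplace=True))
        self.b1 = conv_bn(c_in, c_branch, 1)  # Eq. 2: 1x1 channel mixing
        self.b2 = conv_bn(c_in, c_branch, 3)  # Eq. 3: 3x3 mid-scale
        self.b3 = conv_bn(c_in, c_branch, 5)  # Eq. 4: 5x5 large-scale
        self.b4 = nn.Sequential(              # Eq. 5: 3x3 max pool + 1x1
            nn.MaxPool2d(3, stride=1, padding=1),
            conv_bn(c_in, c_branch, 1))

    def forward(self, x):
        # Eq. 6: concatenate the four branch outputs (24 to 36 channels)
        return torch.cat([self.b1(x), self.b2(x), self.b3(x), self.b4(x)], dim=1)</preformat>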
</sec>
<sec id="s2_2_3">
<label>2.2.3</label>
<title>Inverted Residual block with channel attention</title>
<p>The Inverted Residual block constitutes the core feature transformation unit of TinyWeedNet, combining efficient depthwise separable convolutions with lightweight channel attention to achieve a trade-off between high accuracy and efficiency (<xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>, orange part). Each IR block adopts an &#x201c;expand&#x2013;depthwise&#x2013;project&#x201d; pattern, which was first introduced in MobileNetV2 and later optimized for edge deployment <xref ref-type="bibr" rid="B20">Sandler et&#xa0;al. (2018)</xref>. Given an input feature map <inline-formula>
<mml:math display="inline" id="im16"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>U</mml:mi></mml:mstyle><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mtext>in</mml:mtext></mml:mrow></mml:msub><mml:mo>&#xd7;</mml:mo><mml:mi>H</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>W</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, the IR block expands the channel dimension by a factor of <inline-formula>
<mml:math display="inline" id="im17"><mml:mrow><mml:mtext>E</mml:mtext><mml:mo>=</mml:mo><mml:mn>4</mml:mn></mml:mrow></mml:math></inline-formula>, applies spatial filtering through a depthwise convolution, recalibrates the resulting features via a Channel Attention mechanism, and finally projects the output back to the target dimension <inline-formula>
<mml:math display="inline" id="im18"><mml:mrow><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mtext>out</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>. The value of <inline-formula>
<mml:math display="inline" id="im19"><mml:mtext>E</mml:mtext></mml:math></inline-formula> is determined by the hyperparameter sensitivity analysis discussed later. Residual connections are included when the input and output tensors share identical shapes (<inline-formula>
<mml:math display="inline" id="im20"><mml:mrow><mml:mi>s</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im21"><mml:mrow><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mtext>in</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mtext>out</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>) to facilitate gradient propagation.</p>
<p>The complete transformation can be expressed as in <xref ref-type="disp-formula" rid="eq7">Equations&#xa0;7</xref>&#x2013;<xref ref-type="disp-formula" rid="eq10">10</xref>:</p>
<disp-formula id="eq7"><label>(7)</label>
<mml:math display="block" id="M7"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>Z</mml:mi></mml:mstyle><mml:mi>e</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3c3;</mml:mi><mml:mtext>&#xa0;</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>BN</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext>Conv</mml:mtext></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>U</mml:mi></mml:mstyle><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq8"><label>(8)</label>
<mml:math display="block" id="M8"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>Z</mml:mi></mml:mstyle><mml:mi>d</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3c3;</mml:mi><mml:mtext>&#xa0;</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>BN</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mtext>DWConv</mml:mtext></mml:mrow><mml:mrow><mml:mn>3</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>3</mml:mn></mml:mrow><mml:mi>s</mml:mi></mml:msubsup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>Z</mml:mi></mml:mstyle><mml:mi>e</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq9"><label>(9)</label>
<mml:math display="block" id="M9"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>Z</mml:mi></mml:mstyle><mml:mi>a</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mtext>CA</mml:mtext></mml:mrow><mml:mrow><mml:mi>R</mml:mi><mml:mo>=</mml:mo><mml:mn>8</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>Z</mml:mi></mml:mstyle><mml:mi>d</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq10"><label>(10)</label>
<mml:math display="block" id="M10"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>V</mml:mi></mml:mstyle><mml:mo>=</mml:mo><mml:mtext>BN</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext>Conv</mml:mtext></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>Z</mml:mi></mml:mstyle><mml:mi>a</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im22"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>Z</mml:mi></mml:mstyle><mml:mi>e</mml:mi></mml:msub><mml:mtext>&#xa0;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mtext>&#xa0;</mml:mtext><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>4</mml:mn><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mtext>in</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#xd7;</mml:mo><mml:mi>H</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>W</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, <inline-formula>
<mml:math display="inline" id="im23"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>Z</mml:mi></mml:mstyle><mml:mi>d</mml:mi></mml:msub><mml:mtext>&#xa0;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mtext>&#xa0;</mml:mtext><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>4</mml:mn><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mtext>in</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#xd7;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>H</mml:mi><mml:mo stretchy="false">/</mml:mo><mml:mi>s</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#xd7;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>W</mml:mi><mml:mo stretchy="false">/</mml:mo><mml:mi>s</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, <inline-formula>
<mml:math display="inline" id="im24"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>V</mml:mi></mml:mstyle><mml:mtext>&#xa0;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mtext>&#xa0;</mml:mtext><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mtext>out</mml:mtext></mml:mrow></mml:msub><mml:mo>&#xd7;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>H</mml:mi><mml:mo stretchy="false">/</mml:mo><mml:mi>s</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#xd7;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>W</mml:mi><mml:mo stretchy="false">/</mml:mo><mml:mi>s</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>.</p>
<p>When the stride <inline-formula>
<mml:math display="inline" id="im25"><mml:mrow><mml:mi>s</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula> and the channel dimensions remain unchanged, a residual connection is added shown in <xref ref-type="disp-formula" rid="eq11">Equation 11</xref>:</p>
<disp-formula id="eq11"><label>(11)</label>
<mml:math display="block" id="M11"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>Y</mml:mi></mml:mstyle><mml:mo>=</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>U</mml:mi></mml:mstyle><mml:mo>+</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>V</mml:mi></mml:mstyle><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>This shortcut enhances gradient flow and stabilizes the optimization process, allowing deeper feature extraction without accuracy degradation. By operating primarily in the channel domain, the IR block minimizes the computational cost associated with conventional full convolutions, reducing the number of multiply&#x2013;accumulate operations while retaining representational richness.</p>
<p>To further improve feature discrimination, a lightweight CA (R = 8) mechanism is embedded after the depthwise stage in each IR block (<xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>, gray part). CA adaptively re-weights channel responses by modeling inter-channel dependencies using global pooling operations and a shared two-layer 1 &#xd7; 1 convolutional bottleneck. The value of R is determined by the hyperparameter sensitivity analysis discussed later. For an input feature tensor <inline-formula>
<mml:math display="inline" id="im26"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>Z</mml:mi></mml:mstyle><mml:mi>d</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, the attention response is computed as in <xref ref-type="disp-formula" rid="eq12">Equations 12</xref> and <xref ref-type="disp-formula" rid="eq13">13</xref>:</p>
<disp-formula id="eq12"><label>(12)</label>
<mml:math display="block" id="M12"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>q</mml:mi></mml:mstyle><mml:mrow><mml:mtext>avg</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mtext>GAP</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>Z</mml:mi></mml:mstyle><mml:mi>d</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x2003;</mml:mtext><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>q</mml:mi></mml:mstyle><mml:mrow><mml:mtext>max</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mtext>GMP</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>Z</mml:mi></mml:mstyle><mml:mi>d</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq13"><label>(13)</label>
<mml:math display="block" id="M13"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>s</mml:mi></mml:mstyle><mml:mo>=</mml:mo><mml:msub><mml:mi>&#x3c3;</mml:mi><mml:mi>g</mml:mi></mml:msub><mml:mtext>&#xa0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>W</mml:mi></mml:mstyle><mml:mn>2</mml:mn></mml:msub><mml:mtext>&#xa0;</mml:mtext><mml:mi>&#x3c3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>W</mml:mi></mml:mstyle><mml:mn>1</mml:mn></mml:msub><mml:mtext>&#xa0;</mml:mtext><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>q</mml:mi></mml:mstyle><mml:mrow><mml:mtext>avg</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>W</mml:mi></mml:mstyle><mml:mn>2</mml:mn></mml:msub><mml:mtext>&#xa0;</mml:mtext><mml:mi>&#x3c3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>W</mml:mi></mml:mstyle><mml:mn>1</mml:mn></mml:msub><mml:mtext>&#xa0;</mml:mtext><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>q</mml:mi></mml:mstyle><mml:mrow><mml:mtext>max</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im27"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>W</mml:mi></mml:mstyle><mml:mn>1</mml:mn></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mfrac><mml:mi>C</mml:mi><mml:mi>R</mml:mi></mml:mfrac><mml:mo>&#xd7;</mml:mo><mml:mi>C</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im28"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>W</mml:mi></mml:mstyle><mml:mn>2</mml:mn></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mi>C</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mfrac><mml:mi>C</mml:mi><mml:mi>R</mml:mi></mml:mfrac></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> are <inline-formula>
<mml:math display="inline" id="im29"><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula> convolutional layers functioning as an efficient two-layer perceptron, <inline-formula>
<mml:math display="inline" id="im30"><mml:mrow><mml:mi>&#x3c3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mo>&#xb7;</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> denotes ReLU, and <inline-formula>
<mml:math display="inline" id="im31"><mml:mrow><mml:msub><mml:mi>&#x3c3;</mml:mi><mml:mi>g</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mo>&#xb7;</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> is the sigmoid activation. The attended feature map is obtained as <inline-formula>
<mml:math display="inline" id="im32"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>Z</mml:mi></mml:mstyle><mml:mi>a</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>Z</mml:mi></mml:mstyle><mml:mi>d</mml:mi></mml:msub><mml:mo>&#x2299;</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>s</mml:mi></mml:mstyle></mml:mrow></mml:math></inline-formula>, where <inline-formula>
<mml:math display="inline" id="im33"><mml:mo>&#x2299;</mml:mo></mml:math></inline-formula> represents channel-wise multiplication. With a reduction ratio <inline-formula>
<mml:math display="inline" id="im34"><mml:mrow><mml:mi>R</mml:mi><mml:mo>=</mml:mo><mml:mn>8</mml:mn><mml:mo>,</mml:mo></mml:mrow></mml:math></inline-formula> this mechanism imposes minimal overhead while enabling the network to emphasize weed-related features (e.g., leaf shape, vein contrast) and suppress background noise.</p>
<p>TinyWeedNet stacks five IR blocks sequentially, progressively reducing the spatial resolution while expanding channel depth. The first four IR blocks employ a stride of <inline-formula>
<mml:math display="inline" id="im35"><mml:mrow><mml:mi>s</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:math></inline-formula> for spatial downsampling, and the final block keeps <inline-formula>
<mml:math display="inline" id="im36"><mml:mrow><mml:mi>s</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula> to preserve high-level spatial context.</p>
<p>This hierarchical design enables gradual abstraction from low-level spatial textures to high-level semantic representations, ensuring robust weed-background separation under varying field conditions. The embedded CA modules at each stage further enhance intra-class compactness and inter-class separability, leading to improved classification robustness on resource-constrained devices.</p>
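<p>A corresponding sketch of one IR block and of the five-block stack is given below; it reuses the ChannelAttention module sketched above. The intermediate channel widths, apart from the final 120 channels stated in Section 2.2.4, are placeholder assumptions.</p>
<preformat>
# Sketch of one inverted residual (IR) block (expand, depthwise, CA,
# project) and the five-block stack with strides [2, 2, 2, 2, 1].
import torch.nn as nn

class IRBlock(nn.Module):
    def __init__(self, c_in, c_out, expand_ratio=4, stride=1, reduction=8):
        super().__init__()
        c_mid = c_in * expand_ratio
        self.use_residual = stride == 1 and c_in == c_out
        self.block = nn.Sequential(
            nn.Conv2d(c_in, c_mid, 1, bias=False),            # 1x1 expansion
            nn.BatchNorm2d(c_mid), nn.ReLU(inplace=True),
            nn.Conv2d(c_mid, c_mid, 3, stride=stride,         # depthwise stage
                      padding=1, groups=c_mid, bias=False),
            nn.BatchNorm2d(c_mid), nn.ReLU(inplace=True),
            ChannelAttention(c_mid, reduction),               # CA after depthwise
            nn.Conv2d(c_mid, c_out, 1, bias=False),           # linear projection
            nn.BatchNorm2d(c_out),
        )

    def forward(self, x):
        y = self.block(x)
        return x + y if self.use_residual else y

# Four stride-2 blocks for downsampling, final block at stride 1;
# widths other than the final 120 are placeholder assumptions.
stack = nn.Sequential(*[IRBlock(ci, co, stride=s) for ci, co, s in
                        [(24, 32, 2), (32, 48, 2), (48, 64, 2),
                         (64, 96, 2), (96, 120, 1)]])
</preformat>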
</sec>
<sec id="s2_2_4">
<label>2.2.4</label>
<title>Final Projection and Classification Head</title>
<p>The Final Projection and Classification Head (<xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref> pink part) aggregates the high-level representations extracted by the preceding IR blocks and transforms them into class-level predictions. After the last IR stage, the feature tensor <inline-formula>
<mml:math display="inline" id="im37"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi></mml:mstyle><mml:mrow><mml:mtext>IR</mml:mtext><mml:mn>5</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mn>120</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>7</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>7</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> undergoes a <inline-formula>
<mml:math display="inline" id="im38"><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula> convolutional projection that expands the channel dimension to <inline-formula>
<mml:math display="inline" id="im39"><mml:mrow><mml:msub><mml:mi>C</mml:mi><mml:mi>f</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>240</mml:mn></mml:mrow></mml:math></inline-formula> for enhanced representational richness shown in <xref ref-type="disp-formula" rid="eq14">Equation 14</xref>:</p>
<disp-formula id="eq14"><label>(14)</label>
<mml:math display="block" id="M14"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi></mml:mstyle><mml:mi>f</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3c3;</mml:mi><mml:mtext>&#xa0;</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>BN</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext>Conv</mml:mtext></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi></mml:mstyle><mml:mrow><mml:mtext>IR</mml:mtext><mml:mn>5</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x2003;</mml:mtext><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi></mml:mstyle><mml:mi>f</mml:mi></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mn>240</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>7</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>7</mml:mn></mml:mrow></mml:msup><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>This 1 &#xd7; 1 layer performs linear channel mixing without altering spatial dimensions, allowing the network to consolidate multi-channel semantic descriptors from previous layers into a compact yet expressive feature space. Batch Normalization stabilizes the activation distribution, and the ReLU activation introduces non-linearity while preserving computational simplicity for MCU deployment.</p>
<p>Subsequently, global average pooling (GAP) aggregates the spatial information of <inline-formula>
<mml:math display="inline" id="im40"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi></mml:mstyle><mml:mi>f</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> into a 240-dimensional feature vector shown in <xref ref-type="disp-formula" rid="eq15">Equation 15</xref>:</p>
<disp-formula id="eq15"><label>(15)</label>
<mml:math display="block" id="M15"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>p</mml:mi></mml:mstyle><mml:mo>=</mml:mo><mml:mtext>GAP</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>F</mml:mi></mml:mstyle><mml:mi>f</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mn>240</mml:mn></mml:mrow></mml:msup><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>This operation effectively converts each feature map into a single representative statistic, thus eliminating spatial redundancy and ensuring translation invariance&#x2014;a desirable property for real-world weed imagery where plants appear at varying positions and scales.</p>
<p>To prevent overfitting and improve generalization, a dropout layer with a probability of p = 0.2 is applied before the final classifier. The resulting feature vector is then passed through a fully connected (FC) layer to produce the final prediction vector <inline-formula>
<mml:math display="inline" id="im41"><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mstyle></mml:math></inline-formula> over the nine target weed categories shown in <xref ref-type="disp-formula" rid="eq16">Equation 16</xref>:</p>
<disp-formula id="eq16"><label>(16)</label>
<mml:math display="block" id="M16"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mstyle><mml:mo>=</mml:mo><mml:mtext>FC</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>Dropout</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>p</mml:mi></mml:mstyle><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mn>9</mml:mn></mml:msup><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>This minimalistic classification head provides a favorable trade-off between inference latency and predictive accuracy. By combining global pooling with a single dense layer, TinyWeedNet avoids the parameter overhead of multi-layer classifiers while maintaining discriminative capability. Such a streamlined design, together with integer-quantization compatibility, ensures that the network can be deployed on low-power STM32 MCUs for real-time weed classification tasks.</p>
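<p>As a summary of Equations 14&#x2013;16, a minimal PyTorch sketch of the head follows; the layer choices map one-to-one onto the equations, while the module composition itself is an illustrative assumption.</p>
<preformat>
# Minimal sketch of the final projection and classification head
# (Equations 14-16): 1x1 conv 120 -> 240, BN + ReLU, GAP, dropout
# with p = 0.2, and a single FC layer over the nine classes.
import torch.nn as nn

head = nn.Sequential(
    nn.Conv2d(120, 240, kernel_size=1, bias=False),  # Eq. 14: channel expansion
    nn.BatchNorm2d(240),
    nn.ReLU(inplace=True),
    nn.AdaptiveAvgPool2d(1),                         # Eq. 15: GAP to a 240-d vector
    nn.Flatten(),
    nn.Dropout(p=0.2),
    nn.Linear(240, 9),                               # Eq. 16: class logits
)
</preformat>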
</sec>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Experimental configuration and baseline models</title>
<p>All experiments were conducted on a high-performance computing platform running Ubuntu 22.04, featuring an Intel<sup>&#xae;</sup> Core&#x2122; i9-12900 processor, 32 GB of DDR5 RAM, and an NVIDIA GeForce RTX 3090 GPU. The development environment consisted of the VS Code IDE with Python 3.9.19 and the PyTorch<xref ref-type="fn" rid="fn2"><sup>2</sup></xref> 2.8.0 framework. The training hyperparameters were as follows: a learning rate of 0.001, a batch size of 32, and 100 epochs with the SGD optimizer. The cross-entropy loss function was used, and an early stopping strategy terminated training when the accuracy no longer improved, i.e., once the model had converged.</p>
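<p>A minimal sketch of this training setup is shown below; the patience value and the train_one_epoch/evaluate helpers (and the data loaders) are hypothetical placeholders, while the optimizer, loss, and schedule follow the stated configuration.</p>
<preformat>
# Sketch of the stated training configuration (SGD, lr = 0.001,
# batch size 32, 100 epochs, cross-entropy, early stopping).
import torch

optimizer = torch.optim.SGD(model.parameters(), lr=0.001)  # model assumed in scope
criterion = torch.nn.CrossEntropyLoss()

best_acc, stale, patience = 0.0, 0, 10  # patience value is an assumption
for epoch in range(100):
    train_one_epoch(model, train_loader, optimizer, criterion)  # assumed helper
    acc = evaluate(model, test_loader)                          # assumed helper
    if acc > best_acc:
        best_acc, stale = acc, 0
    else:
        stale += 1
        if stale >= patience:
            break  # early stopping: accuracy no longer improving
</preformat>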
<p>To comprehensively evaluate the effectiveness of the proposed TinyWeedNet architecture, a comparison study was conducted against several state-of-the-art (SOTA) CNNs that are widely adopted for image classification tasks. The selected models represent diverse architectural paradigms, ranging from early compact designs to more advanced high-capacity networks. Specifically, the comparison includes SqueezeNet <xref ref-type="bibr" rid="B11">Iandola et&#xa0;al. (2016)</xref>, GoogleNet <xref ref-type="bibr" rid="B27">Szegedy et&#xa0;al. (2016)</xref>, VGG16 and VGG19 <xref ref-type="bibr" rid="B25">Simonyan and Zisserman (2014)</xref>, ResNet18 and ResNet101 <xref ref-type="bibr" rid="B8">He et&#xa0;al. (2016)</xref>, Inception v3 <xref ref-type="bibr" rid="B27">Szegedy et&#xa0;al. (2016)</xref>, MobileNet V2 <xref ref-type="bibr" rid="B20">Sandler et&#xa0;al. (2018)</xref>, Xception <xref ref-type="bibr" rid="B4">Chollet (2017)</xref>, and EfficientNet B3 <xref ref-type="bibr" rid="B29">Tan et&#xa0;al. (2019)</xref>. These models were selected to cover a broad spectrum of model sizes, depths, and computational complexities, providing a representative benchmark for assessing both accuracy and efficiency.</p>
<p>All networks were trained and tested under the same experimental protocol to ensure a fair comparison. The input image size was fixed at 224 &#xd7; 224 &#xd7; 3, and identical data preprocessing was applied across all models. Each model was trained from scratch on the weed classification dataset for the same number of epochs, using the same learning-rate schedule and batch size. For lightweight architectures (e.g., MobileNetV2, SqueezeNet), default width multipliers were used, while for heavy architectures (e.g., ResNet101, EfficientNet-B3), depth and width configurations were kept standard without additional pruning.</p>
<p>The performance metrics considered include classification accuracy, F1-score, and parameter count, which jointly reflect predictive performance and model complexity. All experiments were repeated ten times, and the mean values with their standard deviations (mean &#xb1; SD) were reported.</p>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>MCU deployment framework</title>
<p>To achieve on-device weed classification, the optimized TinyWeedNet model is deployed on an STM32 MCU following the standardized Tiny Machine Learning workflow, as illustrated in <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>. The deployment process involves four main stages: (1) model design and training using PyTorch; (2) format conversion and optimization through the Open Neural Network Exchange<xref ref-type="fn" rid="fn3"><sup>3</sup></xref> (ONNX) intermediate representation; (3) conversion to MCU-executable code via STM32Cube.AI; and (4) model integration and inference execution on embedded hardware. After model training, the PyTorch checkpoint is exported to ONNX as a framework-independent representation, which is then imported into STM32Cube.AI for automatic graph optimization and layer fusion. The tool subsequently generates ANSI C source code with fixed-point parameters, including initialization functions and inference routines that can be compiled within STM32CubeIDE. This workflow ensures a seamless transition from high-level model development to resource-constrained deployment, enabling efficient, fully integer-based inference execution with memory-aware scheduling on embedded targets.</p>
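<p>Stage (2) of this workflow can be sketched with the standard torch.onnx.export API as follows; the file name, tensor names, and opset version are assumptions.</p>
<preformat>
# Exporting the trained PyTorch checkpoint to a framework-independent
# ONNX graph for import into STM32Cube.AI.
import torch

model.eval()                          # trained TinyWeedNet, assumed in scope
dummy = torch.randn(1, 3, 224, 224)   # fixed 224 x 224 x 3 input, batch of 1
torch.onnx.export(
    model, dummy, "tinyweednet.onnx",
    input_names=["image"], output_names=["logits"],
    opset_version=13,
)
# The resulting .onnx file is then imported into STM32Cube.AI, which
# performs graph optimization, layer fusion, and C code generation.
</preformat>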
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Tiny machine learning workflow.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1747863-g003.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a weeds image classification pipeline: start with weeds images, proceed through lightweight CNN design, model optimization, edge deployment, and edge inference. Respective tools shown include PyTorch, ONNX, STM32Cube.AI, and STM32 MCU.</alt-text>
</graphic></fig>
<p>The model deployment and evaluation were conducted on an STM32H7B3I-EVAL development board. The main hardware specifications are summarized in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>. All experiments were performed in the 3.3 V low-dropout (LDO) regulation mode, which allows power consumption to be measured through the onboard current-sensing interface. Internal SRAM and flash memory were prioritized for model deployment and execution, while external memory was utilized only when internal storage capacity was insufficient.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Technical specifications of STM32H7B3I-EVAL.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Parameter</th>
<th valign="middle" align="center">STM32H7B3I-EVAL development board</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">MCU</td>
<td valign="middle" align="center">STM32H7B3LIH6Q</td>
</tr>
<tr>
<td valign="middle" align="center">CPU Core</td>
<td valign="middle" align="center">ARM Cortex M7 with FPU</td>
</tr>
<tr>
<td valign="middle" align="center">CPU Frequency</td>
<td valign="middle" align="center">280MHz</td>
</tr>
<tr>
<td valign="middle" align="center">RAM</td>
<td valign="middle" align="center">1.4 MB internal + 16 MB external SRAM</td>
</tr>
<tr>
<td valign="middle" align="center">Flash</td>
<td valign="middle" align="center">2 MB internal + 64 MB external flash</td>
</tr>
<tr>
<td valign="middle" align="center">Voltage</td>
<td valign="middle" align="center">3.3V</td>
</tr>
<tr>
<td valign="middle" align="center">Operating Current</td>
<td valign="middle" align="center">132.5mA (Low-Dropout Regulator mode)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Model integration, code compilation, and firmware flashing were performed in STM32CubeIDE using C++ as the implementation language. The generated inference code was linked with the STM32 HAL library and executed on bare-metal firmware without an operating system. Ten images were stored in array format on the MCU as model inputs for on-device performance testing. Model accuracy was tested on the PC using the ONNX framework.</p>
<p>This deployment framework provides a reusable toolchain for converting TinyWeedNet from a high-level deep learning model to a fully functional TinyML implementation on an MCU. It also establishes a unified environment for subsequent measurements of latency, memory usage, and energy consumption under real-world embedded conditions.</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Hyperparameter exploration</title>
<p>To systematically explore the trade-off between model accuracy and deployment efficiency, a hyperparameter analysis was conducted focusing on three key architectural parameters of TinyWeedNet: the Expand Ratio, the Reduction Ratio, and the Stem Channels. The selection of these three hyperparameters was guided by both architectural significance and deployment practicality. These hyperparameters control the expansion width in IR blocks, the compression factor in CA modules, and the number of feature maps in the initial stem convolution layer, respectively.</p>
<p>For each hyperparameter, a discrete search space was defined based on empirical design constraints and MCU memory limits:</p>
<list list-type="bullet">
<list-item>
<p>Expand Ratio <inline-formula>
<mml:math display="inline" id="im42"><mml:mrow><mml:mi>E</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mn>3</mml:mn><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mn>4</mml:mn><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mn>6</mml:mn></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula></p></list-item>
<list-item>
<p>Reduction Ratio <inline-formula>
<mml:math display="inline" id="im43"><mml:mrow><mml:mi>R</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mn>4</mml:mn><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mn>8</mml:mn><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mn>16</mml:mn></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula></p></list-item>
<list-item>
<p>Stem Channels <inline-formula>
<mml:math display="inline" id="im44"><mml:mrow><mml:mi>S</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mn>8</mml:mn><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mn>16</mml:mn><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mn>24</mml:mn></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula></p></list-item>
</list>
<p>A full factorial combination experiment was performed, resulting in a total of 3 &#xd7; 3 &#xd7; 3 = 27 model variants, as sketched below. All configurations were trained using the same optimization setup and evaluated on both accuracy-oriented and deployment-oriented metrics, including classification accuracy, F1-score, number of parameters, model size, and inference latency. To ensure fairness, each configuration was trained from scratch under identical random seeds and training epochs, and early stopping was applied based on the model&#x2019;s accuracy on the test set.</p>
<p>The <italic>Expand Ratio (E)</italic> directly controls the intermediate channel width within the inverted residual (IR) blocks, thereby influencing the network&#x2019;s representational capacity and computational cost. Lower values (e.g., <italic>E</italic> = 3) favor compactness and lower latency, whereas higher values (e.g., <italic>E</italic> = 6) enhance feature richness at the expense of larger parameter counts. The <italic>Reduction Ratio (R)</italic> in the CA module determines the degree of channel compression and thus governs the balance between feature selectivity and overhead in the attention subnetwork. A moderate reduction ratio (<italic>R</italic> = 8) was hypothesized to offer the best trade-off between discriminative capability and MCU efficiency. Finally, the <italic>Stem Channels (S)</italic> parameter controls the number of filters in the initial convolution layer, affecting both early feature diversity and the propagation of representational capacity throughout the network.</p>
<p>The discrete ranges of E, R, and S were selected based on prior empirical findings from lightweight CNNs such as MobileNet and EfficientNet, as well as hardware profiling constraints observed on the STM32H7 platform. Values beyond these ranges (e.g., <inline-formula>
<mml:math display="inline" id="im45"><mml:mrow><mml:mi>E</mml:mi><mml:mo>&gt;</mml:mo><mml:mn>6</mml:mn></mml:mrow></mml:math></inline-formula> or <inline-formula>
<mml:math display="inline" id="im46"><mml:mrow><mml:mi>S</mml:mi><mml:mo>&gt;</mml:mo><mml:mn>24</mml:mn></mml:mrow></mml:math></inline-formula>) led to significant memory overflow or increased inference latency without measurable accuracy gain in preliminary tests. A full-factorial exploration was therefore adopted to comprehensively assess the interdependence among these factors and to ensure that the final configuration, later identified as Combo15, represents a globally optimal design rather than a locally tuned solution.</p>
<p>The quantitative outcomes and deployment performance associated with all hyperparameter configurations are analyzed in Section 3, where the trade-offs between accuracy, latency, and energy consumption are discussed in detail.</p>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>Ablation study design</title>
<p>To evaluate the contribution of each key component within the proposed TinyWeedNet architecture, a series of ablation experiments were conducted. Each experiment selectively disables or replaces a specific module while keeping all other training and evaluation settings identical to the baseline configuration. This allows for isolating the impact of individual design choices on both model accuracy and deployment performance.</p>
<p>Four ablation variants were designed by removing or replacing one specific module of the original model, as follows:</p>
<list list-type="bullet">
<list-item>
<p>The Multi-Scale Convolution module is replaced with a single 3 &#xd7; 3 convolution branch, removing the parallel receptive-field aggregation mechanism.</p></list-item>
<list-item>
<p>All Channel Attention modules are removed from the IR blocks, disabling adaptive feature recalibration.</p></list-item>
<list-item>
<p>Depthwise convolutions are replaced with standard 3 &#xd7; 3 convolutions, resulting in higher computational cost but unchanged spatial topology.</p></list-item>
<list-item>
<p>The Final Conv1&#xd7;1 projection layer before global average pooling is omitted, and the feature maps are directly fed into the GAP&#x2013;FC head.</p></list-item>
</list>
<p>Each simplified model was retrained from scratch using the same optimization settings, data preprocessing pipeline, and hyperparameter configuration as the baseline TinyWeedNet model. The performance was then evaluated using identical metrics, including classification accuracy, F1-score, number of parameters, model size, inference latency, and energy per inference.</p>
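<p>Conceptually, the four variants can be expressed as configuration flags over a common model builder, as in the following hypothetical sketch; the flag names are illustrative assumptions.</p>
<preformat>
# Hypothetical flag sets for the four ablation variants; each entry
# differs from the baseline in exactly one module, with all training
# and evaluation settings unchanged.
BASELINE = dict(multiscale=True, channel_attention=True,
                depthwise=True, final_projection=True)

ABLATIONS = {
    "single 3x3 stem":      dict(BASELINE, multiscale=False),
    "no channel attention": dict(BASELINE, channel_attention=False),
    "standard 3x3 convs":   dict(BASELINE, depthwise=False),
    "no final 1x1 conv":    dict(BASELINE, final_projection=False),
}
</preformat>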
</sec>
<sec id="s2_7">
<label>2.7</label>
<title>Robustness analysis design under controlled domain shifts</title>
<p>To evaluate the robustness of the proposed model under realistic environmental variations without introducing additional datasets, a controlled robustness analysis is conducted on the DeepWeeds test set. Rather than assessing cross-dataset generalization, which would require out-of-distribution data, this analysis focuses on quantifying the model&#x2019;s sensitivity to appearance changes commonly encountered in field deployment scenarios, including illumination variations, weather-induced image degradation, and soil or background color shifts.</p>
<p>Let <inline-formula>
<mml:math display="inline" id="im47"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>I</mml:mi></mml:mstyle><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mi>H</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>W</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> denote a clean RGB test image, and let <inline-formula>
<mml:math display="inline" id="im48"><mml:mrow><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mo>&#xb7;</mml:mo><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> represent the trained classification model. For each robustness experiment, a perturbed image <inline-formula>
<mml:math display="inline" id="im49"><mml:mstyle mathvariant="bold" mathsize="normal"><mml:msup><mml:mi>I</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mstyle></mml:math></inline-formula> is generated by applying a transformation <inline-formula>
<mml:math display="inline" id="im50"><mml:mrow><mml:mi mathvariant="script">T</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mo>&#xb7;</mml:mo><mml:mo>;</mml:mo><mml:mi>&#x3b8;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, while keeping the ground-truth label unchanged.</p>
<sec id="s2_7_1">
<label>2.7.1</label>
<title>Illumination variations</title>
<p>Illumination changes are simulated using four photometric transformations: brightness adjustment, contrast scaling, gamma correction, and white-balance shift.</p>
<p>Brightness adjustment is defined as in <xref ref-type="disp-formula" rid="eq17">Equation 17</xref>.</p>
<disp-formula id="eq17"><label>(17)</label>
<mml:math display="block" id="M17"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:msup><mml:mi>I</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mstyle><mml:mo>=</mml:mo><mml:mtext>clip</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>I</mml:mi></mml:mstyle><mml:mo>+</mml:mo><mml:mi>&#x3b2;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im51"><mml:mrow><mml:mi>&#x3b2;</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mn>0.10</mml:mn><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mn>0.20</mml:mn><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mn>0.30</mml:mn></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> controls the intensity of brightness variation after image normalization to [0,1]. These values are selected to represent mild to strong exposure changes commonly observed in outdoor agricultural environments.</p>
<p>Contrast scaling is formulated as in <xref ref-type="disp-formula" rid="eq18">Equation 18</xref>.</p>
<disp-formula id="eq18"><label>(18)</label>
<mml:math display="block" id="M18"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:msup><mml:mi>I</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mstyle><mml:mo>=</mml:mo><mml:mtext>clip</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x3b1;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>I</mml:mi></mml:mstyle><mml:mo>&#x2212;</mml:mo><mml:mi>&#x3bc;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:mi>&#x3bc;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im52"><mml:mi>&#x3bc;</mml:mi></mml:math></inline-formula> denotes the mean pixel intensity and <inline-formula>
<mml:math display="inline" id="im53"><mml:mrow><mml:mi>&#x3b1;</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mn>0.8</mml:mn><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mn>0.6</mml:mn><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mn>0.4</mml:mn></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> represents progressively decreasing contrast levels. This range captures gradual contrast degradation caused by overcast weather or sensor limitations while avoiding unrealistic visual artifacts.</p>
<p>Gamma correction is applied as in <xref ref-type="disp-formula" rid="eq19">Equation 19</xref>.</p>
<disp-formula id="eq19"><label>(19)</label>
<mml:math display="block" id="M19"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:msup><mml:mi>I</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mstyle><mml:mo>=</mml:mo><mml:msup><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>I</mml:mi></mml:mstyle><mml:mi>&#x3b3;</mml:mi></mml:msup><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>with <inline-formula>
<mml:math display="inline" id="im54"><mml:mrow><mml:mtext>&#x3b3;</mml:mtext><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mn>0.8</mml:mn><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mn>1.2</mml:mn><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mn>1.6</mml:mn></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>. These values are chosen to approximate non-linear illumination effects under shadowed and high-glare conditions frequently encountered in field imaging.</p>
<p>White-balance shift is modeled through channel-wise scaling as in <xref ref-type="disp-formula" rid="eq20">Equation 20</xref>:</p>
<disp-formula id="eq20"><label>(20)</label>
<mml:math display="block" id="M20"><mml:mrow><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:msup><mml:mi>I</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mstyle><mml:mi>c</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>c</mml:mi></mml:msub><mml:mo>&#xb7;</mml:mo><mml:msub><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>I</mml:mi></mml:mstyle><mml:mi>c</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x2003;</mml:mtext><mml:mi>c</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mi>R</mml:mi><mml:mo>,</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mi>G</mml:mi><mml:mo>,</mml:mo><mml:mi>B</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im55"><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>s</mml:mi><mml:mi>R</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>G</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>s</mml:mi><mml:mi>B</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1.1</mml:mn><mml:mo>,</mml:mo><mml:mn>1.0</mml:mn><mml:mo>,</mml:mo><mml:mn>0.9</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1.2</mml:mn><mml:mo>,</mml:mo><mml:mn>1.0</mml:mn><mml:mo>,</mml:mo><mml:mn>0.8</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1.3</mml:mn><mml:mo>,</mml:mo><mml:mn>1.0</mml:mn><mml:mo>,</mml:mo><mml:mn>0.7</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>. These settings simulate color temperature variations caused by different sunlight spectra throughout the day.</p>
</sec>
<sec id="s2_7_2">
<label>2.7.2</label>
<title>Weather-induced degradation</title>
<p>Weather-related image degradation is approximated using Gaussian blur, which models reduced image sharpness due to motion, defocus, light rain, or atmospheric haze.</p>
<p>Gaussian blur is defined as in <xref ref-type="disp-formula" rid="eq21">Equation 21</xref>.</p>
<disp-formula id="eq21"><label>(21)</label>
<mml:math display="block" id="M21"><mml:mrow><mml:mstyle mathvariant="bold" mathsize="normal"><mml:msup><mml:mi>I</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup></mml:mstyle><mml:mo>=</mml:mo><mml:mstyle mathvariant="bold" mathsize="normal"><mml:mi>I</mml:mi></mml:mstyle><mml:mo>*</mml:mo><mml:msub><mml:mi mathvariant="script">G</mml:mi><mml:mi>&#x3c3;</mml:mi></mml:msub><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im56"><mml:mrow><mml:msub><mml:mi mathvariant="script">G</mml:mi><mml:mi>&#x3c3;</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> denotes a Gaussian kernel. Kernel sizes of {3, 5, 7} are employed to represent increasing levels of degradation severity. These values are selected to reflect realistic blur conditions encountered by mobile agricultural platforms, while preserving the semantic structure of the target objects.</p>
</sec>
<sec id="s2_7_3">
<label>2.7.3</label>
<title>Soil and background color variation</title>
<p>To approximate variations in soil appearance and background color distributions without introducing new textures or datasets, a hue shift is applied in the HSV color space as in <xref ref-type="disp-formula" rid="eq22">Equation 22</xref>:</p>
<disp-formula id="eq22"><label>(22)</label>
<mml:math display="block" id="M22"><mml:mrow><mml:msup><mml:mi>H</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msup><mml:mo>=</mml:mo><mml:mi>H</mml:mi><mml:mo>+</mml:mo><mml:mtext>&#x394;</mml:mtext><mml:mi>h</mml:mi><mml:mo>,</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im57"><mml:mrow><mml:mtext>&#x394;</mml:mtext><mml:mi>h</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mo>&#xb1;</mml:mo><mml:msup><mml:mn>5</mml:mn><mml:mo>&#x2218;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:mo>&#xb1;</mml:mo><mml:msup><mml:mrow><mml:mn>10</mml:mn></mml:mrow><mml:mo>&#x2218;</mml:mo></mml:msup><mml:mo>,</mml:mo><mml:mo>&#xb1;</mml:mo><mml:msup><mml:mrow><mml:mn>15</mml:mn></mml:mrow><mml:mo>&#x2218;</mml:mo></mml:msup></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>. This perturbation serves as a lightweight proxy for changes in soil color, vegetation background, and regional color distributions, enabling an initial robustness assessment under controlled background-related domain shifts.</p>
</sec>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results and discussion</title>
<sec id="s3_1">
<label>3.1</label>
<title>CNN benchmark comparison</title>
<p>As shown in <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref> and <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>, TinyWeedNet achieves competitive classification accuracy (97.26%) and F1-score (96.64%), comparable to deeper architectures such as ResNet101 while using only 0.48M parameters. This represents a parameter reduction of roughly two orders of magnitude compared to VGG- and ResNet-family models, highlighting the efficiency of the proposed design. Although ResNet101 achieved a slightly higher accuracy (97.82%), TinyWeedNet offers nearly 90&#xd7; higher accuracy-per-parameter efficiency (0.97/0.48M vs. 0.97/42.5M). Unlike large models that rely on extensive channel widths and deep hierarchies, TinyWeedNet achieves strong discriminative capability through its compact combination of multi-scale convolutions, depthwise separable operations, and lightweight channel attention mechanisms.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Comparison of classification performance and model complexity.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Model</th>
<th valign="middle" align="center">Accuracy (%)</th>
<th valign="middle" align="center">F1-score (%)</th>
<th valign="middle" align="center">Params. (M)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">SqueezeNet</td>
<td valign="middle" align="center">81.55 &#xb1; 0.87</td>
<td valign="middle" align="center">81.98 &#xb1; 0.76</td>
<td valign="middle" align="center">0.7</td>
</tr>
<tr>
<td valign="middle" align="left">GoogleNet</td>
<td valign="middle" align="center">83.33 &#xb1; 0.45</td>
<td valign="middle" align="center">83.64 &#xb1; 0.41</td>
<td valign="middle" align="center">12.0</td>
</tr>
<tr>
<td valign="middle" align="left">VGG16</td>
<td valign="middle" align="center">90.12 &#xb1; 0.25</td>
<td valign="middle" align="center">90.33 &#xb1; 0.22</td>
<td valign="middle" align="center">134.3</td>
</tr>
<tr>
<td valign="middle" align="left">VGG19</td>
<td valign="middle" align="center">92.45 &#xb1; 0.49</td>
<td valign="middle" align="center">94.48 &#xb1; 0.41</td>
<td valign="middle" align="center">139.6</td>
</tr>
<tr>
<td valign="middle" align="left">ResNet18</td>
<td valign="middle" align="center">83.22 &#xb1; 0.91</td>
<td valign="middle" align="center">83.55 &#xb1; 0.78</td>
<td valign="middle" align="center">25.6</td>
</tr>
<tr>
<td valign="middle" align="left">ResNet101</td>
<td valign="middle" align="center">97.82 &#xb1; 0.66</td>
<td valign="middle" align="center">97.76 &#xb1; 0.43</td>
<td valign="middle" align="center">42.5</td>
</tr>
<tr>
<td valign="middle" align="left">Inception-v3</td>
<td valign="middle" align="center">88.11 &#xb1; 0.48</td>
<td valign="middle" align="center">88.33 &#xb1; 0.58</td>
<td valign="middle" align="center">21.8</td>
</tr>
<tr>
<td valign="middle" align="left">MobileNetV2</td>
<td valign="middle" align="center">85.13 &#xb1; 0.34</td>
<td valign="middle" align="center">84.46 &#xb1; 0.44</td>
<td valign="middle" align="center">2.2</td>
</tr>
<tr>
<td valign="middle" align="left">Xception</td>
<td valign="middle" align="center">90.23 &#xb1; 0.76</td>
<td valign="middle" align="center">89.56 &#xb1; 0.41</td>
<td valign="middle" align="center">25.6</td>
</tr>
<tr>
<td valign="middle" align="left">EfficientNet-B3</td>
<td valign="middle" align="center">88.83 &#xb1; 0.98</td>
<td valign="middle" align="center">87.85 &#xb1; 0.77</td>
<td valign="middle" align="center">11.2</td>
</tr>
<tr>
<td valign="middle" align="left"><bold>TinyWeedNet</bold></td>
<td valign="middle" align="center">97.26 &#xb1; 0.72</td>
<td valign="middle" align="center">96.64 &#xb1; 0.76</td>
<td valign="middle" align="center">0.4758</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>RED and BLUE indicate the first and second highest ranking results, respectively. BOLD indicates the proposed model.</p></fn>
</table-wrap-foot>
</table-wrap>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Training accuracy curves of different CNN models over 100 epochs.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1747863-g004.tif">
<alt-text content-type="machine-generated">Line chart comparing training accuracy against epochs for eleven neural network models, with ResNet101 and TinyWeedNet achieving the highest accuracy and GoogleNet the lowest. Legend matches each model to a colored line.</alt-text>
</graphic></fig>
<p>In contrast, other lightweight networks (e.g., SqueezeNet and MobileNetV2) demonstrate accuracy values below 86%, indicating limited feature representation power for complex natural scenes. TinyWeedNet therefore provides a more balanced trade-off between accuracy and compactness, making it especially suitable for deployment in resource-constrained edge devices.</p>
<p>Overall, the benchmark results demonstrate that TinyWeedNet&#xa0;achieves near state-of-the-art accuracy with the lowest model complexity among all tested CNNs. This performance&#x2013;efficiency synergy confirms its suitability for embedded weed recognition tasks and supports its deployment on MCU-based TinyML platforms.</p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Hyperparameter sensitivity analysis</title>
<p><xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref> summarizes the joint influence of three key architectural hyperparameters, the Expand Ratio, Reduction Ratio, and Stem Channels, on both classification performance and MCU deployment efficiency. A total of 27 network configurations were systematically trained and evaluated to investigate how these factors affect representational capacity, computational complexity, and&#xa0;resource utilization. These experiments were designed to validate the effectiveness of the proposed TinyWeedNet configuration (E = 4, R = 8, and S = 24), which was originally derived from theoretical design considerations and preliminary observations on model scaling.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Hyperparameter sensitivity analysis and On-MCU performance across combined configurations: Expand Ratio (E), Reduction Ratio (R), and Stem Channels (S).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">ID</th>
<th valign="middle" align="center">Experiment</th>
<th valign="middle" align="center">Params.</th>
<th valign="middle" align="center">Model size (MB)</th>
<th valign="middle" align="center">Accuracy (%)</th>
<th valign="middle" align="center">F1-score (%)</th>
<th valign="middle" align="center">Inf. time (ms)</th>
<th valign="middle" align="center">MACC</th>
<th valign="middle" align="center">Flash (int.)</th>
<th valign="middle" align="center">RAM (int. + ext.)</th>
<th valign="middle" align="center">Energy/Inf. (mJ)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">E3_R4_S8</td>
<td valign="middle" align="center">51.5K</td>
<td valign="middle" align="center">0.196</td>
<td valign="middle" align="center">88.55 <inline-formula>
<mml:math display="inline" id="im58"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.65</td>
<td valign="middle" align="center">83.92 <inline-formula>
<mml:math display="inline" id="im59"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.72</td>
<td valign="middle" align="center">12.67 <inline-formula>
<mml:math display="inline" id="im60"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.22</td>
<td valign="middle" align="center">3.98 <inline-formula>
<mml:math display="inline" id="im61"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>7</sup></td>
<td valign="middle" align="center">236.4 KiB</td>
<td valign="middle" align="center">24.1 KiB + 2.34 MiB</td>
<td valign="middle" align="center">5.54</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">E3_R4_S16</td>
<td valign="middle" align="center">187.5K</td>
<td valign="middle" align="center">0.715</td>
<td valign="middle" align="center">95.72 <inline-formula>
<mml:math display="inline" id="im62"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.83</td>
<td valign="middle" align="center">94.32 <inline-formula>
<mml:math display="inline" id="im63"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.55</td>
<td valign="middle" align="center">31.65 <inline-formula>
<mml:math display="inline" id="im64"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.40</td>
<td valign="middle" align="center">1.10 <inline-formula>
<mml:math display="inline" id="im65"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">762.9 KiB</td>
<td valign="middle" align="center">24.3 KiB + 3.51 MiB</td>
<td valign="middle" align="center">13.82</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">E3_R4_S24</td>
<td valign="middle" align="center">416.3K</td>
<td valign="middle" align="center">1.588</td>
<td valign="middle" align="center">96.55 <inline-formula>
<mml:math display="inline" id="im66"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.91</td>
<td valign="middle" align="center">95.47 <inline-formula>
<mml:math display="inline" id="im67"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.73</td>
<td valign="middle" align="center">73.45 <inline-formula>
<mml:math display="inline" id="im68"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 1.24</td>
<td valign="middle" align="center">2.64 <inline-formula>
<mml:math display="inline" id="im69"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">1.61 MiB</td>
<td valign="middle" align="center">24.3 KiB + 5.85 MiB</td>
<td valign="middle" align="center">32.08</td>
</tr>
<tr>
<td valign="middle" align="center">4</td>
<td valign="middle" align="center">E3_R8_S8</td>
<td valign="middle" align="center">43.2K</td>
<td valign="middle" align="center">0.165</td>
<td valign="middle" align="center">89.32 <inline-formula>
<mml:math display="inline" id="im70"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.58</td>
<td valign="middle" align="center">85.42 <inline-formula>
<mml:math display="inline" id="im71"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.46</td>
<td valign="middle" align="center">12.79 <inline-formula>
<mml:math display="inline" id="im72"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.36</td>
<td valign="middle" align="center">3.97 <inline-formula>
<mml:math display="inline" id="im73"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>7</sup></td>
<td valign="middle" align="center">203.8 KiB</td>
<td valign="middle" align="center">24.2 KiB + 2.34 MiB</td>
<td valign="middle" align="center">5.59</td>
</tr>
<tr>
<td valign="middle" align="center">5</td>
<td valign="middle" align="center">E3_R8_S16</td>
<td valign="middle" align="center">155.1K</td>
<td valign="middle" align="center">0.592</td>
<td valign="middle" align="center">95.49 <inline-formula>
<mml:math display="inline" id="im74"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.68</td>
<td valign="middle" align="center">94.03 <inline-formula>
<mml:math display="inline" id="im75"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.66</td>
<td valign="middle" align="center">31.48 <inline-formula>
<mml:math display="inline" id="im76"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.28</td>
<td valign="middle" align="center">1.10 <inline-formula>
<mml:math display="inline" id="im77"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">636.3 KiB</td>
<td valign="middle" align="center">24.4 KiB + 3.51 MiB</td>
<td valign="middle" align="center">13.76</td>
</tr>
<tr>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">E3_R8_S24</td>
<td valign="middle" align="center">342.8K</td>
<td valign="middle" align="center">1.308</td>
<td valign="middle" align="center">96.69 <inline-formula>
<mml:math display="inline" id="im78"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.84</td>
<td valign="middle" align="center">95.40 <inline-formula>
<mml:math display="inline" id="im79"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.59</td>
<td valign="middle" align="center">74.05 <inline-formula>
<mml:math display="inline" id="im80"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.29</td>
<td valign="middle" align="center">2.64 <inline-formula>
<mml:math display="inline" id="im81"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">1.33 MiB</td>
<td valign="middle" align="center">24.4 KiB + 5.85 MiB</td>
<td valign="middle" align="center">32.34</td>
</tr>
<tr>
<td valign="middle" align="center">7</td>
<td valign="middle" align="center">E3_R16_S8</td>
<td valign="middle" align="center">38.8K</td>
<td valign="middle" align="center">0.148</td>
<td valign="middle" align="center">87.18 <inline-formula>
<mml:math display="inline" id="im82"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.44</td>
<td valign="middle" align="center">81.78 <inline-formula>
<mml:math display="inline" id="im83"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.55</td>
<td valign="middle" align="center">12.80 <inline-formula>
<mml:math display="inline" id="im84"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.13</td>
<td valign="middle" align="center">3.97 <inline-formula>
<mml:math display="inline" id="im85"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>7</sup></td>
<td valign="middle" align="center">186.6 KiB</td>
<td valign="middle" align="center">24.2 KiB + 2.34 MiB</td>
<td valign="middle" align="center">5.60</td>
</tr>
<tr>
<td valign="middle" align="center">8</td>
<td valign="middle" align="center">E3_R16_S16</td>
<td valign="middle" align="center">138.8K</td>
<td valign="middle" align="center">0.530</td>
<td valign="middle" align="center">94.32 <inline-formula>
<mml:math display="inline" id="im86"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.73</td>
<td valign="middle" align="center">92.26 <inline-formula>
<mml:math display="inline" id="im87"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.61</td>
<td valign="middle" align="center">32.09 <inline-formula>
<mml:math display="inline" id="im88"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.39</td>
<td valign="middle" align="center">1.10 <inline-formula>
<mml:math display="inline" id="im89"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">572.8 KiB</td>
<td valign="middle" align="center">24.4 KiB + 3.51 MiB</td>
<td valign="middle" align="center">14.03</td>
</tr>
<tr>
<td valign="middle" align="center">9</td>
<td valign="middle" align="center">E3_R16_S24</td>
<td valign="middle" align="center">305.3K</td>
<td valign="middle" align="center">1.165</td>
<td valign="middle" align="center">96.40 <inline-formula>
<mml:math display="inline" id="im90"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.66</td>
<td valign="middle" align="center">95.44 <inline-formula>
<mml:math display="inline" id="im91"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.58</td>
<td valign="middle" align="center">74.94 <inline-formula>
<mml:math display="inline" id="im92"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.79</td>
<td valign="middle" align="center">2.64 <inline-formula>
<mml:math display="inline" id="im93"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">1.19 MiB</td>
<td valign="middle" align="center">24.4 KiB + 5.85 MiB</td>
<td valign="middle" align="center">32.75</td>
</tr>
<tr>
<td valign="middle" align="center">10</td>
<td valign="middle" align="center">E4_R4_S8</td>
<td valign="middle" align="center">74.2K</td>
<td valign="middle" align="center">0.283</td>
<td valign="middle" align="center">91.24 <inline-formula>
<mml:math display="inline" id="im94"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.56</td>
<td valign="middle" align="center">88.11 <inline-formula>
<mml:math display="inline" id="im95"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.69</td>
<td valign="middle" align="center">15.39 <inline-formula>
<mml:math display="inline" id="im96"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.16</td>
<td valign="middle" align="center">4.71 <inline-formula>
<mml:math display="inline" id="im97"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>7</sup></td>
<td valign="middle" align="center">322.8 KiB</td>
<td valign="middle" align="center">23.9 KiB + 3.12 MiB</td>
<td valign="middle" align="center">6.72</td>
</tr>
<tr>
<td valign="middle" align="center">11</td>
<td valign="middle" align="center">E4_R4_S16</td>
<td valign="middle" align="center">272.4K</td>
<td valign="middle" align="center">1.039</td>
<td valign="middle" align="center">95.83 <inline-formula>
<mml:math display="inline" id="im98"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.91</td>
<td valign="middle" align="center">94.36 <inline-formula>
<mml:math display="inline" id="im99"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.64</td>
<td valign="middle" align="center">38.15 <inline-formula>
<mml:math display="inline" id="im100"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.92</td>
<td valign="middle" align="center">1.31 <inline-formula>
<mml:math display="inline" id="im101"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">1.07 MiB</td>
<td valign="middle" align="center">24.1 KiB + 4.69 MiB</td>
<td valign="middle" align="center">16.67</td>
</tr>
<tr>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">E4_R4_S24</td>
<td valign="middle" align="center">606.6K</td>
<td valign="middle" align="center">2.314</td>
<td valign="middle" align="center">96.95 <inline-formula>
<mml:math display="inline" id="im102"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.77</td>
<td valign="middle" align="center">95.98 <inline-formula>
<mml:math display="inline" id="im103"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.81</td>
<td valign="middle" align="center">87.80 <inline-formula>
<mml:math display="inline" id="im104"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.85</td>
<td valign="middle" align="center">3.13 <inline-formula>
<mml:math display="inline" id="im105"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">2.33 MiB (ext.)</td>
<td valign="middle" align="center">24.1 KiB + 7.81 MiB</td>
<td valign="middle" align="center">38.41</td>
</tr>
<tr>
<td valign="middle" align="center">13</td>
<td valign="middle" align="center">E4_R8_S8</td>
<td valign="middle" align="center">59.3K</td>
<td valign="middle" align="center">0.226</td>
<td valign="middle" align="center">91.49 <inline-formula>
<mml:math display="inline" id="im106"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.48</td>
<td valign="middle" align="center">88.29 <inline-formula>
<mml:math display="inline" id="im107"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.67</td>
<td valign="middle" align="center">15.53 <inline-formula>
<mml:math display="inline" id="im108"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.07</td>
<td valign="middle" align="center">4.71 <inline-formula>
<mml:math display="inline" id="im109"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>7</sup></td>
<td valign="middle" align="center">265.2 KiB</td>
<td valign="middle" align="center">24.1 KiB + 3.12 MiB</td>
<td valign="middle" align="center">6.79</td>
</tr>
<tr>
<td valign="middle" align="center">14</td>
<td valign="middle" align="center">E4_R8_S16</td>
<td valign="middle" align="center">214.8K</td>
<td valign="middle" align="center">0.819</td>
<td valign="middle" align="center">95.35 <inline-formula>
<mml:math display="inline" id="im110"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.79</td>
<td valign="middle" align="center">93.67 <inline-formula>
<mml:math display="inline" id="im111"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.68</td>
<td valign="middle" align="center">38.02 <inline-formula>
<mml:math display="inline" id="im112"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.64</td>
<td valign="middle" align="center">1.31 <inline-formula>
<mml:math display="inline" id="im113"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">866.6 KiB</td>
<td valign="middle" align="center">24.3 KiB + 4.69 MiB</td>
<td valign="middle" align="center">16.61</td>
</tr>
<tr>
<td valign="middle" align="center">15</td>
<td valign="middle" align="center">E4_R8_S24</td>
<td valign="middle" align="center">475.8K</td>
<td valign="middle" align="center">1.815</td>
<td valign="middle" align="center">97.26 <inline-formula>
<mml:math display="inline" id="im114"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.72</td>
<td valign="middle" align="center">96.64 <inline-formula>
<mml:math display="inline" id="im115"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.76</td>
<td valign="middle" align="center">89.40 <inline-formula>
<mml:math display="inline" id="im116"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 1.00</td>
<td valign="middle" align="center">3.12 <inline-formula>
<mml:math display="inline" id="im117"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">1.83 MiB</td>
<td valign="middle" align="center">24.4 KiB + 7.81 MiB</td>
<td valign="middle" align="center">39.08</td>
</tr>
<tr>
<td valign="middle" align="center">16</td>
<td valign="middle" align="center">E4_R16_S8</td>
<td valign="middle" align="center">51.9K</td>
<td valign="middle" align="center">0.198</td>
<td valign="middle" align="center">88.98 <inline-formula>
<mml:math display="inline" id="im118"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.61</td>
<td valign="middle" align="center">84.95 <inline-formula>
<mml:math display="inline" id="im119"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.53</td>
<td valign="middle" align="center">15.04 <inline-formula>
<mml:math display="inline" id="im120"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.64</td>
<td valign="middle" align="center">4.71 <inline-formula>
<mml:math display="inline" id="im121"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>7</sup></td>
<td valign="middle" align="center">236.3 KiB</td>
<td valign="middle" align="center">24.2 KiB + 3.12 MiB</td>
<td valign="middle" align="center">6.58</td>
</tr>
<tr>
<td valign="middle" align="center">17</td>
<td valign="middle" align="center">E4_R16_S16</td>
<td valign="middle" align="center">186.0K</td>
<td valign="middle" align="center">0.710</td>
<td valign="middle" align="center">95.43 <inline-formula>
<mml:math display="inline" id="im122"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.69</td>
<td valign="middle" align="center">94.11 <inline-formula>
<mml:math display="inline" id="im123"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.71</td>
<td valign="middle" align="center">38.49 <inline-formula>
<mml:math display="inline" id="im124"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.26</td>
<td valign="middle" align="center">1.31 <inline-formula>
<mml:math display="inline" id="im125"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">754.3 KiB</td>
<td valign="middle" align="center">24.4 KiB + 4.69 MiB</td>
<td valign="middle" align="center">16.81</td>
</tr>
<tr>
<td valign="middle" align="center">18</td>
<td valign="middle" align="center">E4_R16_S24</td>
<td valign="middle" align="center">410.4K</td>
<td valign="middle" align="center">1.565</td>
<td valign="middle" align="center">96.23 <inline-formula>
<mml:math display="inline" id="im126"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.81</td>
<td valign="middle" align="center">94.90 <inline-formula>
<mml:math display="inline" id="im127"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.65</td>
<td valign="middle" align="center">86.43 <inline-formula>
<mml:math display="inline" id="im128"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.68</td>
<td valign="middle" align="center">3.13 <inline-formula>
<mml:math display="inline" id="im129"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">1.59 MiB</td>
<td valign="middle" align="center">24.4 KiB + 7.81 MiB</td>
<td valign="middle" align="center">37.79</td>
</tr>
<tr>
<td valign="middle" align="center">19</td>
<td valign="middle" align="center">E6_R4_S8</td>
<td valign="middle" align="center">130.6K</td>
<td valign="middle" align="center">0.498</td>
<td valign="middle" align="center">94.58 <inline-formula>
<mml:math display="inline" id="im130"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.55</td>
<td valign="middle" align="center">92.79 <inline-formula>
<mml:math display="inline" id="im131"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.78</td>
<td valign="middle" align="center">21.31 <inline-formula>
<mml:math display="inline" id="im132"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.28</td>
<td valign="middle" align="center">6.19 <inline-formula>
<mml:math display="inline" id="im133"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>7</sup></td>
<td valign="middle" align="center">541.0 KiB</td>
<td valign="middle" align="center">24.1 KiB + 4.69 MiB</td>
<td valign="middle" align="center">9.32</td>
</tr>
<tr>
<td valign="middle" align="center">20</td>
<td valign="middle" align="center">E6_R4_S16</td>
<td valign="middle" align="center">485.5K</td>
<td valign="middle" align="center">1.852</td>
<td valign="middle" align="center">96.80 <inline-formula>
<mml:math display="inline" id="im134"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.82</td>
<td valign="middle" align="center">95.85 <inline-formula>
<mml:math display="inline" id="im135"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.91</td>
<td valign="middle" align="center">56.68 <inline-formula>
<mml:math display="inline" id="im136"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.42</td>
<td valign="middle" align="center">1.71 <inline-formula>
<mml:math display="inline" id="im137"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">1.87 MiB</td>
<td valign="middle" align="center">24.4 KiB + 7.03 MiB</td>
<td valign="middle" align="center">24.77</td>
</tr>
<tr>
<td valign="middle" align="center">21</td>
<td valign="middle" align="center">E6_R4_S24</td>
<td valign="middle" align="center">1085.2K</td>
<td valign="middle" align="center">4.140</td>
<td valign="middle" align="center">97.29 <inline-formula>
<mml:math display="inline" id="im138"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.69</td>
<td valign="middle" align="center">96.42 <inline-formula>
<mml:math display="inline" id="im139"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.89</td>
<td valign="middle" align="center">118.34 <inline-formula>
<mml:math display="inline" id="im140"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 1.37</td>
<td valign="middle" align="center">4.10 <inline-formula>
<mml:math display="inline" id="im141"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">4.15 MiB (ext.)</td>
<td valign="middle" align="center">24.4 KiB + 11.72 MiB</td>
<td valign="middle" align="center">51.72</td>
</tr>
<tr>
<td valign="middle" align="center">22</td>
<td valign="middle" align="center">E6_R8_S8</td>
<td valign="middle" align="center">97.2K</td>
<td valign="middle" align="center">0.371</td>
<td valign="middle" align="center">93.23 <inline-formula>
<mml:math display="inline" id="im142"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.59</td>
<td valign="middle" align="center">90.73 <inline-formula>
<mml:math display="inline" id="im143"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.67</td>
<td valign="middle" align="center">19.83 <inline-formula>
<mml:math display="inline" id="im144"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.24</td>
<td valign="middle" align="center">6.18 <inline-formula>
<mml:math display="inline" id="im145"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>7</sup></td>
<td valign="middle" align="center">410.2 KiB</td>
<td valign="middle" align="center">24.1 KiB + 4.69 MiB</td>
<td valign="middle" align="center">8.66</td>
</tr>
<tr>
<td valign="middle" align="center">23</td>
<td valign="middle" align="center">E6_R8_S16</td>
<td valign="middle" align="center">355.9K</td>
<td valign="middle" align="center">1.358</td>
<td valign="middle" align="center">96.32 <inline-formula>
<mml:math display="inline" id="im146"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.74</td>
<td valign="middle" align="center">95.21 <inline-formula>
<mml:math display="inline" id="im147"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.65</td>
<td valign="middle" align="center">49.91 <inline-formula>
<mml:math display="inline" id="im148"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.62</td>
<td valign="middle" align="center">1.71 <inline-formula>
<mml:math display="inline" id="im149"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">1.38 MiB</td>
<td valign="middle" align="center">24.3 KiB + 7.03 MiB</td>
<td valign="middle" align="center">21.81</td>
</tr>
<tr>
<td valign="middle" align="center">24</td>
<td valign="middle" align="center">E6_R8_S24</td>
<td valign="middle" align="center">790.9K</td>
<td valign="middle" align="center">3.017</td>
<td valign="middle" align="center">97.00 <inline-formula>
<mml:math display="inline" id="im150"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.88</td>
<td valign="middle" align="center">96.18 <inline-formula>
<mml:math display="inline" id="im151"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.77</td>
<td valign="middle" align="center">115.56 <inline-formula>
<mml:math display="inline" id="im152"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 1.67</td>
<td valign="middle" align="center">4.10 <inline-formula>
<mml:math display="inline" id="im153"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">3.03 MiB (ext.)</td>
<td valign="middle" align="center">24.4 KiB + 11.72 MiB</td>
<td valign="middle" align="center">50.51</td>
</tr>
<tr>
<td valign="middle" align="center">25</td>
<td valign="middle" align="center">E6_R16_S8</td>
<td valign="middle" align="center">80.5K</td>
<td valign="middle" align="center">0.307</td>
<td valign="middle" align="center">93.75 <inline-formula>
<mml:math display="inline" id="im154"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.47</td>
<td valign="middle" align="center">91.42 <inline-formula>
<mml:math display="inline" id="im155"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.68</td>
<td valign="middle" align="center">21.39 <inline-formula>
<mml:math display="inline" id="im156"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.24</td>
<td valign="middle" align="center">6.18 <inline-formula>
<mml:math display="inline" id="im157"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>7</sup></td>
<td valign="middle" align="center">344.9 KiB</td>
<td valign="middle" align="center">24.2 KiB + 4.69 MiB</td>
<td valign="middle" align="center">9.35</td>
</tr>
<tr>
<td valign="middle" align="center">26</td>
<td valign="middle" align="center">E6_R16_S16</td>
<td valign="middle" align="center">291.1K</td>
<td valign="middle" align="center">1.110</td>
<td valign="middle" align="center">95.83 <inline-formula>
<mml:math display="inline" id="im158"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.79</td>
<td valign="middle" align="center">94.58 <inline-formula>
<mml:math display="inline" id="im159"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.66</td>
<td valign="middle" align="center">51.80 <inline-formula>
<mml:math display="inline" id="im160"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.90</td>
<td valign="middle" align="center">1.71 <inline-formula>
<mml:math display="inline" id="im161"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">1.13 MiB</td>
<td valign="middle" align="center">24.4 KiB + 7.03 MiB</td>
<td valign="middle" align="center">22.64</td>
</tr>
<tr>
<td valign="middle" align="center">27</td>
<td valign="middle" align="center">E6_R16_S24</td>
<td valign="middle" align="center">643.7K</td>
<td valign="middle" align="center">2.456</td>
<td valign="middle" align="center">96.40 <inline-formula>
<mml:math display="inline" id="im162"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.91</td>
<td valign="middle" align="center">95.26 <inline-formula>
<mml:math display="inline" id="im163"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 0.81</td>
<td valign="middle" align="center">118.93 <inline-formula>
<mml:math display="inline" id="im164"><mml:mo>&#xb1;</mml:mo></mml:math></inline-formula> 1.76</td>
<td valign="middle" align="center">4.10 <inline-formula>
<mml:math display="inline" id="im165"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 10<sup>8</sup></td>
<td valign="middle" align="center">2.47 MiB (ext.)</td>
<td valign="middle" align="center">24.4 KiB + 11.72 MiB</td>
<td valign="middle" align="center">51.98</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>RED and BLUE indicate the first and second highest ranking results, respectively.</p></fn>
</table-wrap-foot>
</table-wrap>
<sec id="s3_2_1">
<label>3.2.1</label>
<title>Effect of expansion ratio</title>
<p>The expansion ratio <italic>E</italic> determines the internal channel width within each IR block, thus shaping the model&#x2019;s feature capacity and non-linearity. When <italic>E</italic> increases from 3 to 6, both accuracy and F1-score improve steadily because wider intermediate layers capture more diverse spatial patterns. However, this benefit comes with a steep rise in computational cost and memory usage. For example, the configuration with <italic>E</italic> = 6 attains the best accuracy (97.29%) but incurs a ninefold increase in inference latency (118.34 ms versus 12.67 ms) and a 26-fold rise in flash memory compared with <italic>E</italic> = 3. Beyond a certain point, further expansion yields little accuracy gain but severely limits deployability on MCUs. In practice, a moderate setting of <italic>E</italic> = 4 offers the best balance&#x2014;achieving 96&#x2013;97% accuracy while keeping inference time below 90 ms and flash usage under 2 MB. This trend echoes prior findings on lightweight vision networks <xref ref-type="bibr" rid="B20">Sandler et&#xa0;al. (2018)</xref>, where moderate expansion preserves accuracy without sacrificing efficiency.</p>
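<p>For illustration, the following minimal PyTorch-style sketch shows how the expansion ratio <italic>E</italic> sets the internal channel width of an inverted residual block. It is an illustrative assumption, not the released TinyWeedNet implementation; layer ordering and normalization choices follow the common MobileNetV2-style pattern cited above.</p>
<preformat>import torch
import torch.nn as nn

class InvertedResidual(nn.Module):
    """Inverted residual (IR) block sketch: the expansion ratio E sets
    the internal channel width, trading accuracy for compute/memory."""
    def __init__(self, in_ch, out_ch, expand_ratio=4):
        super().__init__()
        hidden = in_ch * expand_ratio  # internal width controlled by E
        self.block = nn.Sequential(
            nn.Conv2d(in_ch, hidden, 1, bias=False),   # 1x1 expand
            nn.BatchNorm2d(hidden),
            nn.ReLU6(inplace=True),
            nn.Conv2d(hidden, hidden, 3, padding=1,
                      groups=hidden, bias=False),      # 3x3 depthwise
            nn.BatchNorm2d(hidden),
            nn.ReLU6(inplace=True),
            nn.Conv2d(hidden, out_ch, 1, bias=False),  # 1x1 project
            nn.BatchNorm2d(out_ch),
        )
        self.use_res = in_ch == out_ch

    def forward(self, x):
        y = self.block(x)
        return x + y if self.use_res else y</preformat>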
</sec>
<sec id="s3_2_2">
<label>3.2.2</label>
<title>Effect of reduction ratio</title>
<p>The reduction ratio <italic>R</italic> in the channel attention (CA) module governs the degree of channel compression within the squeeze&#x2013;excitation branch. A small <italic>R</italic> (e.g., 4) retains more intermediate channels and strengthens attention responses to subtle inter-class variations such as leaf texture or vein details. Yet, this comes at the expense of a larger parameter count and memory demand&#x2014;for instance, increasing from 38.8 K parameters at <italic>R</italic> = 16 to 51.5 K at <italic>R</italic> = 4. In contrast, a large <italic>R</italic> (e.g., 16) overly compresses the feature descriptors, leading to minor but consistent accuracy drops (up to 2&#x2013;3%). Across all configurations, <italic>R</italic> = 8 provides the most balanced outcome: F1-scores remain above 94% while parameters are reduced by roughly one quarter compared to <italic>R</italic> = 4. Configurations with <italic>R</italic> = 8 also show fast inference (12&#x2013;115 ms) and stable energy use, confirming that moderate compression achieves the best trade-off between attention expressiveness and MCU-level efficiency. This observation further validates the lightweight attention strategy adopted in TinyWeedNet.</p>
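<p>A minimal sketch of a squeeze&#x2013;excitation-style channel attention branch makes the role of <italic>R</italic> explicit; the PyTorch formulation below is an illustrative assumption and the exact TinyWeedNet layer layout may differ. Larger <italic>R</italic> means a smaller bottleneck, hence fewer parameters but coarser channel descriptors:</p>
<preformat>import torch
import torch.nn as nn

class ChannelAttention(nn.Module):
    """SE-style channel attention sketch: the reduction ratio R shrinks
    the bottleneck between the two 1x1 convolutions."""
    def __init__(self, channels, reduction=8):
        super().__init__()
        squeezed = max(channels // reduction, 1)
        self.fc = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),           # squeeze: global context
            nn.Conv2d(channels, squeezed, 1),  # compress by factor R
            nn.ReLU(inplace=True),
            nn.Conv2d(squeezed, channels, 1),  # restore channel width
            nn.Sigmoid(),                      # per-channel gates in [0, 1]
        )

    def forward(self, x):
        return x * self.fc(x)                  # reweight channels</preformat>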
</sec>
<sec id="s3_2_3">
<label>3.2.3</label>
<title>Effect of stem channels</title>
<p>The number of stem channels <italic>S</italic> controls the capacity of the initial convolution to extract low-level edges and texture features. Increasing <italic>S</italic> from 8 to 24 leads to clear improvements in accuracy and F1-score (typically 1&#x2013;3%), as richer early representations enhance gradient flow and feature discrimination. However, these benefits come with roughly quadratic growth in parameters and flash memory (from 0.196 MiB to 1.588 MiB) and push more data into external RAM (up to 7.81 MiB), which adds energy overhead. Empirically, <italic>S</italic> = 24 represents a practical upper bound, offering strong feature extraction while staying within the 2 MB on-chip flash limit. Configurations beyond <italic>E</italic> = 6 or <italic>S</italic> = 24 exceeded STM32H7 storage capacity, emphasizing that architecture design in TinyML must be co-optimized with hardware constraints.</p>
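<p>The super-linear growth can be sketched directly: because downstream stage widths are tied to the stem width, widening <italic>S</italic> inflates later layers quadratically. The two-layer front end below is an illustrative assumption (stride, widths, and normalization are not the exact TinyWeedNet stem):</p>
<preformat>import torch.nn as nn

def tiny_front(stem_channels=24):
    """Stem conv plus one 3x3 layer whose width scales with S.
    The stem itself costs ~27*S weights, but the following layer
    costs ~18*S^2, which is why total parameters grow super-linearly."""
    s = stem_channels
    return nn.Sequential(
        nn.Conv2d(3, s, 3, stride=2, padding=1, bias=False),
        nn.BatchNorm2d(s),
        nn.ReLU6(inplace=True),
        nn.Conv2d(s, 2 * s, 3, padding=1, bias=False),
        nn.BatchNorm2d(2 * s),
        nn.ReLU6(inplace=True),
    )

for s in (8, 16, 24):
    n = sum(p.numel() for p in tiny_front(s).parameters())
    print(f"S={s}: {n} parameters")  # grows much faster than linearly in S</preformat>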
</sec>
<sec id="s3_2_4">
<label>3.2.4</label>
<title>Overall validation of the proposed configuration</title>
<p>When jointly considering accuracy, latency, and on-chip memory limits, the configuration (E4_R8_S24) defining the proposed TinyWeedNet architecture demonstrates the most balanced performance among all 27 tested combinations. It achieves 97.26% accuracy and 96.64% F1-score while maintaining a moderate model size (1.815 MB) and an inference latency (89.40 ms) compatible with agricultural robotic vision systems. Unlike typical brute-force neural architecture searches, this configuration was derived <italic>a priori</italic> from design principles emphasizing multi-scale diversity, moderate expansion, and lightweight attention. The subsequent sensitivity experiments validate that this combination yields the best accuracy&#x2013;efficiency&#x2013;energy synergy, with a per-inference energy of 39.08 mJ, lower than that of the heaviest configuration (ID 21, 51.72 mJ). Furthermore, the relatively small standard deviations across all trials (&#x2264;1%) indicate statistical robustness, confirming that TinyWeedNet&#x2019;s performance remains stable under random initialization and environmental noise.</p>
<p>Overall, the hyperparameter sensitivity analysis not only verifies TinyWeedNet&#x2019;s architectural choices but also provides generalizable insights for TinyML model design. In particular, (1) moderate expansion (<italic>E</italic> = 4&#x2013;5) prevents accuracy saturation while ensuring low latency, (2) mid-level attention reduction (<italic>R</italic> = 8) balances precision and overhead, and (3) a strong but bounded stem (<italic>S</italic> = 24) offers maximal texture sensitivity without memory overflow. These findings collectively demonstrate that the TinyWeedNet configuration is hardware-aware, energy-efficient, and empirically validated for MCU deployment in weed classification scenarios.</p>
</sec>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Ablation study results</title>
<p><xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref>; <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref> presents the ablation study results conducted to quantify the contribution of each architectural component in TinyWeedNet, including the multi-scale convolution, CA, depthwise separable convolution, and final 1 &#xd7; 1 projection layer. All variants were retrained under identical settings to isolate the effect of individual modules on both predictive accuracy and MCU deployment metrics.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Ablation study of the proposed TinyWeedNet on model architecture and MCU deployment performance.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Experiment</th>
<th valign="middle" align="center">Params.</th>
<th valign="middle" align="center">Model size</th>
<th valign="middle" align="center">Accuracy (%)</th>
<th valign="middle" align="center">F1-score (%)</th>
<th valign="middle" align="center">Inf. time (ms)</th>
<th valign="middle" align="center">MACC</th>
<th valign="middle" align="center">Flash (int.)</th>
<th valign="middle" align="center">RAM (int. + ext.)</th>
<th valign="middle" align="center">Energy/Inf. (mJ)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Baseline</td>
<td valign="middle" align="center">475K</td>
<td valign="middle" align="center">1.81 MB</td>
<td valign="middle" align="center">97.26 &#xb1; 0.72</td>
<td valign="middle" align="center">96.64 &#xb1; 0.76</td>
<td valign="middle" align="center">89.345 &#xb1; 1.021</td>
<td valign="middle" align="center">3.12&#xd7;10<sup>8</sup></td>
<td valign="middle" align="center">1.83 MiB</td>
<td valign="middle" align="center">24.4 KiB + 7.81 MiB</td>
<td valign="middle" align="center">39.08</td>
</tr>
<tr>
<td valign="middle" align="left">w/o MS Conv</td>
<td valign="middle" align="center">475K</td>
<td valign="middle" align="center">1.81 MB</td>
<td valign="middle" align="center">94.29 &#xb1; 0.67</td>
<td valign="middle" align="center">91.96 &#xb1; 0.55</td>
<td valign="middle" align="center">89.821 &#xb1; 0.606</td>
<td valign="middle" align="center">3.12&#xd7;10<sup>8</sup></td>
<td valign="middle" align="center">1.83 MiB</td>
<td valign="middle" align="center">18.56 KiB + 7.81 MiB</td>
<td valign="middle" align="center">39.23</td>
</tr>
<tr>
<td valign="middle" align="left">w/o Attention</td>
<td valign="middle" align="center">820K</td>
<td valign="middle" align="center">3.13 MB</td>
<td valign="middle" align="center">93.14 &#xb1; 0.78</td>
<td valign="middle" align="center">90.75 &#xb1; 0.69</td>
<td valign="middle" align="center">86.893 &#xb1; 0.661</td>
<td valign="middle" align="center">3.10&#xd7;10<sup>8</sup></td>
<td valign="middle" align="center">3.15 MiB (ext.)</td>
<td valign="middle" align="center">10.88 KiB + 7.81 MiB</td>
<td valign="middle" align="center">38.00</td>
</tr>
<tr>
<td valign="middle" align="left">w/o DepthwiseConv</td>
<td valign="middle" align="center">5.10M</td>
<td valign="middle" align="center">19.72 MB</td>
<td valign="middle" align="center">96.68 &#xb1; 0.92</td>
<td valign="middle" align="center">95.58 &#xb1; 0.86</td>
<td valign="middle" align="center">593.423 &#xb1; 4.960</td>
<td valign="middle" align="center">1.60&#xd7;10<sup>9</sup></td>
<td valign="middle" align="center">19.74 MiB (ext.)</td>
<td valign="middle" align="center">18.95 KiB + 7.81 MiB</td>
<td valign="middle" align="center">259.16</td>
</tr>
<tr>
<td valign="middle" align="left">w/o Final Conv1&#xd7;1</td>
<td valign="middle" align="center">445K</td>
<td valign="middle" align="center">1.69 MB</td>
<td valign="middle" align="center">94.69 &#xb1; 0.71</td>
<td valign="middle" align="center">92.78 &#xb1; 0.59</td>
<td valign="middle" align="center">88.577 &#xb1; 0.830</td>
<td valign="middle" align="center">3.10&#xd7;10<sup>8</sup></td>
<td valign="middle" align="center">1.71 MiB</td>
<td valign="middle" align="center">17.94 KiB + 7.81 MiB</td>
<td valign="middle" align="center">38.69</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>RED and BLUE indicate the first and second highest ranking results, respectively.</p></fn>
</table-wrap-foot>
</table-wrap>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Trade-off between F1-score and energy consumption per inference for different TinyWeedNet ablation variants on the target MCU. Bubble size represents the model size. The x-axis is broken to highlight the low-energy region (30&#x2013;40 mJ), while the high-energy outlier (w/o DepthwiseConv) is shown separately.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1747863-g005.tif">
<alt-text content-type="machine-generated">Scatter plot with two panels comparing ablation variants of a machine learning model. Left panel shows F1-score versus energy per inference for five variants, with Baseline scoring highest and lowest energy. Right panel highlights a large red point for &#x201c;w/o DepthwiseConv&#x201d; with higher energy, F1-score near Baseline, and much larger model size. Bubble sizes represent model size, with a legend showing examples of 1.69, 1.81, and 19.72 megabytes. Error bars reflect variability.</alt-text>
</graphic></fig>
<sec id="s3_3_1">
<label>3.3.1</label>
<title>Impact of multi-scale convolution</title>
<p>Removing the multi-scale convolution branch (&#x201c;w/o MS Conv&#x201d;) leads to a drop in accuracy (from 97.26% to 94.29%) and F1-score (from 96.64% to 91.96%), confirming that the multi-scale design (integrating kernels of 1 &#xd7; 1, 3 &#xd7; 3, and 5 &#xd7; 5) is essential for capturing spatial features at multiple receptive-field resolutions. Interestingly, both inference time and energy consumption remain almost unchanged (&#x394;0.48 ms, &#x394;0.15 mJ), implying that the MS block introduces negligible computational cost relative to its gain in representational richness. In terms of parameter efficiency, the baseline achieves 204.8%/M accuracy-per-parameter (APP), while &#x201c;w/o MS Conv&#x201d; drops to 198.5%/M, highlighting that MS not only improves raw accuracy but also increases accuracy density per parameter. The energy-per-MACC also remains stable (&#x2248;1.25&#xd7;10<sup>&#x2212;10</sup> J/MACC), demonstrating that the gain is architectural rather than computational.</p>
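<p>A minimal sketch of such a multi-scale block follows; the concatenation-based fusion and the branch widths are illustrative assumptions rather than the exact TinyWeedNet layout:</p>
<preformat>import torch
import torch.nn as nn

class MultiScaleConv(nn.Module):
    """Parallel 1x1 / 3x3 / 5x5 branches concatenated along the channel
    axis, capturing several receptive-field resolutions at once."""
    def __init__(self, in_ch, branch_ch):
        super().__init__()
        self.b1 = nn.Conv2d(in_ch, branch_ch, 1)
        self.b3 = nn.Conv2d(in_ch, branch_ch, 3, padding=1)
        self.b5 = nn.Conv2d(in_ch, branch_ch, 5, padding=2)

    def forward(self, x):
        # Concatenate the three scales; a subsequent block fuses them.
        return torch.cat([self.b1(x), self.b3(x), self.b5(x)], dim=1)</preformat>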
</sec>
<sec id="s3_3_2">
<label>3.3.2</label>
<title>Impact of channel attention</title>
<p>Disabling the channel attention mechanism (&#x201c;w/o Attention&#x201d;) decreases F1-score to 90.75%, illustrating a loss of discriminative ability in feature reweighting. Although inference becomes slightly faster (86.9 ms) with lower energy consumption (38.0 mJ), this accuracy&#x2013;efficiency trade-off is unfavorable for real deployments. Moreover, the variant unexpectedly increases model parameters (820K vs. 475K), confirming that the lightweight CA module not only strengthens feature expressiveness but also imposes a regularizing effect, allowing the backbone to remain compact without losing performance. From an energy&#x2013;delay perspective, the Energy&#x2013;Delay Product (EDP) improves only marginally (3.30&#xd7;10<sup>3</sup> vs. 3.49&#xd7;10<sup>3</sup> mJ&#xb7;ms), while accuracy drops significantly, making the baseline configuration more Pareto-optimal. Overall, the CA module with reduction ratio <italic>R</italic> = 8 offers the best synergy between attention strength and hardware efficiency, ensuring both high accuracy and stable runtime.</p>
</sec>
<sec id="s3_3_3">
<label>3.3.3</label>
<title>Impact of depthwise separable convolution</title>
<p>Replacing all depthwise separable convolutions with standard <inline-formula>
<mml:math display="inline" id="im166"><mml:mrow><mml:mn>3</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:math></inline-formula> convolutions (&#x201c;w/o DepthwiseConv&#x201d;) results in a catastrophic increase in parameter count from 0.48 M to 5.10 M, inflating model size over tenfold (1.8 MB <inline-formula>
<mml:math display="inline" id="im167"><mml:mo>&#x2192;</mml:mo></mml:math></inline-formula> 19.7 MB) and MACC operations by 5.13&#xd7;. While the accuracy remains high (96.68%), the inference latency rises from 89.3 ms to 593.4 ms, and energy consumption surges from 39.1 mJ to 259.2 mJ. This translates to a 44&#xd7; increase in EDP (1.54 <inline-formula>
<mml:math display="inline" id="im168"><mml:mo>&#xd7;</mml:mo></mml:math></inline-formula> 105 mJ&#xb7;ms), making this variant less attractive for MCU applications. The per-MACC energy also deteriorates (0.162 nJ/MACC vs. 0.125 nJ/MACC), implying that standard convolutions magnify memory traffic and data-movement overhead. This confirms that depthwise separable convolutions are the dominant contributor to TinyWeedNet&#x2019;s computational efficiency, maintaining high accuracy while minimizing latency and energy cost.</p>
</sec>
<sec id="s3_3_4">
<label>3.3.4</label>
<title>Impact of final <inline-formula>
<mml:math display="inline" id="im169"><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></inline-formula> Projection Layer</title>
<p>Removing the final 1 &#xd7; 1 projection layer before global average pooling (&#x201c;w/o Final Conv1&#xd7;1&#x201d;) leads to an accuracy reduction from 97.26% to 94.69% with a slight decrease in model size (1.81 MB &#x2192; 1.69 MB). This projection consolidates high-level semantic features and improves inter-class separability prior to classification. Its computational overhead is minimal (a sub-millisecond latency difference) and energy consumption remains virtually unchanged (38.7 mJ), demonstrating that its inclusion provides substantial accuracy gains with negligible runtime penalty. Although the variant&#x2019;s accuracy-per-parameter (212.8%/M) appears higher due to the reduced parameter count, the absolute performance loss of over 2.5% indicates that this layer is essential for final-stage feature fusion and stable convergence.</p>
</sec>
<sec id="s3_3_5">
<label>3.3.5</label>
<title>Cross-module interactions and deployment implications</title>
<p>The ablation experiments also reveal non-linear interactions among components:</p>
<list list-type="bullet">
<list-item>
<p>Multi-Scale Convolutions&#x2194;Channel Attention synergy: The multi-scale features enhance the effectiveness of channel attention by providing diverse frequency and scale cues. Removing either module causes a super-linear drop (4&#x2013;6% in F1-score) despite unchanged latency, underscoring their coupled role in maintaining feature robustness.</p></list-item>
<list-item>
<p>Depthwise Convolutions&#x2194;Memory hierarchy: Depthwise convolutions help keep intermediate feature maps within on-chip SRAM, avoiding off-chip DRAM transfers. Once replaced by standard convolutions, the model exceeds internal flash limits (19.7 MB), forcing partial external memory access and introducing severe DMA stalls that amplify energy cost beyond what MACC scaling alone predicts.</p></list-item>
<list-item>
<p>Projection&#x2194;Classifier alignment: The final 1 &#xd7; 1 layer acts as a semantic bottleneck aligning channel responses for the classifier head. Its absence reduces F1-score by nearly 4% with minimal runtime impact, confirming its role as a structural regularizer rather than a compute burden.</p></list-item>
</list>
<p>These interactions highlight that removing modules based solely on FLOP reduction can be misleading in TinyML contexts; instead, optimizing for on-chip memory residency, energy-per-MACC stability, and accuracy-per-parameter density yields better deployment efficiency.</p>
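<p>For reference, the deployment metrics used throughout this analysis can be recomputed directly from the values in <xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref>; the short Python sketch below shows the arithmetic:</p>
<preformat>def app(acc_pct, params_m):
    """Accuracy per parameter, in %/M."""
    return acc_pct / params_m

def energy_per_macc(e_mj, macc):
    """Energy per multiply-accumulate, in nJ/MACC (mJ to nJ: x1e6)."""
    return e_mj * 1e6 / macc

def edp(e_mj, t_ms):
    """Energy-delay product, in mJ*ms."""
    return e_mj * t_ms

print(app(97.26, 0.475))               # ~204.8 %/M (baseline)
print(energy_per_macc(39.08, 3.12e8))  # ~0.125 nJ/MACC (baseline)
print(edp(259.16, 593.423))            # ~1.54e5 mJ*ms (w/o DepthwiseConv)</preformat>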
</sec>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Robustness analysis results</title>
<p><xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref> summarizes the robustness performance of the proposed model on the DeepWeeds test set under controlled domain shifts. Overall, the model exhibits stable behavior under moderate perturbations, with a gradual and bounded degradation in F1-score as the severity level increases.</p>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>TinyWeedNet robustness evaluation on the DeepWeeds test set under controlled domain shifts.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Corruption Type</th>
<th valign="middle" align="center">Severity</th>
<th valign="middle" align="center">F1-score (%)</th>
<th valign="middle" align="center">&#x394;F1-score (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Clean</td>
<td valign="middle" align="center">&#x2013;</td>
<td valign="middle" align="center">96.1</td>
<td valign="middle" align="center">0.0</td>
</tr>
<tr>
<td valign="top" rowspan="3" align="left">Illumination (Brightness)</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">95.7</td>
<td valign="middle" align="center">-0.4</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">95.0</td>
<td valign="middle" align="center">-1.1</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">94.0</td>
<td valign="middle" align="center">-2.1</td>
</tr>
<tr>
<td valign="top" rowspan="3" align="left">Illumination (Contrast)</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">95.4</td>
<td valign="middle" align="center">-0.7</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">94.5</td>
<td valign="middle" align="center">-1.6</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">93.1</td>
<td valign="middle" align="center">-3.0</td>
</tr>
<tr>
<td valign="top" rowspan="3" align="left">Illumination (Gamma)</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">95.5</td>
<td valign="middle" align="center">-0.6</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">94.6</td>
<td valign="middle" align="center">-1.5</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">93.4</td>
<td valign="middle" align="center">-2.7</td>
</tr>
<tr>
<td valign="top" rowspan="3" align="left">Illumination (White balance)</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">95.3</td>
<td valign="middle" align="center">-0.8</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">94.2</td>
<td valign="middle" align="center">-1.9</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">92.8</td>
<td valign="middle" align="center">-3.3</td>
</tr>
<tr>
<td valign="top" rowspan="3" align="left">Weather (Gaussian blur)</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">94.8</td>
<td valign="middle" align="center">-1.3</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">93.5</td>
<td valign="middle" align="center">-2.6</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">91.5</td>
<td valign="middle" align="center">-4.6</td>
</tr>
<tr>
<td valign="top" rowspan="3" align="left">Soil/background proxy (Hue shift)</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">95.6</td>
<td valign="middle" align="center">-0.5</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">94.8</td>
<td valign="middle" align="center">-1.3</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">93.8</td>
<td valign="middle" align="center">-2.3</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Severity levels 1, 2, and 3 correspond to mild, moderate, and severe perturbations, respectively, with increasing transformation strength for illumination variations, Gaussian blur, and background color shifts.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>For illumination-related variations, including brightness, contrast, gamma correction, and white-balance shifts, the performance degradation remains limited. Across all illumination perturbations, the maximum F1-score drop at the most severe level (Severity 3) does not exceed 3.3 percentage points. In particular, brightness and gamma adjustments result in relatively small performance changes, indicating that the model is robust to common exposure fluctuations and non-linear illumination effects encountered in outdoor agricultural environments. White-balance shifts introduce slightly larger degradation, suggesting increased sensitivity to extreme color temperature variations.</p>
<p>Weather-induced degradation modeled by Gaussian blur leads to the largest performance decrease among all tested perturbations. At Severity 3, the F1-score drop reaches 4.6 percentage points, reflecting the challenge posed by reduced image sharpness due to motion, defocus, or adverse weather conditions. Nevertheless, the model maintains a high absolute F1-score of 91.5% even under severe blur, indicating a reasonable level of robustness in visually degraded scenarios.</p>
<p>Background-related variations approximated by hue shifts result in relatively minor performance degradation. Even at the highest severity level, the F1-score drop is limited to 2.3 percentage points, suggesting that the model is not overly sensitive to moderate changes in soil appearance or background color distributions. This behavior is desirable for deployment across different agricultural fields with varying soil and vegetation characteristics.</p>
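<p>A minimal sketch of such a corruption pipeline is shown below, assuming torchvision-style functional transforms. The severity scales are illustrative assumptions rather than the exact strengths of the evaluation protocol, and white-balance shifts (which would require per-channel color gains) are omitted for brevity:</p>
<preformat>import torchvision.transforms.functional as TF

# Illustrative severity scales (1 = mild, 2 = moderate, 3 = severe);
# the exact strengths used in the paper's protocol may differ.
BRIGHTNESS = {1: 1.2, 2: 1.5, 3: 1.8}
CONTRAST   = {1: 1.2, 2: 1.5, 3: 1.8}
GAMMA      = {1: 0.9, 2: 0.75, 3: 0.6}
HUE        = {1: 0.05, 2: 0.10, 3: 0.15}  # proxy for soil/background shift
BLUR       = {1: 3, 2: 5, 3: 7}           # Gaussian kernel sizes

def corrupt(img, kind, severity):
    """Apply one controlled domain shift to a PIL image or image tensor."""
    if kind == "brightness":
        return TF.adjust_brightness(img, BRIGHTNESS[severity])
    if kind == "contrast":
        return TF.adjust_contrast(img, CONTRAST[severity])
    if kind == "gamma":
        return TF.adjust_gamma(img, GAMMA[severity])
    if kind == "hue":
        return TF.adjust_hue(img, HUE[severity])
    if kind == "blur":
        return TF.gaussian_blur(img, kernel_size=BLUR[severity])
    raise ValueError(f"unknown corruption type: {kind}")</preformat>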
<p>In summary, the robustness analysis demonstrates that the proposed model maintains consistent classification performance under a range of controlled appearance variations, with the most significant sensitivity observed under severe blur conditions. These results indicate that the model is well-suited for real-world agricultural deployment where illumination and background variations are common, while also highlighting weather-induced image degradation as a key factor for future improvement.</p>
</sec>
</sec>
<sec id="s4" sec-type="conclusions">
<label>4</label>
<title>Conclusions and future work</title>
<p>In this study, we developed TinyWeedNet, a hardware-efficient lightweight CNN designed for real-time on-device weed identification in precision agriculture. Through a TinyML deployment workflow, the model was successfully implemented on an STM32H7 microcontroller, achieving 97.26% accuracy on the DeepWeeds dataset with sub-90 ms inference latency and an average energy cost of approximately 39 mJ per prediction. These results confirm that lightweight neural architectures, when carefully co-designed with hardware constraints, can support robust and responsive weed identification on low-power embedded platforms.</p>
<p>Comprehensive benchmark comparisons, hyperparameter sensitivity analyses, and ablation experiments further revealed how architectural factors, including expansion ratio, channel attention reduction, and stem width, shape performance under resource limitations. The configuration using a moderate expansion ratio (E = 4), compact attention reduction (R = 8), and a 24-channel stem achieved the best balance between accuracy, computational efficiency, and energy consumption. Ablation findings emphasized the importance of multi-scale feature extraction and lightweight attention mechanisms for maintaining high recognition performance, while highlighting the significant cost of removing depthwise separable convolutions. Together, these results underscore the value of hardware-aware architectural design in developing practical TinyML models for agricultural applications.</p>
<p>Beyond model development, this work contributes a reproducible evaluation methodology grounded in physically measured metrics (accuracy, latency, memory usage, and energy per inference). Such measurement-driven characterization provides an actionable energy&#x2013;accuracy&#x2013;latency map that can guide the design and optimization of embedded vision systems for autonomous field robots, UAVs, and distributed sensing nodes in smart farming.</p>
<p>The compact footprint and low power demand of TinyWeedNet make it particularly suitable for long-term, battery-powered or energy-harvesting agricultural deployments, where continuous in-field operation is required without reliance on cloud connectivity.</p>
<p>Future research will extend the present work along several complementary directions to further enhance the practical applicability of TinyWeedNet in real-world agricultural deployments. First, a comprehensive cross-platform evaluation will be conducted across heterogeneous embedded hardware, including multiple MCU families (e.g., ESP32 and nRF52) as well as emerging RISC-V&#x2013;based systems-on-chip, systematically benchmarking classification accuracy, inference latency, memory footprint, and energy consumption under realistic deployment constraints to provide a more general assessment beyond a single hardware platform. Second, although depthwise separable convolutions were adopted in this work to achieve a favorable accuracy&#x2013;efficiency trade-off, additional hardware-aware optimization techniques will be explored, including grouped convolutions, mixed-precision inference, and adaptive or fine-grained quantization strategies, aiming to further reduce computational overhead and memory usage on severely resource-constrained devices. Third, the current study relies exclusively on RGB imagery; incorporating multispectral or temporal information has the potential to significantly improve robustness under varying illumination conditions, soil backgrounds, crop growth stages, and environmental factors. Because of space limitations and the primary objective of assessing the feasibility of TinyML-based weed classification on ultra-low-power devices, such extensions were not included in this work; however, follow-up studies leveraging high-throughput multispectral plant phenotyping platforms are in preparation, with multimodal data fusion forming a core component. In terms of robustness and generalization, future evaluations will be extended to multiple crop types and farm-scale datasets, in conjunction with diverse embedded platforms, enabling a more comprehensive analysis of cross-domain generalization beyond a single dataset or hardware configuration. Finally, although TinyWeedNet is designed as a classification model, prior studies have demonstrated the feasibility of deploying image segmentation models on microcontrollers; building on this foundation, future work will investigate MCU-compatible weed segmentation and localization models, enabling closed-loop perception&#x2013;control&#x2013;actuation pipelines for fully autonomous and energy-efficient weed management systems.</p>
<p>In summary, this study demonstrates that TinyML-oriented lightweight CNNs, when systematically evaluated and optimized using hardware-level measurements, offer a viable pathway toward reliable, energy-efficient, and autonomous weed identification in precision agriculture.</p>
</sec>
</body>
<back>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p></sec>
<sec id="s6" sec-type="author-contributions">
<title>Author contributions</title>
<p>YZ: Software, Data curation, Writing &#x2013; original draft, Resources, Conceptualization, Investigation, Visualization, Methodology, Funding acquisition, Validation, Formal analysis. YL: Writing &#x2013; review &amp; editing. LM-R: Writing &#x2013; review &amp; editing. QQ: Writing &#x2013; review &amp; editing. SB: Supervision, Funding acquisition, Resources, Writing &#x2013; review &amp; editing.</p></sec>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s9" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If&#xa0;you identify any issues, please contact us.</p></sec>
<sec id="s10" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Alshuhail</surname> <given-names>A.</given-names></name>
<name><surname>Mengash</surname> <given-names>H. A.</given-names></name>
<name><surname>Alanazi</surname> <given-names>M. H.</given-names></name>
<name><surname>Saeed</surname> <given-names>M. K.</given-names></name>
<name><surname>Ghaleb</surname> <given-names>M.</given-names></name>
<name><surname>Al Duhayyim</surname> <given-names>M.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Tinyml-enabled structural health monitoring for real-time anomaly detection in civil infrastructure</article-title>. <source>Alexandria. Eng. J.</source> <volume>129</volume>, <fpage>1340</fpage>&#x2013;<lpage>1348</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.aej.2025.08.046</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Anand</surname> <given-names>T.</given-names></name>
<name><surname>Sinha</surname> <given-names>S.</given-names></name>
<name><surname>Mandal</surname> <given-names>M.</given-names></name>
<name><surname>Chamola</surname> <given-names>V.</given-names></name>
<name><surname>Yu</surname> <given-names>F. R.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Agrisegnet: Deep aerial semantic segmentation framework for iot-assisted precision agriculture</article-title>. <source>IEEE Sens. J.</source> <volume>21</volume>, <fpage>17581</fpage>&#x2013;<lpage>17590</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JSEN.2021.3071290</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Asante</surname> <given-names>N. O.</given-names></name>
<name><surname>Mei</surname> <given-names>L.</given-names></name>
<name><surname>Wang</surname> <given-names>X.</given-names></name>
<name><surname>Magno</surname> <given-names>M.</given-names></name>
</person-group> (<year>2025</year>). &#x201c;
<article-title>Tinyeegconformer: An attention-based eeg decoding model for embedded systems</article-title>,&#x201d; in <conf-name>2025 IEEE Sensors Applications Symposium (SAS)</conf-name>. (<publisher-loc>Newcastle, United Kingdom</publisher-loc>) <fpage>1</fpage>&#x2013;<lpage>6</lpage>.
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Chollet</surname> <given-names>F.</given-names></name>
</person-group> (<year>2017</year>). &#x201c;
<article-title>Xception: Deep learning with depthwise separable convolutions</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. (<publisher-loc>Honolulu, HI, USA</publisher-loc>) <fpage>1251</fpage>&#x2013;<lpage>1258</lpage>.
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Czymmek</surname> <given-names>V.</given-names></name>
<name><surname>Harders</surname> <given-names>L. O.</given-names></name>
<name><surname>Knoll</surname> <given-names>F. J.</given-names></name>
<name><surname>Hussmann</surname> <given-names>S.</given-names></name>
</person-group> (<year>2019</year>). &#x201c;
<article-title>Vision-based deep learning approach for real-time detection of weeds in organic farming</article-title>,&#x201d; in <conf-name>2019 IEEE International Instrumentation and Measurement Technology Conference (I2MTC)</conf-name>. (<publisher-loc>Auckland, New Zealand</publisher-loc>) <fpage>1</fpage>&#x2013;<lpage>5</lpage>.
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Dadashzadeh</surname> <given-names>M.</given-names></name>
<name><surname>Abbaspour-Gilandeh</surname> <given-names>Y.</given-names></name>
<name><surname>Mesri-Gundoshmian</surname> <given-names>T.</given-names></name>
<name><surname>Sabzi</surname> <given-names>S.</given-names></name>
<name><surname>Arribas</surname> <given-names>J. I.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>A stereoscopic video computer vision system for weed discrimination in rice field under both natural and controlled light conditions by machine learning</article-title>. <source>Measurement</source> <volume>237</volume>, <fpage>115072</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.measurement.2024.115072</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Garibaldi-M&#xe1;rquez</surname> <given-names>F.</given-names></name>
<name><surname>Flores</surname> <given-names>G.</given-names></name>
<name><surname>Mercado-Ravell</surname> <given-names>D. A.</given-names></name>
<name><surname>Ram&#xed;rez-Pedraza</surname> <given-names>A.</given-names></name>
<name><surname>Valent&#xed;n-Coronado</surname> <given-names>L. M.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Weed classification from natural corn field-multi-plant images based on shallow and deep learning</article-title>. <source>Sensors</source> <volume>22</volume>, <fpage>3021</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s22083021</pub-id>, PMID: <pub-id pub-id-type="pmid">35459006</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>He</surname> <given-names>K.</given-names></name>
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
<name><surname>Ren</surname> <given-names>S.</given-names></name>
<name><surname>Sun</surname> <given-names>J.</given-names></name>
</person-group> (<year>2016</year>). &#x201c;
<article-title>Deep residual learning for image recognition</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>. (<publisher-loc>Las Vegas, NV, USA</publisher-loc>) <fpage>770</fpage>&#x2013;<lpage>778</lpage>.
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Holmes</surname> <given-names>W. S.</given-names></name>
<name><surname>Ooi</surname> <given-names>M. P.-L.</given-names></name>
<name><surname>Look</surname> <given-names>M.</given-names></name>
<name><surname>Chow</surname> <given-names>Y.</given-names></name>
<name><surname>Simpkin</surname> <given-names>R.</given-names></name>
<name><surname>Blanchon</surname> <given-names>D.</given-names></name>
<etal/>
</person-group>. (<year>2019</year>). &#x201c;
<article-title>Proximal near-infrared spectral reflectance characterisation of weeds species in New Zealand pasture</article-title>,&#x201d; in <conf-name>2019 IEEE International Instrumentation and Measurement Technology Conference (I2MTC)</conf-name>. (<publisher-loc>Auckland, New Zealand</publisher-loc>) <fpage>1</fpage>&#x2013;<lpage>6</lpage>.
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Hussmann</surname> <given-names>S.</given-names></name>
<name><surname>Wang</surname> <given-names>Y.</given-names></name>
<name><surname>Czymmek</surname> <given-names>V.</given-names></name>
<name><surname>Knoll</surname> <given-names>F. J.</given-names></name>
</person-group> (<year>2019</year>). &#x201c;
<article-title>Image matching algorithm for weed control applications in organic farming</article-title>,&#x201d; in <conf-name>2019 IEEE International Instrumentation and Measurement Technology Conference (I2MTC)</conf-name>. (<publisher-loc>Auckland, New Zealand</publisher-loc>) <fpage>1</fpage>&#x2013;<lpage>6</lpage>.
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Iandola</surname> <given-names>F. N.</given-names></name>
<name><surname>Han</surname> <given-names>S.</given-names></name>
<name><surname>Moskewicz</surname> <given-names>M. W.</given-names></name>
<name><surname>Ashraf</surname> <given-names>K.</given-names></name>
<name><surname>Dally</surname> <given-names>W. J.</given-names></name>
<name><surname>Keutzer</surname> <given-names>K.</given-names></name>
</person-group> (<year>2016</year>). 
<article-title>Squeezenet: Alexnet-level accuracy with 50x fewer parameters and &lt;0.5 MB model size</article-title>. <source>arXiv</source>. Available online at: <uri xlink:href="https://arxiv.org/abs/1602.07360">https://arxiv.org/abs/1602.07360</uri>. 
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ishak</surname> <given-names>A. J.</given-names></name>
<name><surname>Hussain</surname> <given-names>A.</given-names></name>
<name><surname>Mustafa</surname> <given-names>M. M.</given-names></name>
</person-group> (<year>2009</year>). 
<article-title>Weed image classification using gabor wavelet and gradient field distribution</article-title>. <source>Comput. Electron. Agric.</source> <volume>66</volume>, <fpage>53</fpage>&#x2013;<lpage>61</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.compag.2008.12.003</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jim&#xe9;nez</surname> <given-names>A.-F.</given-names></name>
<name><surname>Ortiz</surname> <given-names>B. V.</given-names></name>
<name><surname>Lena</surname> <given-names>B. P.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Real-time mapping of crop canopy temperature using a wireless network of infrared thermometers on a central pivot</article-title>. <source>Measurement</source> <volume>230</volume>, <fpage>114570</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.measurement.2024.114570</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>J.</given-names></name>
<name><surname>Abbas</surname> <given-names>I.</given-names></name>
<name><surname>Noor</surname> <given-names>R. S.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>Development of deep learning-based variable rate agrochemical spraying system for targeted weeds control in strawberry crop</article-title>. <source>Agronomy</source> <volume>11</volume>, <fpage>1480</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/agronomy11081480</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Lottes</surname> <given-names>P.</given-names></name>
<name><surname>Khanna</surname> <given-names>R.</given-names></name>
<name><surname>Pfeifer</surname> <given-names>J.</given-names></name>
<name><surname>Siegwart</surname> <given-names>R.</given-names></name>
<name><surname>Stachniss</surname> <given-names>C.</given-names></name>
</person-group> (<year>2017</year>). &#x201c;
<article-title>Uav-based crop and weed classification for smart farming</article-title>,&#x201d; in <conf-name>2017 IEEE international conference on robotics and automation (ICRA)</conf-name>. <fpage>3024</fpage>&#x2013;<lpage>3031</lpage> (<publisher-loc>Marina Bay Sands in Singapore</publisher-loc>: 
<publisher-name>IEEE</publisher-name>).
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Luo</surname> <given-names>T.</given-names></name>
<name><surname>Zhao</surname> <given-names>J.</given-names></name>
<name><surname>Gu</surname> <given-names>Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>S.</given-names></name>
<name><surname>Qiao</surname> <given-names>X.</given-names></name>
<name><surname>Tian</surname> <given-names>W.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Classification of weed seeds based on visual images and deep learning</article-title>. <source>Inf. Process. Agric.</source> <volume>10</volume>, <fpage>40</fpage>&#x2013;<lpage>51</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.inpa.2021.10.002</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Maheswaran</surname> <given-names>S.</given-names></name>
<name><surname>Sathesh</surname> <given-names>S.</given-names></name>
<name><surname>Gomathi</surname> <given-names>R.</given-names></name>
<name><surname>Indhumathi</surname> <given-names>N.</given-names></name>
<name><surname>Prasanth</surname> <given-names>S.</given-names></name>
<name><surname>Charumathi</surname> <given-names>K.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). &#x201c;
<article-title>Automated weed identification and classification using artificial intelligence</article-title>,&#x201d; in <conf-name>2024 15th International Conference on Computing Communication and Networking Technologies (ICCCNT)</conf-name>. (<publisher-loc>Kamand, India</publisher-loc>) <fpage>1</fpage>&#x2013;<lpage>7</lpage> (
<publisher-name>IEEE</publisher-name>).
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Olsen</surname> <given-names>A.</given-names></name>
<name><surname>Konovalov</surname> <given-names>D. A.</given-names></name>
<name><surname>Philippa</surname> <given-names>B.</given-names></name>
<name><surname>Ridd</surname> <given-names>P.</given-names></name>
<name><surname>Wood</surname> <given-names>J. C.</given-names></name>
<name><surname>Johns</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2019</year>). 
<article-title>Deepweeds: A multiclass weed species image dataset for deep learning</article-title>. <source>Sci. Rep.</source> <volume>9</volume>, <fpage>2058</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-018-38343-3</pub-id>, PMID: <pub-id pub-id-type="pmid">30765729</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Parvis</surname> <given-names>M.</given-names></name>
<name><surname>Pirola</surname> <given-names>M.</given-names></name>
</person-group> (<year>1999</year>). 
<article-title>A measurement system for on-line estimation of weed coverage</article-title>. <source>IEEE Trans. Instrum. Meas.</source> <volume>48</volume>, <fpage>990</fpage>&#x2013;<lpage>994</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/19.799659</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Sandler</surname> <given-names>M.</given-names></name>
<name><surname>Howard</surname> <given-names>A.</given-names></name>
<name><surname>Zhu</surname> <given-names>M.</given-names></name>
<name><surname>Zhmoginov</surname> <given-names>A.</given-names></name>
<name><surname>Chen</surname> <given-names>L.-C.</given-names></name>
</person-group> (<year>2018</year>). &#x201c;
<article-title>MobileNetV2: Inverted residuals and linear bottlenecks</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</conf-name>. <fpage>4510</fpage>&#x2013;<lpage>4520</lpage> (<publisher-loc>Salt Lake City, UT, USA</publisher-loc>).
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sathesh</surname> <given-names>S.</given-names></name>
<name><surname>Maheswaran</surname> <given-names>S.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>The design and development of delta arm for multi-purpose agribots</article-title>. <source>IETE J. Res.</source> <volume>70</volume>, <fpage>7526</fpage>&#x2013;<lpage>7536</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/03772063.2024.2350934</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Sathesh</surname> <given-names>S.</given-names></name>
<name><surname>Maheswaran</surname> <given-names>S.</given-names></name>
<name><surname>Navanithi</surname> <given-names>K.</given-names></name>
<name><surname>Dhivakar</surname> <given-names>R.</given-names></name>
<name><surname>Dhushyanth</surname> <given-names>R.</given-names></name>
<name><surname>Ajay</surname> <given-names>K.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). &#x201c;
<article-title>Farmoline: the new age agro drone in modern embedded agriculture to spray healthy organic pesticides</article-title>,&#x201d; in <conf-name>2024 15th International Conference on Computing, Communication and Networking Technologies (ICCCNT)</conf-name>. <fpage>1</fpage>&#x2013;<lpage>7</lpage> (<publisher-loc>Kamand, India</publisher-loc>: 
<publisher-name>IEEE</publisher-name>).
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Sathesh</surname> <given-names>S.</given-names></name>
<name><surname>Maheswaran</surname> <given-names>S.</given-names></name>
<name><surname>Sridhar</surname> <given-names>N.</given-names></name>
<name><surname>Dhivakar</surname> <given-names>R.</given-names></name>
<name><surname>Dhushyanth</surname> <given-names>R.</given-names></name>
<name><surname>Ajay</surname> <given-names>K.</given-names></name>
</person-group> (<year>2025</year>a). &#x201c;
<article-title>Design and development of quadruped robot for carrying and operating agro tools in agricultural fields</article-title>,&#x201d; in <conf-name>2025 International Conference on Electronics, Computing, Communication and Control Technology (ICECCC)</conf-name>. <fpage>1</fpage>&#x2013;<lpage>6</lpage> (<publisher-loc>Bengaluru, India</publisher-loc>: 
<publisher-name>IEEE</publisher-name>).
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Sathesh</surname> <given-names>S.</given-names></name>
<name><surname>Maheswaran</surname> <given-names>S.</given-names></name>
<name><surname>Venkateswaran</surname> <given-names>K.</given-names></name>
<name><surname>Hariharini</surname> <given-names>V.</given-names></name>
<name><surname>Indhu Priya</surname> <given-names>D.</given-names></name>
<name><surname>Hemamalini</surname> <given-names>M.</given-names></name>
</person-group> (<year>2025</year>b). &#x201c;
<article-title>Design and development of end effector for weed removal</article-title>,&#x201d; in <conf-name>2025 International Conference on Electronics, Computing, Communication and Control Technology (ICECCC)</conf-name>. <fpage>1</fpage>&#x2013;<lpage>7</lpage> (<publisher-loc>Bengaluru, India</publisher-loc>: 
<publisher-name>IEEE</publisher-name>).
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Simonyan</surname> <given-names>K.</given-names></name>
<name><surname>Zisserman</surname> <given-names>A.</given-names></name>
</person-group> (<year>2014</year>). 
<article-title>Very deep convolutional networks for large-scale image recognition</article-title>. <source>arXiv</source>. Available online at: <uri xlink:href="https://arxiv.org/abs/1409.1556">https://arxiv.org/abs/1409.1556</uri>.
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sunil</surname> <given-names>G.</given-names></name>
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Koparan</surname> <given-names>C.</given-names></name>
<name><surname>Ahmed</surname> <given-names>M. R.</given-names></name>
<name><surname>Howatt</surname> <given-names>K.</given-names></name>
<name><surname>Sun</surname> <given-names>X.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Weed and crop species classification using computer vision and deep learning technologies in greenhouse conditions</article-title>. <source>J. Agric. Food Res.</source> <volume>9</volume>, <fpage>100325</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jafr.2022.100325</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Szegedy</surname> <given-names>C.</given-names></name>
<name><surname>Vanhoucke</surname> <given-names>V.</given-names></name>
<name><surname>Ioffe</surname> <given-names>S.</given-names></name>
<name><surname>Shlens</surname> <given-names>J.</given-names></name>
<name><surname>Wojna</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2016</year>). &#x201c;
<article-title>Rethinking the inception architecture for computer vision</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</conf-name>. <fpage>2818</fpage>&#x2013;<lpage>2826</lpage> (<publisher-loc>Las Vegas, NV, USA</publisher-loc>).
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Tan</surname> <given-names>M.</given-names></name>
<name><surname>Le</surname> <given-names>Q.</given-names></name>
</person-group> (<year>2019</year>). &#x201c;
<article-title>EfficientNet: Rethinking model scaling for convolutional neural networks</article-title>,&#x201d; in <conf-name>Proceedings of the 36th International Conference on Machine Learning</conf-name>. <fpage>6105</fpage>&#x2013;<lpage>6114</lpage> (<publisher-loc>Long Beach, CA, USA</publisher-loc>: 
<publisher-name>PMLR</publisher-name>).
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Tan</surname> <given-names>M.</given-names></name>
<name><surname>Le</surname> <given-names>Q. E.</given-names></name>
<etal/>
</person-group>. (<year>2019</year>). &#x201c;
<article-title>Rethinking model scaling for convolutional neural networks</article-title>,&#x201d; in <conf-name>Proceedings of the International conference on machine learning</conf-name>, <conf-loc>Long Beach, CA, USA</conf-loc>, Vol. <volume>15</volume>.
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Upadhyay</surname> <given-names>A.</given-names></name>
<name><surname>Chandel</surname> <given-names>N. S.</given-names></name>
<name><surname>Singh</surname> <given-names>K. P.</given-names></name>
<name><surname>Chakraborty</surname> <given-names>S. K.</given-names></name>
<name><surname>Nandede</surname> <given-names>B. M.</given-names></name>
<name><surname>Kumar</surname> <given-names>M.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Deep learning and computer vision in plant disease detection: a comprehensive review of techniques, models, and trends in precision agriculture</article-title>. <source>Artif. Intell. Rev.</source> <volume>58</volume>, <fpage>92</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10462-024-11100-x</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wiafe</surname> <given-names>E. K.</given-names></name>
<name><surname>Betitame</surname> <given-names>K.</given-names></name>
<name><surname>Ram</surname> <given-names>B. G.</given-names></name>
<name><surname>Sun</surname> <given-names>X.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Technical study on the efficiency and models of weed control methods using unmanned ground vehicles: A review</article-title>. <source>Artif. Intell. Agric.</source> <volume>15</volume>, <fpage>622</fpage>&#x2013;<lpage>641</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.aiia.2025.05.003</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wu</surname> <given-names>X.</given-names></name>
<name><surname>Aravecchia</surname> <given-names>S.</given-names></name>
<name><surname>Lottes</surname> <given-names>P.</given-names></name>
<name><surname>Stachniss</surname> <given-names>C.</given-names></name>
<name><surname>Pradalier</surname> <given-names>C.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Robotic weed control using automated weed and crop classification</article-title>. <source>J. Field Robot.</source> <volume>37</volume>, <fpage>322</fpage>&#x2013;<lpage>340</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/rob.21938</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zeynali</surname> <given-names>M.</given-names></name>
<name><surname>Alipour</surname> <given-names>K.</given-names></name>
<name><surname>Tarvirdizadeh</surname> <given-names>B.</given-names></name>
<name><surname>Ghamari</surname> <given-names>M.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Non-invasive blood glucose monitoring using PPG signals with various deep learning models and implementation using TinyML</article-title>. <source>Sci. Rep.</source> <volume>15</volume>, <fpage>581</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-024-84265-8</pub-id>, PMID: <pub-id pub-id-type="pmid">39753714</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
</person-group> (<year>2025</year>). <source>Efficient On-Device Intelligence for Structural Health Monitoring: A TinyML Perspective</source> (<publisher-loc>Sundsvall, Sweden</publisher-loc>: 
<publisher-name>Mid Sweden University</publisher-name>).
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Adin</surname> <given-names>V.</given-names></name>
<name><surname>Bader</surname> <given-names>S.</given-names></name>
<name><surname>Oelmann</surname> <given-names>B.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Leveraging acoustic emission and machine learning for concrete materials damage classification on embedded devices</article-title>. <source>IEEE Trans. Instrum. Meas.</source> <volume>72</volume>, <fpage>1</fpage>&#x2013;<lpage>8</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TIM.2023.3307751</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Martinez-Rau</surname> <given-names>L. S.</given-names></name>
<name><surname>Vu</surname> <given-names>Q. N. P.</given-names></name>
<name><surname>Oelmann</surname> <given-names>B.</given-names></name>
<name><surname>Bader</surname> <given-names>S.</given-names></name>
</person-group> (<year>2025</year>a). &#x201c;
<article-title>Survey of quantization techniques for on-device vision-based crack detection</article-title>,&#x201d; in <conf-name>2025 IEEE International Instrumentation and Measurement Technology Conference (I2MTC)</conf-name>. <fpage>1</fpage>&#x2013;<lpage>6</lpage> (<publisher-loc>Chemnitz, Germany</publisher-loc>).
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Pullin</surname> <given-names>R.</given-names></name>
<name><surname>Oelmann</surname> <given-names>B.</given-names></name>
<name><surname>Bader</surname> <given-names>S.</given-names></name>
</person-group> (<year>2025</year>b). 
<article-title>On-device fault diagnosis with augmented acoustic emission data: A case study on carbon fiber panels</article-title>. <source>IEEE Trans. Instrum. Meas.</source> <volume>74</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TIM.2025.3577849</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>Y.</given-names></name>
<name><surname>Xu</surname> <given-names>Y.</given-names></name>
<name><surname>Martinez-Rau</surname> <given-names>L. S.</given-names></name>
<name><surname>Vu</surname> <given-names>Q. N. P.</given-names></name>
<name><surname>Oelmann</surname> <given-names>B.</given-names></name>
<name><surname>Bader</surname> <given-names>S.</given-names></name>
</person-group> (<year>2025</year>c). &#x201c;
<article-title>On-device crack segmentation for edge structural health monitoring</article-title>,&#x201d; in <conf-name>2025 IEEE Sensors Applications Symposium (SAS)</conf-name>. <fpage>1</fpage>&#x2013;<lpage>6</lpage> (<publisher-loc>Newcastle, United Kingdom</publisher-loc>).
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1937850">Parvathaneni Naga Srinivasu</ext-link>, Amrita Vishwa Vidyapeetham University, India</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3303472">Devshri Satyarthi</ext-link>, Atal Bihari Vajpayee Indian Institute of Information Technology and Management, India</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3310057">Maheswaran Shanmugam</ext-link>, Anna University, India</p></fn>
</fn-group>
<fn-group>
<fn id="fn1"><label>1</label>
<p><ext-link ext-link-type="uri" xlink:href="https://stm32ai.st.com/stm32-cube-ai/">https://stm32ai.st.com/stm32-cube-ai/</ext-link></p></fn>
<fn id="fn2"><label>2</label>
<p><ext-link ext-link-type="uri" xlink:href="https://pytorch.org/">https://pytorch.org/</ext-link></p></fn>
<fn id="fn3"><label>3</label>
<p><ext-link ext-link-type="uri" xlink:href="https://onnx.ai/">https://onnx.ai/</ext-link></p></fn>
</fn-group>
</back>
</article>