<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2026.1773924</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>A multi-dimensional pyramid strategy for limited sample classification of hyperspectral cropland imagery</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Yang</surname><given-names>Mingchao</given-names></name>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3326680/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project-administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><institution>Comprehensive Geophysical Survey Team, Zhejiang Coal Geology Bureau</institution>, <city>Hangzhou</city>, <country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Mingchao Yang, <email xlink:href="mailto:yangmingchao0808@gmail.com">yangmingchao0808@gmail.com</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-03-04">
<day>04</day>
<month>03</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>17</volume>
<elocation-id>1773924</elocation-id>
<history>
<date date-type="received">
<day>23</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>26</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>16</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Yang.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Yang</copyright-holder>
<license>
<ali:license_ref start_date="2026-03-04">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Hyperspectral crop classification is often challenged by substantial intra-class spectral variability, high inter-class similarity, and the scarcity of high-quality labeled samples. These issues frequently lead to insufficient feature fusion or excessive computational complexity in conventional classification methods. To address these problems, this study proposes MDPC-Net, a limited sample hyperspectral crop classification method that couples a multi-dimensional pyramid with a Transformer architecture. The model extracts crop features from spectral, spatial, and joint spectral&#x2013;spatial dimensions to capture fine-grained characteristics. A feature reorganization strategy is further incorporated to effectively reduce dimensional redundancy, while the Transformer modules enhance global dependency modeling, thereby improving the discrimination of crop features in complex environments. Comparative experiments with six classical models on three datasets&#x2014;Matiwan Village, WHU-HongHu, and WHU-LongKou&#x2014;demonstrate that MDPC-Net achieves competitive accuracy with substantially lower computational complexity, effectively balancing the trade-off between classification performance and efficiency. The proposed approach provides a promising solution for fine-grained hyperspectral crop classification under limited sample conditions.</p>
</abstract>
<kwd-group>
<kwd>crop classification</kwd>
<kwd>feature fusion</kwd>
<kwd>feature pyramid</kwd>
<kwd>hyperspectral remote sensing</kwd>
<kwd>limited sample learning</kwd>
</kwd-group>    
<funding-group>
<award-group id="gs1">
<funding-source id="sp1">
<institution-wrap>
<institution>National Key Research and Development Program of China</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100012166</institution-id>
</institution-wrap>
</funding-source>
</award-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by the National Key Research and Development Program of China: &#x201c;Earth observation and ground monitoring applications for crop food security in China and South Africa&#x201d; (2023YFE0110400) and by the Science and Technology Project of Zhejiang Provincial Department of Natural Resources: &#x201c;Development of crop area monitoring technologies and platforms based on Remote Sensing + AI&#x201d; (2025ZJCH017).</funding-statement>
</funding-group>
<counts>
<fig-count count="10"/>
<table-count count="5"/>
<equation-count count="17"/>
<ref-count count="35"/>
<page-count count="16"/>
<word-count count="10270"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Sustainable and Intelligent Phytoprotection</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Precise crop classification forms the foundation for agricultural production management (<xref ref-type="bibr" rid="B27">Tang et&#xa0;al., 2024</xref>), policy formulation, and food security, providing essential information for decision support in agricultural systems (<xref ref-type="bibr" rid="B10">Guerri et&#xa0;al., 2024</xref>). Hyperspectral imagery (HSI), with its exceptionally high spectral resolution, captures subtle reflectance variations across continuous wavelength ranges, and has therefore been widely employed in hyperspectral image&#x2013;based crop classification (HSICC) (<xref ref-type="bibr" rid="B17">Khan et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B29">Ullah et&#xa0;al., 2025</xref>).</p>
<p>However, crop classification remains challenging due to the intrinsic variability of agricultural ecosystems (<xref ref-type="bibr" rid="B1">Aasen et&#xa0;al., 2018</xref>). Growth-stage differences and stress conditions within the same crop species often lead to the &#x201c;same object, different spectra&#x201d; phenomenon, whereas spectral similarity among different crops in certain wavelength regions results in the &#x201c;different objects, same spectrum&#x201d; problem (<xref ref-type="bibr" rid="B8">Gallo et&#xa0;al., 2023</xref>). Both issues substantially reduce inter-class separability. Meanwhile, acquiring large quantities of high-quality, pixel-level annotated samples in farmland environments is costly and labor-intensive, making data scarcity a persistent bottleneck for supervised HSICC and motivating increasing interest in limited sample learning paradigms (<xref ref-type="bibr" rid="B9">Gao et&#xa0;al., 2017</xref>). Traditional machine learning approaches rely heavily on handcrafted spectral indices and texture descriptors for feature extraction and dimensionality reduction (<xref ref-type="bibr" rid="B4">Ali et&#xa0;al., 2023</xref>), followed by statistical or shallow learning algorithms such as support vector machines (SVM), random forests (RF), k-nearest neighbors (KNN), linear discriminant analysis (LDA), and na&#xef;ve Bayes (NB) for classification. These methods depend strongly on expert experience in feature engineering and are prone to the &#x201c;curse of dimensionality&#x201d; when dealing with high-dimensional spectral data (<xref ref-type="bibr" rid="B12">Hamidi et&#xa0;al., 2021</xref>). Moreover, they typically treat spectral and spatial information independently and lack end-to-end joint learning capabilities (<xref ref-type="bibr" rid="B18">Konduri et&#xa0;al., 2020</xref>; <xref ref-type="bibr" rid="B16">Khan et&#xa0;al., 2023</xref>). As a result, they fail to model deeper spectral&#x2013;spatial relationships, making it difficult to distinguish spectrally similar crop types in complex agricultural environments and limiting their ability to achieve fine-grained classification in real-world farmland scenes.</p>
<p>Deep learning methods have been increasingly applied to HSICC tasks in recent years and have achieved remarkable progress (<xref ref-type="bibr" rid="B5">Chandrasekharan et&#xa0;al., 2016</xref>). By constructing multi-layer nonlinear mappings, deep learning establishes an end-to-end learning mechanism capable of automatically capturing intricate spectral features and spatial contextual dependencies in hyperspectral data (<xref ref-type="bibr" rid="B2">Agilandeeswari et&#xa0;al., 2022</xref>). Convolutional neural networks (CNNs) are widely used to extract spectral&#x2013;spatial features of crops. One-dimensional CNNs (1D-CNNs) focus primarily on spectral sequence modeling while neglecting spatial neighborhood information (<xref ref-type="bibr" rid="B14">Hu et&#xa0;al., 2015</xref>), limiting their ability to differentiate spectrally similar crops in farmland scenes with blurred boundaries or mixed pixels. Two-dimensional CNNs (2D-CNNs) can extract spatial textures but are insufficient in modeling complex inter-band spectral dependencies, which constrains their ability to capture subtle spectral differences among crop varieties (<xref ref-type="bibr" rid="B23">Nazeri et&#xa0;al., 2019</xref>). Three-dimensional CNNs (3D-CNNs) jointly extract spectral&#x2013;spatial features but suffer from high computational complexity and require large amounts of high-quality annotated samples&#x2014;conditions often unmet in real agricultural environments&#x2014;thereby restricting their practical applicability (<xref ref-type="bibr" rid="B7">Fernandes et&#xa0;al., 2019</xref>). Moreover, CNN-based methods inherently emphasize local neighborhood information and often fail to capture global dependencies, making their predictions susceptible to salt-and-pepper noise.</p>
<p>To alleviate the dependence on large-scale labeled data, limited sample learning has recently emerged as an effective paradigm for remote sensing image classification. Limited sample approaches can be broadly categorized into three groups. Metric-based methods learn embedding spaces in which samples are classified based on similarity measures, enabling knowledge transfer from base classes to novel classes with limited samples. Meta-learning-based approaches aim to acquire task-agnostic initialization or learning strategies that can rapidly adapt to new classes under limited sample settings. More recently, Transformer-inspired limited sample frameworks have been explored to leverage global contextual modeling and attention mechanisms for improved feature generalization. Despite their success in natural image domains, these methods face notable challenges when applied to hyperspectral data, including high spectral dimensionality, complex spectral&#x2013;spatial coupling, and severe intra-class variability, which often lead to degraded generalization performance under extremely limited training samples.</p>
<p>To overcome these limitations and enhance global feature modeling in HSICC, researchers have explored improved CNN variants and Transformer-based architectures. <xref ref-type="bibr" rid="B25">Roy et&#xa0;al. (2020)</xref> proposed HybridSN, a hybrid spectral CNN that integrates 3D-CNN and 2D-CNN layers to improve spectral&#x2013;spatial feature representation while controlling model complexity. SqueezeNet (<xref ref-type="bibr" rid="B15">Iandola et&#xa0;al., 2016</xref>) employs pointwise convolutions to reduce feature dimensionality, followed by multi-scale convolutions, significantly compressing parameter counts. MobileNets (<xref ref-type="bibr" rid="B13">Howard et&#xa0;al., 2017</xref>) utilize depthwise separable convolutions to build lightweight neural networks by applying a single filter to each input channel, greatly reducing computational costs. Although these models achieve lower computational complexity, their classification accuracy remains limited. To enhance global context modeling, <xref ref-type="bibr" rid="B30">Wang et&#xa0;al. (2023)</xref> introduced ESSAN, which incorporates dilated convolutions and Transformer modules to improve large-scale feature perception and contextual representation. <xref ref-type="bibr" rid="B28">Tu et&#xa0;al. (2022)</xref> further introduced a pixel aggregation strategy that groups homogeneous regions and integrates them into a hierarchical Transformer framework, enabling adaptive multi-scale feature construction and more effective modeling of local spatial semantics (<xref ref-type="bibr" rid="B22">Michelon et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B19">Liang et&#xa0;al., 2024</xref>). These developments improve the model&#x2019;s ability to perceive field shapes, crop distribution patterns, and critical spectral bands.</p>
<p>Despite recent advances, existing HSICC methods still struggle to simultaneously ensure low computational complexity and high classification accuracy. The growing demand for efficient and precise crop mapping in precision agriculture poses new challenges for model design and feature organization. SANet (<xref ref-type="bibr" rid="B31">Zhang et&#xa0;al., 2024</xref>) integrates spectral and contextual information while emphasizing intra-spectral autocorrelation. By combining spatial&#x2013;spectral non-local blocks with multi-scale spectral self-attention (SSA), SANet allocates more attention resources to spatial and spectral dimensions and models inherent spectral&#x2013;spatial correlations, thereby strengthening the representation of contextual structures and key spectral dimensions (<xref ref-type="bibr" rid="B3">Al Duhayyim et&#xa0;al., 2023</xref>). <xref ref-type="bibr" rid="B6">Chen et&#xa0;al. (2024)</xref> introduced FrFSSPN, a frequency&#x2013;spectral&#x2013;spatial prototype network based on fractional Fourier transform, which integrates frequency-domain information with spectral&#x2013;spatial representations to enlarge inter-class separability while preserving intra-class consistency. CMTNet (<xref ref-type="bibr" rid="B11">Guo et&#xa0;al., 2025</xref>) further enhances model robustness under limited sample conditions by incorporating a spectral&#x2013;spatial feature extraction module for shallow features and enforcing cross-level constraints to improve classification stability (<xref ref-type="bibr" rid="B20">Lu et&#xa0;al., 2024</xref>).</p>
<p>Although these methods utilize CNN variants and Transformers to alleviate data scarcity, capture global dependencies, and account for crop-specific growth variability, significant challenges remain for multi-channel hyperspectral tasks. Most existing limited sample HSICC frameworks still rely on fragmented feature extraction pipelines, making it difficult to integrate features across multiple dimensions and thereby limiting their ability to capture the complex spectral&#x2013;spatial characteristics of crops. Moreover, as network depth increases, feature fusion modules often incur substantial computational overhead, which restricts their adaptability to practical applications. In addition, owing to the extremely high spectral resolution of HSI, the multi-head self-attention (MHSA) mechanism may focus excessively on long-range global dependencies while overlooking local spectral&#x2013;spatial structures. These limitations highlight the need for a unified and efficient architecture capable of balancing representation capability and computational efficiency under limited sample settings.</p>
<p>To address these challenges, this study proposes a novel HSICC framework named MDPC-Net (Multi-Dimensional Pyramid Coupling Transformer Network), which integrates a multi-dimensional progressive feature extractor, a linear-projection pyramid fusion module, and Transformer-based global semantic modeling. First, three parallel progressive dilated convolution branches&#x2014;1D, 2D, and 3D&#x2014;are designed to extract spectral, spatial, and joint spectral&#x2013;spatial features from different data dimensions. Second, the linear-projection feature pyramid reorganizes and fuses multi-dimensional features while reducing computational cost. Finally, Transformer modules leverage multi-head self-attention and multilayer perceptrons to capture global semantic dependencies across spectral&#x2013;spatial domains.</p>
<p>The main contributions of this work are summarized as follows:</p>
<list list-type="order">
<list-item>
<p>A multi-dimensional feature extraction backbone based on progressive dilated convolutions is proposed. Three parallel branches (1D, 2D, and 3D) capture hyperspectral features from different dimensions, while progressive dilation enables multi-level receptive field aggregation.</p></list-item>
<list-item>
<p>A linear-projection feature pyramid fusion module is designed. This module employs multi-scale depthwise separable convolutions to extract spatial features at different scales and reorganizes these features into a unified representation, enhancing multi-scale perception.</p></list-item>
<list-item>
<p>A unified architecture combining the multi-dimensional feature backbone, linear-projection pyramid, and Transformer is developed. This design effectively models global spectral&#x2013;spatial dependencies and strengthens the discrimination of crops with similar spectral signatures. Experiments on three datasets demonstrate that MDPC-Net achieves high classification accuracy while maintaining low computational complexity.</p></list-item>
</list>
</sec>
<sec id="s2">
<label>2</label>
<title>Methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Overall architecture of MDPC-Net</title>
<p>The overall architecture of MDPC-Net is illustrated in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>. The model consists of three components: a multi-dimensional feature extraction backbone based on progressive dilated convolutions, a linear-projection feature pyramid for multi-branch feature fusion, and an embedding and Transformer-based feature integration module for final representation learning and classification.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>The overall network architecture of MDPC-Net.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1773924-g001.tif">
<alt-text content-type="machine-generated">Diagram of a multi-dimensional feature extraction process showing 1D, 2D, and 3D feature extraction branches from a data patch. Each branch uses convolution layers with different dilation rates. Features are linearly projected and reorganized into multiple dimensions, producing deep global features. A secondary section details embedding, layer normalization, multi-head attention, and fully connected layers for processing tokens. Icons represent residual connections, feature concatenation, ReLU, and batch normalization.</alt-text>
</graphic></fig>
<p>First, for each pixel in the original HSI, an image patch of size <inline-formula>
<mml:math display="inline" id="im1"><mml:mrow><mml:mn>11</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>11</mml:mn></mml:mrow></mml:math></inline-formula> is extracted and used as the basic processing unit. The patch is simultaneously fed into three parallel branches&#x2014;1D, 2D, and 3D&#x2014;to extract features from different data dimensions. All three branches employ progressive dilated convolutions, where zeros are inserted between convolutional kernel elements to gradually enlarge the receptive field without increasing the number of parameters or computational cost. This design enables the model to capture fine-grained local details while progressively learning broader global dependencies.</p>
<p>The feature maps obtained from the three branches are then forwarded to the linear-projection feature pyramid module. Through a linear mapping, features from all branches are projected into a unified dimensional space. The multi-dimensional features are then partitioned along the channel dimension into several groups for structural reorganization. A subsequent linear projection aggregates these reorganized groups to form deep, globally enriched representations.</p>
<p>Finally, the resulting global features are transformed into token sequences via an embedding layer and fed into the Transformer module. The Transformer captures long-range dependencies and facilitates comprehensive feature interactions across different scales, further enhancing the model&#x2019;s ability to interpret complex spectral&#x2013;spatial characteristics of crops in hyperspectral imagery.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Multi-dimensional feature extraction module</title>
<p>Conventional convolution is a fundamental operation in convolutional neural networks, where a kernel slides over the input with a predefined stride, and each kernel element is multiplied with the corresponding input element and summed to produce the output. The receptive field of standard convolution is determined by the kernel size. When larger-scale contextual information is required, the kernel size must be increased, which inevitably leads to a substantial growth in the number of parameters and computational cost. Furthermore, standard convolution extracts features at a single scale, making it insufficient for inputs containing multi-scale structures, as it may fail to capture comprehensive feature information.</p>
<p>Dilated convolution extends standard convolution by inserting zeros between kernel elements, where the dilation rate controls the spacing between sampled positions. This mechanism enlarges the receptive field without increasing the kernel size or parameter count, enabling the model to capture broader contextual information. However, using a fixed dilation rate restricts the receptive field to a single scale, potentially missing information at other scales and leading to the so-called &#x201c;gridding effect,&#x201d; which degrades feature extraction performance.</p>
<p><xref ref-type="disp-formula" rid="eq1">Equations 1</xref>, <xref ref-type="disp-formula" rid="eq2">2</xref> present the effective kernel size and receptive field (RF) size of dilated convolution, respectively:</p>
<disp-formula id="eq1"><label>(1)</label>
<mml:math display="block" id="M1"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mi>l</mml:mi><mml:mo>+</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>l</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>d</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq2"><label>(2)</label>
<mml:math display="block" id="M2"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mtext>R</mml:mtext><mml:mrow><mml:mi>m</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mtext>R</mml:mtext><mml:mi>m</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo>&#xd7;</mml:mo><mml:mstyle displaystyle="true"><mml:munderover><mml:mo>&#x220f;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>m</mml:mi></mml:munderover><mml:mrow><mml:msub><mml:mtext>S</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mstyle></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im2"><mml:mi>L</mml:mi></mml:math></inline-formula> denotes the effective kernel size, <inline-formula>
<mml:math display="inline" id="im3"><mml:mi>l</mml:mi></mml:math></inline-formula> represents the size of the original convolution kernel, and <inline-formula>
<mml:math display="inline" id="im4"><mml:mi>d</mml:mi></mml:math></inline-formula> is the dilation rate. <inline-formula>
<mml:math display="inline" id="im5"><mml:mrow><mml:msub><mml:mi>R</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> denotes the receptive field size of the <inline-formula>
<mml:math display="inline" id="im6"><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>m</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>-th dilated convolution layer, and <inline-formula>
<mml:math display="inline" id="im7"><mml:mrow><mml:msub><mml:mi>S</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> represents the stride of the <inline-formula>
<mml:math display="inline" id="im8"><mml:mi>i</mml:mi></mml:math></inline-formula>-th layer.</p>
<p>The Multi-Dimensional Feature Extraction Module (MDFEM) adopts a multi-scale representation strategy based on Progressive Dilated Convolution (PDC). The core idea of PDC is to construct hierarchical receptive fields through an increasing sequence of dilation rates. In our design, a dilation rate sequence of <inline-formula>
<mml:math display="inline" id="im9"><mml:mrow><mml:mi>d</mml:mi><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mn>5</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:mrow></mml:math></inline-formula> is employed, which mathematically forms a receptive field pyramid. To further enhance the model&#x2019;s capability in capturing diverse characteristics of hyperspectral data, three parallel branches&#x2014;1D, 2D, and 3D&#x2014;are incorporated to extract features from different dimensional perspectives.</p>
<p>The 1D branch applies one-dimensional convolution to focus on spectral-domain features, enabling the extraction of subtle variations along the spectral signatures. The 2D branch employs two-dimensional convolution to learn spatial information, such as structural patterns and textural details within hyperspectral imagery. The 3D branch uses three-dimensional convolution to jointly model the spectral and spatial dimensions, thereby capturing integrated spectral&#x2013;spatial representations. The outputs from the three branches are flattened along the spatial dimension and concatenated to form a unified feature tensor. The fused feature representations of each branch are formulated in <xref ref-type="disp-formula" rid="eq3">Equations 3</xref>&#x2013;<xref ref-type="disp-formula" rid="eq5">5</xref>, respectively.</p>
<disp-formula id="eq3"><label>(3)</label>
<mml:math display="block" id="M3"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mi>D</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>G</mml:mi><mml:mn>0</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>G</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>G</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>G</mml:mi><mml:mn>5</mml:mn></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq4"><label>(4)</label>
<mml:math display="block" id="M4"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>2</mml:mn><mml:mi>D</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>F</mml:mi><mml:mn>0</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>F</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>F</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>F</mml:mi><mml:mn>5</mml:mn></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq5"><label>(5)</label>
<mml:math display="block" id="M5"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>3</mml:mn><mml:mi>D</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>H</mml:mi><mml:mn>0</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>H</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>H</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>H</mml:mi><mml:mn>5</mml:mn></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im10"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mi>D</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula>
<mml:math display="inline" id="im11"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>2</mml:mn><mml:mi>D</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula>
<mml:math display="inline" id="im12"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>3</mml:mn><mml:mi>D</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> denote the feature maps extracted from the 1D, 2D, and 3D branches, respectively. <inline-formula>
<mml:math display="inline" id="im13"><mml:mrow><mml:msub><mml:mi>G</mml:mi><mml:mi>d</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula>
<mml:math display="inline" id="im14"><mml:mrow><mml:msub><mml:mi>F</mml:mi><mml:mi>d</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula>
<mml:math display="inline" id="im15"><mml:mrow><mml:msub><mml:mi>H</mml:mi><mml:mi>d</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> represent the outputs of the 1D, 2D, and 3D convolutions with dilation rates <inline-formula>
<mml:math display="inline" id="im16"><mml:mrow><mml:mi>d</mml:mi><mml:mo>=</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mn>5</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:mrow></mml:math></inline-formula>, respectively.</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Linear-projection feature pyramid module</title>
<p>The core idea of the Linear-Projection Feature Pyramid Module (LPFPM) is to capture spatial features at multiple scales through depthwise convolutions (DC) and to fuse these features to enhance the model&#x2019;s ability to perceive multi-scale information. Specifically, the multi-dimensional feature maps <inline-formula>
<mml:math display="inline" id="im17"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mi>D</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula>
<mml:math display="inline" id="im18"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>2</mml:mn><mml:mi>D</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, and <inline-formula>
<mml:math display="inline" id="im19"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>3</mml:mn><mml:mi>D</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> produced by the MDFEM are first concatenated to obtain the fused representation <inline-formula>
<mml:math display="inline" id="im20"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mtext>fusion</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>. The formulation is given in <xref ref-type="disp-formula" rid="eq6">Equation 6</xref>:</p>
<disp-formula id="eq6"><label>(6)</label>
<mml:math display="block" id="M6"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mi>u</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>1</mml:mn><mml:mi>D</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>2</mml:mn><mml:mi>D</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mn>3</mml:mn><mml:mi>D</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Next, <inline-formula>
<mml:math display="inline" id="im21"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mtext>fusion</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> is passed through a linear projection layer to reduce its feature dimensionality, yielding <inline-formula>
<mml:math display="inline" id="im22"><mml:mrow><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mi>u</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x2032;</mml:mo></mml:mrow></mml:msubsup></mml:mrow></mml:math></inline-formula>. This dimensionality reduction step effectively decreases the computational cost of the subsequent convolutional operations and improves overall efficiency. The process is formulated in <xref ref-type="disp-formula" rid="eq7">Equation 7</xref>.</p>
<disp-formula id="eq7"><label>(7)</label>
<mml:math display="block" id="M7"><mml:mrow><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mi>u</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x2032;</mml:mo></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mi>L</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mi>u</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>Subsequently, the reduced feature representation is flattened and partitioned into <inline-formula>
<mml:math display="inline" id="im23"><mml:mi>N</mml:mi></mml:math></inline-formula> groups along the channel dimension, forming <inline-formula>
<mml:math display="inline" id="im24"><mml:mi>N</mml:mi></mml:math></inline-formula> feature maps with different dimensional configurations. Each group is processed by convolutional layers of different scales, as illustrated in <xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref>. This operation is formulated in <xref ref-type="disp-formula" rid="eq8">Equation 8</xref>:</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Feature fusion and reorganization.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1773924-g002.tif">
<alt-text content-type="machine-generated">Diagram illustrating a process of merging 1D, 2D, and 3D features into multi-dimensional fused features, followed by linear projection. It includes feature flattening and dimension reorganization into multiple cubes labeled Dimension 1, 2, and N.</alt-text>
</graphic></fig>
<disp-formula id="eq8"><label>(8)</label>
<mml:math display="block" id="M8"><mml:mrow><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mn>11</mml:mn></mml:mrow><mml:mo>&#x2032;</mml:mo></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mn>22</mml:mn></mml:mrow><mml:mo>&#x2032;</mml:mo></mml:msubsup><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mi>N</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mo>&#x2032;</mml:mo></mml:msubsup><mml:mo>=</mml:mo><mml:mi>S</mml:mi><mml:mi>p</mml:mi><mml:mi>l</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>X</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mi>u</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi></mml:mrow><mml:mo>&#x2032;</mml:mo></mml:msubsup><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im25"><mml:mrow><mml:msubsup><mml:mi>X</mml:mi><mml:mn>1</mml:mn><mml:mo>&#x2032;</mml:mo></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>X</mml:mi><mml:mn>2</mml:mn><mml:mo>&#x2032;</mml:mo></mml:msubsup><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msubsup><mml:mi>X</mml:mi><mml:mi>N</mml:mi><mml:mo>&#x2032;</mml:mo></mml:msubsup></mml:mrow></mml:math></inline-formula> denote the <inline-formula>
<mml:math display="inline" id="im26"><mml:mi>N</mml:mi></mml:math></inline-formula> groups of feature vectors obtained after channel-wise partitioning.</p>
<p>Finally, depthwise pointwise convolution (DPC) is applied to the feature maps of different dimensional groups. Depthwise convolution is a special form of grouped convolution in which the number of groups is equal to the number of input channels, meaning that each channel is processed independently with its own convolution kernel while the number of output channels remains unchanged (<xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>). A subsequent pointwise (1&#xa0;&#xd7;&#xa0;1) convolution then restores cross-channel interaction by linearly combining the depthwise outputs. Because the spatial filtering and channel mixing are decoupled, this decomposition significantly reduces the number of parameters and computational cost.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Depthwise pointwise convolution.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1773924-g003.tif">
<alt-text content-type="machine-generated">Diagram showing a grid-based hierarchical structure with arrows indicating a flow through various grid representations. It leads to a section labeled &#x201c;Linear Projection&#x201d; and finally to &#x201c;Deep Global Features,&#x201d; represented by layered red and blue rectangles.</alt-text>
</graphic></fig>
<p>Assuming that the input feature map is <inline-formula>
<mml:math display="inline" id="im27"><mml:mrow><mml:mi>X</mml:mi><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mi>H</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>W</mml:mi><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, a standard convolution with a kernel size of <inline-formula>
<mml:math display="inline" id="im28"><mml:mrow><mml:mi>K</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>K</mml:mi></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im29"><mml:mrow><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> output channels results in a computational cost <inline-formula>
<mml:math display="inline" id="im30"><mml:mrow><mml:msub><mml:mi>&#x3c9;</mml:mi><mml:mi>s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, as defined in <xref ref-type="disp-formula" rid="eq9">Equation 9</xref>. In comparison, the computational cost of depthwise pointwise convolution (DPC) is denoted as <inline-formula>
<mml:math display="inline" id="im31"><mml:mrow><mml:msub><mml:mi>&#x3c9;</mml:mi><mml:mrow><mml:mi>D</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula>, as shown in <xref ref-type="disp-formula" rid="eq10">Equation 10</xref>.</p>
<disp-formula id="eq9"><label>(9)</label>
<mml:math display="block" id="M9"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>&#x3c9;</mml:mi><mml:mi>s</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>K</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>K</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mi>H</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>W</mml:mi><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq10"><label>(10)</label>
<mml:math display="block" id="M10"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>&#x3c9;</mml:mi><mml:mrow><mml:mi>D</mml:mi><mml:mi>P</mml:mi><mml:mi>C</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>K</mml:mi><mml:mo>+</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#xd7;</mml:mo><mml:mi>K</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mo>&#xa0;</mml:mo><mml:mi>H</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>W</mml:mi><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>After applying DPC, the ratio of its computational cost to that of standard convolution, denoted as <inline-formula>
<mml:math display="inline" id="im32"><mml:mrow><mml:msub><mml:mi>&#x3c9;</mml:mi><mml:mtext>&#x394;</mml:mtext></mml:msub></mml:mrow></mml:math></inline-formula>, is calculated as shown in <xref ref-type="disp-formula" rid="eq11">Equation 11</xref>.</p>
<disp-formula id="eq11"><label>(11)</label>
<mml:math display="block" id="M11"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>&#x3c9;</mml:mi><mml:mtext>&#x394;</mml:mtext></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>K</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>K</mml:mi><mml:mo>+</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>K</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>K</mml:mi><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msub><mml:mi>C</mml:mi><mml:mrow><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>+</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:msup><mml:mi>K</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mfrac></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
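<p>A short numerical check of <xref ref-type="disp-formula" rid="eq9">Equations 9</xref>&#x2013;<xref ref-type="disp-formula" rid="eq11">11</xref> illustrates the savings; the layer sizes below are illustrative assumptions:</p>
<code language="python">
# Numerical check of Equations 9-11 with illustrative layer sizes.
def standard_cost(K, H, W, c_in, c_out):
    return K * K * H * W * c_in * c_out             # Equation 9

def dpc_cost(K, H, W, c_in, c_out):
    depthwise = K * K * H * W * c_in                # one filter per channel
    pointwise = H * W * c_in * c_out                # 1x1 cross-channel mixing
    return depthwise + pointwise                    # Equation 10

K, H, W, c_in, c_out = 3, 11, 11, 64, 64
ratio = dpc_cost(K, H, W, c_in, c_out) / standard_cost(K, H, W, c_in, c_out)
print(round(ratio, 4))          # 1/c_out + 1/K**2 = 0.1267 (Equation 11)
</code>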
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Transformer encoder</title>
<p>The Transformer encoder consists of an embedding layer, a multi-head self-attention (MHSA) mechanism, and a multi-layer perceptron (MLP), as illustrated in <xref ref-type="fig" rid="f4"><bold>Figure&#xa0;4</bold></xref>. The encoder follows a standard Transformer architecture, configured with a token dimension of 64, a depth of 1 layer, 8 attention heads, an MLP hidden dimension of 8, and a dropout rate of 0.1.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Transformer architecture.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1773924-g004.tif">
<alt-text content-type="machine-generated">Flowchart of a neural network architecture. It begins with an Embedding Layer, followed by tokens and Layer Normalization. Multi-Head Attention feeds back with an addition operation, followed by another Layer Normalization, MLP, and a final Fully Connected Layer.</alt-text>
</graphic></fig>
<p>First, the spectral&#x2013;spatial features extracted by the parallel 1D, 2D, and 3D convolutional branches are fused by the linear-projection feature pyramid, which applies parallel depthwise convolutions at multiple scales (original, downsampled, and upsampled) to capture multi-contextual information, followed by feature concatenation and a pointwise convolution for fusion. The fused deep global features are then projected into a sequence of tokens that the Transformer can process. Specifically, tokenization is implemented via two sets of learnable projection matrices, which compress the spatial&#x2013;spectral feature sequence into a fixed number of 4 tokens, each with a dimension of 64. These tokens, together with a prepended learnable classification token, are passed through a layer normalization operation to stabilize the feature distribution prior to attention computation.</p>
<p>In the MHSA module, each pixel-level feature vector is projected into three independent subspaces&#x2014;query (Q), key (K), and value (V)&#x2014;allowing the model to capture feature relationships across different representation subspaces. The outputs from multiple attention heads are then concatenated and linearly transformed to project them back into the output space, which reduces dimensionality while retaining essential information. The attention output is computed from the query, key, and value matrices as shown in <xref ref-type="disp-formula" rid="eq12">Equation 12</xref>:</p>
<disp-formula id="eq12"><label>(12)</label>
<mml:math display="block" id="M12"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:mi>M</mml:mi><mml:mi>H</mml:mi><mml:mi>S</mml:mi><mml:mi>A</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>X</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>A</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>Q</mml:mi><mml:mo>,</mml:mo><mml:mi>K</mml:mi><mml:mo>,</mml:mo><mml:mi>V</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>S</mml:mi><mml:mi>o</mml:mi><mml:mi>f</mml:mi><mml:mi>t</mml:mi><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mo>(</mml:mo><mml:mfrac><mml:mrow><mml:mi>Q</mml:mi><mml:msup><mml:mi>K</mml:mi><mml:mi>T</mml:mi></mml:msup></mml:mrow><mml:mrow><mml:msqrt><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:msqrt></mml:mrow></mml:mfrac><mml:mo>)</mml:mo><mml:mi>V</mml:mi></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>The dot-product operation between <inline-formula>
<mml:math display="inline" id="im33"><mml:mi>Q</mml:mi></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im34"><mml:mrow><mml:msup><mml:mi>K</mml:mi><mml:mi>T</mml:mi></mml:msup></mml:mrow></mml:math></inline-formula> reflects the similarity between the two vectors. To obtain normalized attention weights, a softmax function is applied, converting the similarity matrix into an attention weight matrix. The resulting attention weights are then multiplied with the value vectors <inline-formula>
<mml:math display="inline" id="im35"><mml:mi>V</mml:mi></mml:math></inline-formula>, enabling each token to attend to other tokens and capture complex relationships such as &#x201c;different spectra within the same region&#x201d; and &#x201c;similar spectra across different regions,&#x201d; thereby uncovering intrinsic spectral variations within crops.</p>
<p>The MLP is a fundamental feed-forward neural network that ensures unidirectional information flow. Data enter from the input layer, are processed through one or more hidden layers, and finally pass to the output layer, with no feedback connections between layers. The output of the MLP layer is formulated in <xref ref-type="disp-formula" rid="eq13">Equation 13</xref>:</p>
<disp-formula id="eq13"><label>(13)</label>
<mml:math display="block" id="M13"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:mtext>M</mml:mtext><mml:mtext>L</mml:mtext><mml:mtext>P</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>X</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>&#x3c4;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mtext>X</mml:mtext><mml:msub><mml:mi>W</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im36"><mml:mrow><mml:msub><mml:mi>W</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula>
<mml:math display="inline" id="im37"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im38"><mml:mrow><mml:msub><mml:mi>W</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula>
<mml:math display="inline" id="im39"><mml:mrow><mml:msub><mml:mi>b</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> represent the weights and biases of the two linear transformations, respectively, and <inline-formula>
<mml:math display="inline" id="im40"><mml:mi>&#x3c4;</mml:mi></mml:math></inline-formula> denotes the nonlinear activation function. To mitigate covariate shift and provide more stable gradient signals, normalization layers are introduced before both the MHSA and MLP blocks.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Data sources</title>
<p>In this section, we introduce the three hyperspectral datasets used in our experiments. The original HSI images and the corresponding ground-truth label maps are illustrated in <xref ref-type="fig" rid="f5"><bold>Figures&#xa0;5</bold></xref>&#x2013;<xref ref-type="fig" rid="f7"><bold>7</bold></xref>. For all datasets, non-crop categories are uniformly assigned to an &#x201c;other&#x201d; class.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Original image and sample labels of the Matiwan Village dataset. <bold>(a)</bold> Original image. <bold>(b)</bold> Sample labels. <bold>(c)</bold> Color mapping of categories and pixel counts of training, validation, and test sets.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1773924-g005.tif">
<alt-text content-type="machine-generated">Satellite imagery and classification details of a landscape. (a) Satellite image of agricultural fields with varied patterns and colors. (b) Map highlighting different land types using contrasting colors. (c) Table listing 16 land classes with ID, color code, class name, and counts for training, validation, and test datasets. Classes include Rice, Willow, Maize, and others, with varied sample sizes.</alt-text>
</graphic></fig>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Original image and sample labels of the WHU-HongHu dataset. <bold>(a)</bold> Original image. <bold>(b)</bold> Sample labels. <bold>(c)</bold> Color mapping of categories and pixel counts of training, validation, and test sets.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1773924-g006.tif">
<alt-text content-type="machine-generated">A composite image featuring three panels: (a) an aerial view of agricultural fields, varying in color and texture; (b) a corresponding color-coded segmentation map highlighting different crop areas; (c) a table listing crop classes with corresponding color codes, training and validation instances, and the total number per class.</alt-text>
</graphic></fig>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Original image and sample labels of the WHU-LongKou dataset. <bold>(a)</bold> Original image. <bold>(b)</bold> Sample labels. <bold>(c)</bold> Color mapping of categories and pixel counts of training, validation, and test sets.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1773924-g007.tif">
<alt-text content-type="machine-generated">Two images labeled (a) and (b) depict satellite views of agricultural fields. Image (a) shows a natural color view, while image (b) shows a color-coded classification of crops. The table in image (c) matches colors with crop types: corn, cotton, sesame, broad-leaf soybean, narrow-leaf soybean, rice, and others. It includes training, validation, testing, and total data counts, summing to 206,765.</alt-text>
</graphic></fig>
<sec id="s3_1">
<label>3.1</label>
<title>Matiwan Village dataset</title>
<p>The first dataset was acquired from aerial hyperspectral imagery collected in Matiwan Village, Xiong&#x2019;an New Area, Baoding City, Hebei Province, China. The dataset contains 256 spectral bands covering a wavelength range of 400&#x2013;1000 nm, with a spectral resolution of 2.1 nm. The spatial resolution of the imagery is 0.5&#xa0;m, and the scene consists of 3750 &#xd7; 1580 pixels. A total of 18 land-cover categories are present in the dataset, among which 16 crop types are selected as the target classes for this study, while the remaining categories are grouped into an &#x201c;other&#x201d; class.</p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>WHU-HongHu dataset</title>
<p>The second dataset was collected on November 20, 2017, in Honghu City, Hubei Province, China. During data acquisition, parts of the UAV-based hyperspectral imagery were slightly affected by cloud occlusion. The dataset covers a spatial extent of 940 &#xd7; 475 pixels with a spatial resolution of 0.043 m and contains 270 spectral bands. It consists of a variety of complex crop types, with certain categories (e.g., Chinese cabbage and bok choy) exhibiting high spectral similarity. A total of 22 land-cover classes are included in the dataset, among which 19 crop types are selected as the target categories for this study, while the remaining classes are grouped into an &#x201c;other&#x201d; category.</p>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>WHU-LongKou dataset</title>
<p>The third dataset was acquired on July 17, 2018, using UAV-based hyperspectral imaging in Longkou Town, Hubei Province, China. Weather conditions during acquisition were clear, with no cloud or rainfall interference. The dataset has a spatial size of 400 &#xd7; 550 pixels with an approximate spatial resolution of 0.463 m. It contains 270 spectral bands spanning a wavelength range of 400&#x2013;1000 nm. A total of nine land-cover categories are present in the dataset, among which seven crop types are selected as the target classes for this study, while the remaining categories are assigned to an &#x201c;other&#x201d; class.</p>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Experimental settings and results</title>
<sec id="s4_1">
<label>4.1</label>
<title>Experimental settings</title>
<p>(1) Parameter settings: To ensure fairness and reproducibility, all experiments were conducted on a workstation equipped with an Intel(R) Core(TM) i9-14900KF CPU, an NVIDIA GeForce RTX 5090 GPU, and 128 GB RAM. All models were implemented and trained using the PyTorch deep learning framework. The number of training epochs was set to 500. Each experiment was repeated five times, and the average performance was reported to improve the reliability of the results.</p>
<p>(2) Patch size configuration: Considering the trade-off among computational cost, classification accuracy, and the ability to capture both local details and contextual information within hyperspectral imagery, an input patch size of <inline-formula>
<mml:math display="inline" id="im41"><mml:mrow><mml:mn>11</mml:mn><mml:mo>&#xd7;</mml:mo><mml:mn>11</mml:mn></mml:mrow></mml:math></inline-formula> was selected for all experiments.</p>
<p>(3) Sample proportion settings: For the WHU-LongKou and WHU-HongHu datasets, an equal number of training samples was adopted to ensure fairness. Specifically, five random groups of samples were selected, with each group containing 20 pixels for training and 300 pixels for validation, while the remaining pixels were used as the test set. For the Matiwan Village dataset, five random groups were also selected, each containing 100 pixels for training and 300 pixels for validation, with the remaining pixels used for testing. This setting allows us to evaluate the model&#x2019;s performance under balanced, limited-sample scenarios.</p>
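<p>A minimal sketch of constructing one such random sampling group is given below; it assumes that the training and validation counts are drawn per class and that label 0 marks unlabeled pixels, neither of which is stated explicitly above.</p>
<code language="python">import numpy as np

def split_pixels(labels, n_train=20, n_val=300, seed=0):
    """labels: (H, W) ground-truth map; returns flat pixel indices per split."""
    rng = np.random.default_rng(seed)
    train, val, test = [], [], []
    for cls in np.unique(labels):
        if cls == 0:                          # 0 assumed to mark unlabeled pixels
            continue
        idx = np.flatnonzero(labels.ravel() == cls)
        rng.shuffle(idx)
        train.extend(idx[:n_train])
        val.extend(idx[n_train : n_train + n_val])
        test.extend(idx[n_train + n_val :])
    return np.array(train), np.array(val), np.array(test)</code>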
<p>(4) Learning rate settings: In this study, five learning rates&#x2014; <inline-formula>
<mml:math display="inline" id="im42"><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#xd7;</mml:mo><mml:msup><mml:mrow><mml:mn>10</mml:mn></mml:mrow><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, <inline-formula>
<mml:math display="inline" id="im43"><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#xd7;</mml:mo><mml:msup><mml:mrow><mml:mn>10</mml:mn></mml:mrow><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>4</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, <inline-formula>
<mml:math display="inline" id="im44"><mml:mrow><mml:mn>5</mml:mn><mml:mo>&#xd7;</mml:mo><mml:msup><mml:mrow><mml:mn>10</mml:mn></mml:mrow><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>4</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, <inline-formula>
<mml:math display="inline" id="im45"><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#xd7;</mml:mo><mml:msup><mml:mrow><mml:mn>10</mml:mn></mml:mrow><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>5</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, and <inline-formula>
<mml:math display="inline" id="im46"><mml:mrow><mml:mn>5</mml:mn><mml:mo>&#xd7;</mml:mo><mml:msup><mml:mrow><mml:mn>10</mml:mn></mml:mrow><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>5</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> &#x2014;were tested on the three datasets. After extensive experimentation, a learning rate of <inline-formula>
<mml:math display="inline" id="im47"><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#xd7;</mml:mo><mml:msup><mml:mrow><mml:mn>10</mml:mn></mml:mrow><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>4</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> was determined to be the optimal choice.</p>
<p>(5) Evaluation metrics: Overall accuracy (OA), average accuracy (AA), and the Kappa coefficient are widely used metrics for evaluating classification performance.</p>
<p>OA measures the percentage of correctly classified pixels relative to the total number of pixels in the test set, reflecting the overall classification performance. It is computed as (<xref ref-type="disp-formula" rid="eq14">Equation 14</xref>):</p>
<disp-formula id="eq14"><label>(14)</label>
<mml:math display="block" id="M14"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:mtext>O</mml:mtext><mml:mtext>A</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msubsup><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>k</mml:mi></mml:msubsup><mml:msub><mml:mrow><mml:mtext>TP</mml:mtext></mml:mrow><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mtext>N</mml:mtext></mml:mfrac><mml:mo>&#xd7;</mml:mo><mml:mn>100</mml:mn><mml:mo>%</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im48"><mml:mi>k</mml:mi></mml:math></inline-formula> denotes the number of crop categories, <inline-formula>
<mml:math display="inline" id="im49"><mml:mrow><mml:mi>T</mml:mi><mml:msub><mml:mi>P</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> represents the number of correctly classified pixels for class <inline-formula>
<mml:math display="inline" id="im50"><mml:mi>i</mml:mi></mml:math></inline-formula>, and <inline-formula>
<mml:math display="inline" id="im51"><mml:mi>N</mml:mi></mml:math></inline-formula> is the total number of pixels in the test set.</p>
<p>AA represents the arithmetic mean of the classification accuracy of each class, helping to reduce the influence of class imbalance. It is defined as (<xref ref-type="disp-formula" rid="eq15">Equation 15</xref>):</p>
<disp-formula id="eq15"><label>(15)</label>
<mml:math display="block" id="M15"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:mtext>A</mml:mtext><mml:mtext>A</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mtext>k</mml:mtext></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>k</mml:mi></mml:munderover><mml:mo>(</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mtext>TP</mml:mtext></mml:mrow><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mfrac><mml:mo>)</mml:mo><mml:mo>&#xd7;</mml:mo><mml:mn>100</mml:mn><mml:mo>%</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im52"><mml:mrow><mml:msub><mml:mi>n</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> refers to the number of true pixels belonging to class <inline-formula>
<mml:math display="inline" id="im53"><mml:mi>i</mml:mi></mml:math></inline-formula> in the test set.</p>
<p>The Kappa coefficient is an agreement measure that evaluates the consistency between classification results and ground truth while accounting for agreement occurring by chance. A higher Kappa value indicates more reliable classification performance. It is calculated as (<xref ref-type="disp-formula" rid="eq16">Equation 16</xref>):</p>
<disp-formula id="eq16"><label>(16)</label>
<mml:math display="block" id="M16"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:mtext>K</mml:mtext><mml:mtext>a</mml:mtext><mml:mtext>p</mml:mtext><mml:mtext>p</mml:mtext><mml:mtext>a</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mtext>OA</mml:mtext><mml:mo>&#xa0;&#x2212;&#xa0;</mml:mo><mml:msub><mml:mtext>p</mml:mtext><mml:mi>e</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#xa0;&#x2212;&#xa0;</mml:mo><mml:msub><mml:mtext>p</mml:mtext><mml:mi>e</mml:mi></mml:msub></mml:mrow></mml:mfrac></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>where the expected agreement <inline-formula>
<mml:math display="inline" id="im54"><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>e</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> under random classification is given by (<xref ref-type="disp-formula" rid="eq17">Equation 17</xref>):</p>
<disp-formula id="eq17"><label>(17)</label>
<mml:math display="block" id="M17"><mml:mrow><mml:mtable equalrows="true" equalcolumns="true"><mml:mtr><mml:mtd><mml:mrow><mml:msub><mml:mi>p</mml:mi><mml:mi>e</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>k</mml:mi></mml:munderover><mml:mfrac><mml:mrow><mml:msub><mml:mtext>n</mml:mtext><mml:mi>i</mml:mi></mml:msub><mml:mo>&#xd7;</mml:mo><mml:msub><mml:mtext>m</mml:mtext><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mrow><mml:msup><mml:mtext>N</mml:mtext><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:mfrac></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:math>
</disp-formula>
<p>Here, <inline-formula>
<mml:math display="inline" id="im55"><mml:mrow><mml:msub><mml:mi>m</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> denotes the number of pixels predicted as class <inline-formula>
<mml:math display="inline" id="im56"><mml:mi>i</mml:mi></mml:math></inline-formula> in the classification results.</p>
</sec>
<sec id="s4_2">
<label>4.2</label>
<title>Experimental results</title>
<p>We compare the proposed method with six state-of-the-art deep learning&#x2013;based HSI classification approaches. A conventional CNN-based model, 3D-CNN (<xref ref-type="bibr" rid="B35">Zunair et&#xa0;al., 2020</xref>), was also evaluated; however, because its per-category classification accuracy lags behind that of the other advanced models, it is not included in the comparison tables. The six compared methods comprise the Transformer-based SSFTT (<xref ref-type="bibr" rid="B26">Sun et&#xa0;al., 2022</xref>), MorphFormer (<xref ref-type="bibr" rid="B24">Roy et&#xa0;al., 2023</xref>), GAHT (<xref ref-type="bibr" rid="B21">Mei et&#xa0;al., 2022</xref>), GSC-ViT (<xref ref-type="bibr" rid="B34">Zhao et&#xa0;al., 2024</xref>), and CTMixer (<xref ref-type="bibr" rid="B32">Zhang et&#xa0;al., 2022</xref>), together with the Mamba-based WD-SSMamba (<xref ref-type="bibr" rid="B33">Zhang et&#xa0;al., 2025</xref>).</p>
<p>SSFTT captures spectral&#x2013;spatial representations and higher-level semantic features through a spectral&#x2013;spatial feature tokenization transformer. MorphFormer is a learnable spectral&#x2013;spatial morphological network that improves interaction between structural and shape-related information in HSI labeling. GAHT introduces a group-aware pixel embedding module that constrains MHSA within local spectral&#x2013;spatial contexts. GSC-ViT incorporates a grouped separable convolution (GSC) module that significantly reduces convolutional parameters while effectively capturing local spectral&#x2013;spatial information. CTMixer adopts a dual-branch architecture combining CNNs and Transformers to jointly extract local and global hyperspectral features. Mamba-based models, including WD-SSMamba, utilize a state-space model architecture, which dynamically captures long-range dependencies while reducing computational complexity through efficient token generation. The Mamba framework facilitates lightweight and effective modeling of spectral and spatial features in hyperspectral image classification tasks.</p>
<sec id="s4_2_1">
<label>4.2.1</label>
<title>Matiwan Village dataset</title>
<p>The classification performance of MDPC-Net and the six comparison models on the Matiwan Village dataset is summarized in <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>. MDPC-Net achieves an OA of 88.82%, an AA of 94.20%, and a Kappa coefficient of 0.8702 across 16 crop categories, demonstrating strong overall classification capability under limited-sample conditions. MDPC-Net attains the highest classification accuracy for 15 out of 16 crop types, confirming the effectiveness of the proposed multi-dimensional feature coupling strategy.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Accuracy evaluation of MDPC-Net versus six comparative models on the Matiwan Village dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" colspan="2" align="center">Label</th>
<th valign="middle" align="center">SSFTT</th>
<th valign="middle" align="center">GAHT</th>
<th valign="middle" align="center">Morphformer</th>
<th valign="middle" align="center">GSC-Vit</th>
<th valign="middle" align="center">CTMixer</th>
<th valign="middle" align="center">WD-SSMamb</th>
<th valign="middle" align="center">MDPC-Net</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">Rice</td>
<td valign="middle" align="center">98.77 &#xb1; 0.52</td>
<td valign="middle" align="center">98.48 &#xb1; 0.47</td>
<td valign="middle" align="center">98.74 &#xb1; 0.53</td>
<td valign="middle" align="center">98.70 &#xb1; 0.43</td>
<td valign="middle" align="center">99.36 &#xb1; 0.51</td>
<td valign="middle" align="center">99.39 &#xb1; 1.52</td>
<td valign="middle" align="center"><bold>99.63 &#xb1; 0.47</bold></td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">Rice Stubble</td>
<td valign="middle" align="center">99.43 &#xb1; 0.37</td>
<td valign="middle" align="center">99.37 &#xb1; 0.50</td>
<td valign="middle" align="center">99.16 &#xb1; 0.62</td>
<td valign="middle" align="center">98.93 &#xb1; 1.02</td>
<td valign="middle" align="center">99.63 &#xb1; 0.33</td>
<td valign="middle" align="center">99.41 &#xb1; 1.08</td>
<td valign="middle" align="center"><bold>99.82 &#xb1; 0.18</bold></td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">Willow</td>
<td valign="middle" align="center">85.01 &#xb1; 1.30</td>
<td valign="middle" align="center">81.84 &#xb1; 1.72</td>
<td valign="middle" align="center">79.10 &#xb1; 1.78</td>
<td valign="middle" align="center">81.78 &#xb1; 1.85</td>
<td valign="middle" align="center">86.56 &#xb1; 2.19</td>
<td valign="middle" align="center">96.02 &#xb1; 1.65</td>
<td valign="middle" align="center"><bold>96.29 &#xb1; 0.57</bold></td>
</tr>
<tr>
<td valign="middle" align="center">4</td>
<td valign="middle" align="center">Vegetable Field</td>
<td valign="middle" align="center">77.17 &#xb1; 4.18</td>
<td valign="middle" align="center">76.52 &#xb1; 3.42</td>
<td valign="middle" align="center">73.38 &#xb1; 1.07</td>
<td valign="middle" align="center">77.37 &#xb1; 4.53</td>
<td valign="middle" align="center">78.22 &#xb1; 2.54</td>
<td valign="middle" align="center">92.24 &#xb1; 1.89</td>
<td valign="middle" align="center"><bold>92.54 &#xb1; 2.59</bold></td>
</tr>
<tr>
<td valign="middle" align="center">5</td>
<td valign="middle" align="center">Compound-Leaf Maple</td>
<td valign="middle" align="center">76.78 &#xb1; 1.16</td>
<td valign="middle" align="center">73.10 &#xb1; 2.04</td>
<td valign="middle" align="center">73.21 &#xb1; 2.28</td>
<td valign="middle" align="center">74.94 &#xb1; 3.12</td>
<td valign="middle" align="center">81.52 &#xb1; 2.05</td>
<td valign="middle" align="center">91.43 &#xb1; 1.12</td>
<td valign="middle" align="center"><bold>91.83 &#xb1; 1.15</bold></td>
</tr>
<tr>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">Ash Tree</td>
<td valign="middle" align="center">74.18 &#xb1; 2.31</td>
<td valign="middle" align="center">76.50 &#xb1; 4.24</td>
<td valign="middle" align="center">76.29 &#xb1; 1.68</td>
<td valign="middle" align="center">77.15 &#xb1; 3.85</td>
<td valign="middle" align="center">81.14 &#xb1; 2.71</td>
<td valign="middle" align="center">90.45 &#xb1; 0.74</td>
<td valign="middle" align="center"><bold>90.87 &#xb1; 1.43</bold></td>
</tr>
<tr>
<td valign="middle" align="center">7</td>
<td valign="middle" align="center">Maize</td>
<td valign="middle" align="center">74.82 &#xb1; 2.75</td>
<td valign="middle" align="center">73.34 &#xb1; 4.05</td>
<td valign="middle" align="center">71.50 &#xb1; 2.92</td>
<td valign="middle" align="center">70.64 &#xb1; 4.11</td>
<td valign="middle" align="center">81.68 &#xb1; 2.01</td>
<td valign="middle" align="center">89.82 &#xb1; 0.84</td>
<td valign="middle" align="center"><bold>90.01 &#xb1; 1.82</bold></td>
</tr>
<tr>
<td valign="middle" align="center">8</td>
<td valign="middle" align="center">Poplar</td>
<td valign="middle" align="center">88.44 &#xb1; 1.14</td>
<td valign="middle" align="center">88.29 &#xb1; 1.89</td>
<td valign="middle" align="center">87.14 &#xb1; 2.00</td>
<td valign="middle" align="center">87.20 &#xb1; 0.79</td>
<td valign="middle" align="center">89.33 &#xb1; 2.10</td>
<td valign="middle" align="center">94.96 &#xb1; 1.20</td>
<td valign="middle" align="center"><bold>95.45 &#xb1; 0.56</bold></td>
</tr>
<tr>
<td valign="middle" align="center">9</td>
<td valign="middle" align="center">Hovenia</td>
<td valign="middle" align="center">89.02 &#xb1; 2.18</td>
<td valign="middle" align="center">91.61 &#xb1; 1.48</td>
<td valign="middle" align="center">90.54 &#xb1; 1.45</td>
<td valign="middle" align="center">90.87 &#xb1; 0.98</td>
<td valign="middle" align="center">92.96 &#xb1; 2.80</td>
<td valign="middle" align="center">98.19 &#xb1; 0.63</td>
<td valign="middle" align="center"><bold>98.37 &#xb1; 0.58</bold></td>
</tr>
<tr>
<td valign="middle" align="center">10</td>
<td valign="middle" align="center">Chinese Scholar Tree</td>
<td valign="middle" align="center">68.28 &#xb1; 2.67</td>
<td valign="middle" align="center">68.25 &#xb1; 1.67</td>
<td valign="middle" align="center">66.27 &#xb1; 4.78</td>
<td valign="middle" align="center">66.42 &#xb1; 3.79</td>
<td valign="middle" align="center">70.11 &#xb1; 5.26</td>
<td valign="middle" align="center">87.98 &#xb1; 1.92</td>
<td valign="middle" align="center"><bold>88.29 &#xb1; 1.21</bold></td>
</tr>
<tr>
<td valign="middle" align="center">11</td>
<td valign="middle" align="center">Peach Tree</td>
<td valign="middle" align="center">87.65 &#xb1; 2.10</td>
<td valign="middle" align="center">88.09 &#xb1; 2.99</td>
<td valign="middle" align="center">86.10 &#xb1; 2.81</td>
<td valign="middle" align="center">88.94 &#xb1; 3.63</td>
<td valign="middle" align="center">89.35 &#xb1; 1.64</td>
<td valign="middle" align="center">97.05 &#xb1; 1.15</td>
<td valign="middle" align="center"><bold>97.17 &#xb1; 0.87</bold></td>
</tr>
<tr>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">Pear Tree</td>
<td valign="middle" align="center">62.14 &#xb1; 1.65</td>
<td valign="middle" align="center">59.46 &#xb1; 2.81</td>
<td valign="middle" align="center">57.38 &#xb1; 2.87</td>
<td valign="middle" align="center">59.85 &#xb1; 7.02</td>
<td valign="middle" align="center">62.46 &#xb1; 3.61</td>
<td valign="middle" align="center">78.35 &#xb1; 0.55</td>
<td valign="middle" align="center"><bold>78.50 &#xb1; 2.00</bold></td>
</tr>
<tr>
<td valign="middle" align="center">13</td>
<td valign="middle" align="center">Soybean</td>
<td valign="middle" align="center">99.44 &#xb1; 0.78</td>
<td valign="middle" align="center">98.74 &#xb1; 0.78</td>
<td valign="middle" align="center">99.02 &#xb1; 0.26</td>
<td valign="middle" align="center">99.09 &#xb1; 0.52</td>
<td valign="middle" align="center">99.51 &#xb1; 0.52</td>
<td valign="middle" align="center">99.47 &#xb1; 1.64</td>
<td valign="middle" align="center"><bold>99.86 &#xb1; 0.17</bold></td>
</tr>
<tr>
<td valign="middle" align="center">14</td>
<td valign="middle" align="center">Phoebe Tree</td>
<td valign="middle" align="center">96.77 &#xb1; 0.50</td>
<td valign="middle" align="center">96.64 &#xb1; 1.15</td>
<td valign="middle" align="center">96.72 &#xb1; 0.69</td>
<td valign="middle" align="center">97.49 &#xb1; 0.80</td>
<td valign="middle" align="center">96.65 &#xb1; 0.52</td>
<td valign="middle" align="center">98.5 &#xb1; 0.70</td>
<td valign="middle" align="center"><bold>98.64 &#xb1; 0.24</bold></td>
</tr>
<tr>
<td valign="middle" align="center">15</td>
<td valign="middle" align="center">Elm Tree</td>
<td valign="middle" align="center">96.24 &#xb1; 0.39</td>
<td valign="middle" align="center">96.06 &#xb1; 1.27</td>
<td valign="middle" align="center">94.10 &#xb1; 1.19</td>
<td valign="middle" align="center">94.28 &#xb1; 0.88</td>
<td valign="middle" align="center">96.97 &#xb1; 1.06</td>
<td valign="middle" align="center">98.97 &#xb1; 0.66</td>
<td valign="middle" align="center"><bold>99.41 &#xb1; 0.41</bold></td>
</tr>
<tr>
<td valign="middle" align="center">16</td>
<td valign="middle" align="center">Others</td>
<td valign="middle" align="center">84.99 &#xb1; 1.32</td>
<td valign="middle" align="center">83.70 &#xb1; 1.26</td>
<td valign="middle" align="center">84.85 &#xb1; 1.34</td>
<td valign="middle" align="center">84.60 &#xb1; 1.20</td>
<td valign="middle" align="center">86.52 &#xb1; 1.19</td>
<td valign="middle" align="center">90.2 &#xb1; 1.04</td>
<td valign="middle" align="center"><bold>90.49 &#xb1; 1.64</bold></td>
</tr>
<tr>
<td valign="middle" colspan="2" align="center">OA</td>
<td valign="middle" align="center">76.47 &#xb1; 0.36</td>
<td valign="middle" align="center">75.16 &#xb1; 1.28</td>
<td valign="middle" align="center">74.03 &#xb1; 1.04</td>
<td valign="middle" align="center">78.26 &#xb1; 1.76</td>
<td valign="middle" align="center">75.26 &#xb1; 3.14</td>
<td valign="middle" align="center"><bold>88.66 &#xb1; 1.29</bold></td>
<td valign="middle" align="center"><bold>88.82 &#xb1; 0.75</bold></td>
</tr>
<tr>
<td valign="middle" colspan="2" align="center">AA</td>
<td valign="middle" align="center">84.95 &#xb1; 0.19</td>
<td valign="middle" align="center">84.38 &#xb1; 0.83</td>
<td valign="middle" align="center">83.34 &#xb1; 0.73</td>
<td valign="middle" align="center">87.00 &#xb1; 0.69</td>
<td valign="middle" align="center">84.26 &#xb1; 1.32</td>
<td valign="middle" align="center"><bold>93.86 &#xb1; 0.52</bold></td>
<td valign="middle" align="center"><bold>94.20 &#xb1; 0.26</bold></td>
</tr>
<tr>
<td valign="middle" colspan="2" align="center">K&#xd7;100</td>
<td valign="middle" align="center">73.11 &#xb1; 0.38</td>
<td valign="middle" align="center">71.64 &#xb1; 1.39</td>
<td valign="middle" align="center">70.46 &#xb1; 1.12</td>
<td valign="middle" align="center">75.12 &#xb1; 1.91</td>
<td valign="middle" align="center">71.80 &#xb1; 3.35</td>
<td valign="middle" align="center"><bold>86.77 &#xb1; 0.83</bold></td>
<td valign="middle" align="center"><bold>87.02 &#xb1; 0.84</bold></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values indicate the highest accuracy results among the compared methods for each corresponding metric.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Nevertheless, a small number of spectrally similar classes still exhibit relatively lower classification accuracy. For instance, the accuracies for pear tree and Chinese scholar tree are 78.50% and 88.29%, respectively. This performance degradation can be primarily attributed to substantial spectral overlap between these vegetation types, which share similar canopy structures, biochemical compositions, and phenological characteristics in the Matiwan Village region. Under such conditions, even high-dimensional hyperspectral features provide limited inter-class separability, making precise discrimination inherently challenging. Although MDPC-Net still outperforms most comparison models on these classes, the remaining performance gap highlights an intrinsic limitation imposed by spectral similarity rather than model insufficiency. From a methodological perspective, this observation suggests that classification performance for highly spectrally similar crops may benefit from incorporating additional discriminative cues, such as multi-temporal information, fine-grained texture descriptors, or domain-specific prior knowledge (e.g., phenological constraints). Future extensions of the proposed framework could explore temporal-aware feature modeling or adaptive class-specific refinement mechanisms to further enhance discrimination among such challenging categories. The qualitative classification results are illustrated in <xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8</bold></xref>. Compared with the ground-truth map (<xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8a</bold></xref>), methods such as WD-SSMamba (<xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8c</bold></xref>) and SSFTT (<xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8d</bold></xref>) tend to generate fragmented predictions in large homogeneous regions, erroneously splitting continuous crop fields into multiple categories. This phenomenon indicates limited robustness in maintaining spatial consistency, particularly under complex agricultural layouts. GSC-ViT (<xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8g</bold></xref>) better captures large-scale structural patterns but still suffers from noticeable loss of local details, especially near field boundaries.</p>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Sample classification results of different models on the Matiwan Village dataset. The numerical labels and their corresponding color patches indicate distinct crop categories, with the specific crop type for each label detailed in <xref ref-type="fig" rid="f5"><bold>Figure&#xa0;5</bold></xref>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1773924-g008.tif">
<alt-text content-type="machine-generated">Comparison of eight segmented maps labeled (a) to (h) representing different methods: Ground Truth, MDPC-Net, WD-SSMamb, SSFTT, GAHT, Morphformer, GSC-Vit, and CTMixer. Each map uses a key of sixteen colors representing various categories.</alt-text>
</graphic></fig>
<p>In contrast, MDPC-Net (<xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8b</bold></xref>) demonstrates superior spatial coherence and structural fidelity. Its predictions exhibit strong alignment with the ground truth across diverse spatial patterns, including large continuous green regions, regular purple and brown block-like fields, and smaller scattered blue and red patches. This improvement can be attributed to the proposed multi-dimensional progressive feature extraction and linear-projection pyramid fusion, which jointly preserve both global contextual semantics and fine-grained local structures.</p>
<p>Furthermore, Mamba-based models, such as WD-SSMamba (<xref ref-type="fig" rid="f8"><bold>Figure&#xa0;8c</bold></xref>), show noticeable improvements over conventional CNN- and Transformer-based approaches by more effectively modeling long-range dependencies while maintaining computational efficiency. However, compared with MDPC-Net, their spatial predictions still exhibit occasional inconsistencies in boundary delineation and local detail preservation. This comparison highlights that while Mamba-based architectures provide an efficient mechanism for global dependency modeling, the explicit multi-dimensional feature coupling and hierarchical fusion strategy employed by MDPC-Net is more effective in addressing the complex spectral&#x2013;spatial characteristics of agricultural scenes.</p>
</sec>
<sec id="s4_2_2">
<label>4.2.2</label>
<title>WHU-HongHu dataset</title>
<p>The classification performance on the WHU-HongHu dataset is summarized in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>. MDPC-Net achieves an OA of 89.95%, an AA of 89.63%, and a Kappa coefficient of 0.8730 across 18 crop categories, demonstrating strong overall performance on this large-scale and fine-grained agricultural dataset. MDPC-Net outperforms the six comparison methods for most crop types, confirming its robustness under complex class distributions and limited training samples.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Accuracy evaluation of MDPC-Net versus six comparative models on the WHU-HongHu dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" colspan="2" align="center">Label</th>
<th valign="middle" align="center">SSFTT</th>
<th valign="middle" align="center">GAHT</th>
<th valign="middle" align="center">Morphformer</th>
<th valign="middle" align="center">GSC-Vit</th>
<th valign="middle" align="center">CTMixer</th>
<th valign="middle" align="center">WD-SSMamba</th>
<th valign="middle" align="center">MDPC-Net</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">Cotton</td>
<td valign="middle" align="center">93.55 &#xb1; 2.00</td>
<td valign="middle" align="center">93.32 &#xb1; 1.86</td>
<td valign="middle" align="center">95.67 &#xb1; 1.20</td>
<td valign="middle" align="center">95.68 &#xb1; 1.69</td>
<td valign="middle" align="center"><bold>96.75 &#xb1; 1.42</bold></td>
<td valign="middle" align="center">94.94 &#xb1; 1.42</td>
<td valign="middle" align="center">96.24 &#xb1; 1.42</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">Cotton Stubble</td>
<td valign="middle" align="center">91.54 &#xb1; 2.95</td>
<td valign="middle" align="center">90.43 &#xb1; 2.73</td>
<td valign="middle" align="center">92.73 &#xb1; 3.86</td>
<td valign="middle" align="center">92.18 &#xb1; 3.06</td>
<td valign="middle" align="center">92.28 &#xb1; 1.42</td>
<td valign="middle" align="center">95.09 &#xb1; 1.8</td>
<td valign="middle" align="center"><bold>95.38 &#xb1; 1.80</bold></td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">Rapeseed</td>
<td valign="middle" align="center">91.44 &#xb1; 1.85</td>
<td valign="middle" align="center">87.64 &#xb1; 2.96</td>
<td valign="middle" align="center">89.79 &#xb1; 3.13</td>
<td valign="middle" align="center">88.93 &#xb1; 1.50</td>
<td valign="middle" align="center">90.73 &#xb1; 2.34</td>
<td valign="middle" align="center">93.96 &#xb1; 1.67</td>
<td valign="middle" align="center"><bold>94.12 &#xb1; 3.12</bold></td>
</tr>
<tr>
<td valign="middle" align="center">4</td>
<td valign="middle" align="center">Chinese Cabbage</td>
<td valign="middle" align="center">66.10 &#xb1; 5.30</td>
<td valign="middle" align="center">61.54 &#xb1; 4.91</td>
<td valign="middle" align="center">61.99 &#xb1; 2.90</td>
<td valign="middle" align="center">67.10 &#xb1; 3.97</td>
<td valign="middle" align="center">66.70 &#xb1; 2.57</td>
<td valign="middle" align="center">72.85 &#xb1; 1.38</td>
<td valign="middle" align="center"><bold>73.09 &#xb1; 3.65</bold></td>
</tr>
<tr>
<td valign="middle" align="center">5</td>
<td valign="middle" align="center">Pakchoi</td>
<td valign="middle" align="center">68.72 &#xb1; 3.09</td>
<td valign="middle" align="center">57.23 &#xb1; 7.23</td>
<td valign="middle" align="center">67.85 &#xb1; 10.04</td>
<td valign="middle" align="center">64.05 &#xb1; 6.77</td>
<td valign="middle" align="center">68.20 &#xb1; 5.79</td>
<td valign="middle" align="center"><bold>80.46 &#xb1; 0.67</bold></td>
<td valign="middle" align="center">80.27 &#xb1; 3.86</td>
</tr>
<tr>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">Cabbage</td>
<td valign="middle" align="center">97.23 &#xb1; 1.51</td>
<td valign="middle" align="center">94.29 &#xb1; 2.57</td>
<td valign="middle" align="center">97.41 &#xb1; 0.40</td>
<td valign="middle" align="center">95.59 &#xb1; 1.70</td>
<td valign="middle" align="center">97.25 &#xb1; 1.15</td>
<td valign="middle" align="center">97.53 &#xb1; 1.0</td>
<td valign="middle" align="center"><bold>97.89 &#xb1; 1.02</bold></td>
</tr>
<tr>
<td valign="middle" align="center">7</td>
<td valign="middle" align="center">Kohlrabi</td>
<td valign="middle" align="center">85.55 &#xb1; 3.12</td>
<td valign="middle" align="center">73.83 &#xb1; 4.55</td>
<td valign="middle" align="center">84.16 &#xb1; 6.87</td>
<td valign="middle" align="center">78.18 &#xb1; 3.61</td>
<td valign="middle" align="center">81.19 &#xb1; 2.88</td>
<td valign="middle" align="center">87.40 &#xb1; 1.54</td>
<td valign="middle" align="center"><bold>87.70 &#xb1; 3.65</bold></td>
</tr>
<tr>
<td valign="middle" align="center">8</td>
<td valign="middle" align="center">Cucumber</td>
<td valign="middle" align="center">69.61 &#xb1; 4.29</td>
<td valign="middle" align="center">62.71 &#xb1; 4.51</td>
<td valign="middle" align="center">66.54 &#xb1; 6.25</td>
<td valign="middle" align="center">70.78 &#xb1; 5.38</td>
<td valign="middle" align="center">72.29 &#xb1; 5.08</td>
<td valign="middle" align="center">80.21 &#xb1; 1.89</td>
<td valign="middle" align="center"><bold>80.37 &#xb1; 4.62</bold></td>
</tr>
<tr>
<td valign="middle" align="center">9</td>
<td valign="middle" align="center">Mustard Greens</td>
<td valign="middle" align="center">74.27 &#xb1; 4.62</td>
<td valign="middle" align="center">70.36 &#xb1; 7.12</td>
<td valign="middle" align="center">74.11 &#xb1; 4.86</td>
<td valign="middle" align="center">75.62 &#xb1; 5.61</td>
<td valign="middle" align="center">73.58 &#xb1; 2.97</td>
<td valign="middle" align="center">77.42 &#xb1; 1.06</td>
<td valign="middle" align="center"><bold>77.56 &#xb1; 5.18</bold></td>
</tr>
<tr>
<td valign="middle" align="center">10</td>
<td valign="middle" align="center">Baby Mustard</td>
<td valign="middle" align="center">67.55 &#xb1; 5.04</td>
<td valign="middle" align="center">61.71 &#xb1; 5.88</td>
<td valign="middle" align="center">66.78 &#xb1; 6.48</td>
<td valign="middle" align="center">67.15 &#xb1; 3.30</td>
<td valign="middle" align="center">63.53 &#xb1; 4.72</td>
<td valign="middle" align="center">71.23 &#xb1; 1.17</td>
<td valign="middle" align="center"><bold>72.76 &#xb1; 4.58</bold></td>
</tr>
<tr>
<td valign="middle" align="center">11</td>
<td valign="middle" align="center">Spinach</td>
<td valign="middle" align="center">90.09 &#xb1; 2.71</td>
<td valign="middle" align="center">77.24 &#xb1; 4.75</td>
<td valign="middle" align="center">88.17 &#xb1; 0.90</td>
<td valign="middle" align="center">81.52 &#xb1; 4.81</td>
<td valign="middle" align="center">87.00 &#xb1; 1.80</td>
<td valign="middle" align="center">93.38 &#xb1; 0.72</td>
<td valign="middle" align="center"><bold>93.58 &#xb1; 1.22</bold></td>
</tr>
<tr>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">Chicory</td>
<td valign="middle" align="center">97.30 &#xb1; 1.72</td>
<td valign="middle" align="center">95.50 &#xb1; 4.15</td>
<td valign="middle" align="center">95.90 &#xb1; 3.53</td>
<td valign="middle" align="center">94.80 &#xb1; 3.04</td>
<td valign="middle" align="center">96.60 &#xb1; 2.52</td>
<td valign="middle" align="center">96.48 &#xb1; 1.56</td>
<td valign="middle" align="center"><bold>98.70 &#xb1; 1.08</bold></td>
</tr>
<tr>
<td valign="middle" align="center">13</td>
<td valign="middle" align="center">Mulched Lettuce</td>
<td valign="middle" align="center">92.07 &#xb1; 3.00</td>
<td valign="middle" align="center">89.71 &#xb1; 3.61</td>
<td valign="middle" align="center">92.44 &#xb1; 3.90</td>
<td valign="middle" align="center">90.45 &#xb1; 2.45</td>
<td valign="middle" align="center">93.06 &#xb1; 2.06</td>
<td valign="middle" align="center">93.73 &#xb1; 1.2</td>
<td valign="middle" align="center"><bold>94.44 &#xb1; 2.81</bold></td>
</tr>
<tr>
<td valign="middle" align="center">14</td>
<td valign="middle" align="center">Romaine Lettuce</td>
<td valign="middle" align="center">92.66 &#xb1; 3.47</td>
<td valign="middle" align="center">92.99 &#xb1; 3.58</td>
<td valign="middle" align="center">95.32 &#xb1; 3.00</td>
<td valign="middle" align="center">94.52 &#xb1; 3.99</td>
<td valign="middle" align="center">95.15 &#xb1; 2.75</td>
<td valign="middle" align="center">96.99 &#xb1; 1.39</td>
<td valign="middle" align="center"><bold>97.38 &#xb1; 2.45</bold></td>
</tr>
<tr>
<td valign="middle" align="center">15</td>
<td valign="middle" align="center">Carrot</td>
<td valign="middle" align="center">92.53 &#xb1; 2.19</td>
<td valign="middle" align="center">87.34 &#xb1; 3.91</td>
<td valign="middle" align="center">91.63 &#xb1; 3.47</td>
<td valign="middle" align="center">92.72 &#xb1; 2.67</td>
<td valign="middle" align="center">93.16 &#xb1; 3.32</td>
<td valign="middle" align="center">96.15 &#xb1; 1.33</td>
<td valign="middle" align="center"><bold>96.36 &#xb1; 1.32</bold></td>
</tr>
<tr>
<td valign="middle" align="center">16</td>
<td valign="middle" align="center">White Radish</td>
<td valign="middle" align="center">90.36 &#xb1; 2.34</td>
<td valign="middle" align="center">83.04 &#xb1; 4.73</td>
<td valign="middle" align="center">85.15 &#xb1; 2.20</td>
<td valign="middle" align="center">86.06 &#xb1; 4.11</td>
<td valign="middle" align="center">87.99 &#xb1; 4.38</td>
<td valign="middle" align="center">91.38 &#xb1; 0.55</td>
<td valign="middle" align="center"><bold>91.49 &#xb1; 3.89</bold></td>
</tr>
<tr>
<td valign="middle" align="center">17</td>
<td valign="middle" align="center">Garlic Sprout</td>
<td valign="middle" align="center">93.46 &#xb1; 1.91</td>
<td valign="middle" align="center">94.09 &#xb1; 3.01</td>
<td valign="middle" align="center">93.17 &#xb1; 2.97</td>
<td valign="middle" align="center">93.92 &#xb1; 2.31</td>
<td valign="middle" align="center">96.76 &#xb1; 2.20</td>
<td valign="middle" align="center">97.07 &#xb1; 1.09</td>
<td valign="middle" align="center"><bold>97.36 &#xb1; 0.93</bold></td>
</tr>
<tr>
<td valign="middle" align="center">18</td>
<td valign="middle" align="center">Common Bean</td>
<td valign="middle" align="center">96.08 &#xb1; 2.41</td>
<td valign="middle" align="center">90.49 &#xb1; 6.98</td>
<td valign="middle" align="center">95.70 &#xb1; 3.19</td>
<td valign="middle" align="center">94.11 &#xb1; 2.20</td>
<td valign="middle" align="center">94.49 &#xb1; 4.48</td>
<td valign="middle" align="center">97.12 &#xb1; 1.21</td>
<td valign="middle" align="center"><bold>97.89 &#xb1; 1.18</bold></td>
</tr>
<tr>
<td valign="middle" align="center">19</td>
<td valign="middle" align="center">Others</td>
<td valign="middle" align="center">75.57 &#xb1; 1.87</td>
<td valign="middle" align="center">76.66 &#xb1; 3.28</td>
<td valign="middle" align="center">78.20 &#xb1; 3.48</td>
<td valign="middle" align="center">77.46 &#xb1; 1.71</td>
<td valign="middle" align="center">77.57 &#xb1; 2.99</td>
<td valign="middle" align="center">78.94 &#xb1; 1.55</td>
<td valign="middle" align="center"><bold>80.35 &#xb1; 2.50</bold></td>
</tr>
<tr>
<td valign="middle" colspan="2" align="center">OA</td>
<td valign="middle" align="center">86.33 &#xb1; 1.11</td>
<td valign="middle" align="center">83.89 &#xb1; 1.17</td>
<td valign="middle" align="center">86.76 &#xb1; 1.33</td>
<td valign="middle" align="center">86.65 &#xb1; 1.12</td>
<td valign="middle" align="center">87.50 &#xb1; 0.70</td>
<td valign="middle" align="center">88.98 &#xb1; 1.48</td>
<td valign="middle" align="center"><bold>89.95 &#xb1; 0.90</bold></td>
</tr>
<tr>
<td valign="middle" colspan="2" align="center">AA</td>
<td valign="middle" align="center">85.56 &#xb1; 0.85</td>
<td valign="middle" align="center">81.06 &#xb1; 0.95</td>
<td valign="middle" align="center">84.88 &#xb1; 1.71</td>
<td valign="middle" align="center">84.25 &#xb1; 1.02</td>
<td valign="middle" align="center">85.49 &#xb1; 0.46</td>
<td valign="middle" align="center">88.99 &#xb1; 0.88</td>
<td valign="middle" align="center"><bold>89.63 &#xb1; 0.76</bold></td>
</tr>
<tr>
<td valign="middle" colspan="2" align="center">K&#xd7;100</td>
<td valign="middle" align="center">82.84 &#xb1; 1.32</td>
<td valign="middle" align="center">79.82 &#xb1; 1.39</td>
<td valign="middle" align="center">83.32 &#xb1; 1.64</td>
<td valign="middle" align="center">83.19 &#xb1; 1.35</td>
<td valign="middle" align="center">84.21 &#xb1; 0.83</td>
<td valign="middle" align="center">86.83 &#xb1; 0.51</td>
<td valign="middle" align="center"><bold>87.30 &#xb1; 1.09</bold></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values indicate the highest accuracy results among the compared methods for each corresponding metric.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>However, several spectrally and morphologically similar categories remain challenging to distinguish. For example, the classification accuracy for cotton is slightly lower than that of CTMixer, whereas cotton stubble achieves higher accuracy than CTMixer. This phenomenon is mainly attributed to the high spectral similarity and transitional characteristics between cotton and cotton stubble, especially during post-harvest stages, where residual vegetation and soil background introduce mixed spectral responses. Although MDPC-Net effectively leverages multi-dimensional contextual information, such subtle intra-class variability and inter-class overlap inherently limit discriminability under a single-date hyperspectral setting. Similarly, confusion is observed between Chinese cabbage and pakchoi, as well as between mustard greens and baby mustard, which exhibit closely related spectral signatures and comparable canopy structures. These crop types often share similar biochemical compositions and growth stages, leading to overlapping spectral&#x2013;spatial representations. While MDPC-Net still outperforms several baseline methods for these classes, the reduced performance relative to other crop categories reflects intrinsic classification difficulty driven by semantic proximity rather than deficiencies in the proposed framework. From a methodological perspective, these observations suggest that further performance improvements for such confusable classes may require additional discriminative cues beyond single-scene spectral&#x2013;spatial features. Potential extensions include incorporating multi-temporal hyperspectral observations to exploit phenological differences, integrating object-level structural priors, or introducing class-adaptive feature refinement strategies that emphasize subtle intra-class variations. These directions could further enhance discrimination among crops with high spectral and morphological similarity.</p>
<p>The qualitative classification results are illustrated in <xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9</bold></xref>. Compared with the ground-truth map (<xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9a</bold></xref>), MDPC-Net (<xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9b</bold></xref>) consistently produces classification maps with higher spatial coherence and visual fidelity than all comparison models. Among the Mamba-based methods, such as WD-SSMamba (<xref ref-type="fig" rid="f9"><bold>Figure&#xa0;9c</bold></xref>), improved modeling of structural patterns and spatial relationships can be observed when compared with traditional CNN-based approaches. Nevertheless, Mamba-based models still fall short of MDPC-Net in terms of fine-grained detail preservation and boundary accuracy.</p>
<fig id="f9" position="float">
<label>Figure&#xa0;9</label>
<caption>
<p>Sample classification results of different models on the WHU-HongHu dataset. The numerical labels and their corresponding color patches indicate distinct crop categories, with the specific crop type for each label detailed in <xref ref-type="fig" rid="f6"><bold>Figure&#xa0;6</bold></xref>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1773924-g009.tif">
<alt-text content-type="machine-generated">Comparison of segmentation results across eight methods: (a) Ground Truth, (b) MDPC-Net, (c) WD-SSMamb, (d) SSFTT, (e) GAHT, (f) Morphformer, (g) GSC-Vit, and (h) CTMixer. Each uses a distinct color palette to classify regions, with a legend indicating nineteen color-coded categories on the right.</alt-text>
</graphic></fig>
<p>Specifically, while WD-SSMamba exhibits clearer region continuity and reduced noise, it still shows limitations in accurately delineating complex field boundaries and small, irregular plots. In contrast, MDPC-Net achieves more precise class transitions at field boundaries and maintains high correspondence with fine-scale ground-truth structures. This advantage is primarily attributed to the LPFPM module, which constructs a multi-scale feature pyramid through multi-dimensional residual connections and linear projections. By effectively integrating spectral, spatial, and spectral&#x2013;spatial features across scales, LPFPM enhances the model&#x2019;s adaptability to the heterogeneous and multi-scale characteristics inherent in real-world agricultural landscapes.</p>
</sec>
<sec id="s4_2_3">
<label>4.2.3</label>
<title>WHU-LongKou dataset</title>
<p>The accuracy evaluation of MDPC-Net and six competing models on the WHU-LongKou dataset is presented in <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>. MDPC-Net achieves an OA of 95.70%, an AA of 97.22%, and a Kappa coefficient of 0.9411, indicating an overall high level of classification performance on this dataset. Among the six crop types, MDPC-Net yields slightly lower accuracy than SSFTT only for cotton, while achieving superior or comparable performance for the remaining categories.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Accuracy evaluation of MDPC-Net versus six comparative models on the WHU-LongKou dataset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" colspan="2" align="center">Label</th>
<th valign="middle" align="center">SSFTT</th>
<th valign="middle" align="center">GAHT</th>
<th valign="middle" align="center">Morphformer</th>
<th valign="middle" align="center">GSC-Vit</th>
<th valign="middle" align="center">CTMixer</th>
<th valign="middle" align="center">WD-SSMamb</th>
<th valign="middle" align="center">MDPC-Net</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">Corn</td>
<td valign="middle" align="center">98.66 &#xb1; 0.59</td>
<td valign="middle" align="center">98.21 &#xb1; 0.61</td>
<td valign="middle" align="center">96.97 &#xb1; 2.01</td>
<td valign="middle" align="center">98.42 &#xb1; 1.11</td>
<td valign="middle" align="center">99.30 &#xb1; 0.28</td>
<td valign="middle" align="center">99.03 &#xb1; 1.29</td>
<td valign="middle" align="center"><bold>99.52 &#xb1; 0.30</bold></td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">Cotton</td>
<td valign="middle" align="center">98.58 &#xb1; 0.61</td>
<td valign="middle" align="center">89.65 &#xb1; 5.76</td>
<td valign="middle" align="center">96.63 &#xb1; 2.83</td>
<td valign="middle" align="center">95.07 &#xb1; 2.16</td>
<td valign="middle" align="center">95.79 &#xb1; 2.60</td>
<td valign="middle" align="center"><bold>98.95 &#xb1; 1.67</bold></td>
<td valign="middle" align="center">97.29 &#xb1; 1.10</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">Sesame</td>
<td valign="middle" align="center">97.66 &#xb1; 0.70</td>
<td valign="middle" align="center">96.30 &#xb1; 0.90</td>
<td valign="middle" align="center">94.55 &#xb1; 4.09</td>
<td valign="middle" align="center">98.55 &#xb1; 0.69</td>
<td valign="middle" align="center">98.25 &#xb1; 0.71</td>
<td valign="middle" align="center">99.12 &#xb1; 0.87</td>
<td valign="middle" align="center"><bold>99.34 &#xb1; 0.47</bold></td>
</tr>
<tr>
<td valign="middle" align="center">4</td>
<td valign="middle" align="center">Broad-leaf soybean</td>
<td valign="middle" align="center">91.13 &#xb1; 2.20</td>
<td valign="middle" align="center">87.07 &#xb1; 3.29</td>
<td valign="middle" align="center">87.32 &#xb1; 3.05</td>
<td valign="middle" align="center">87.03 &#xb1; 2.71</td>
<td valign="middle" align="center">90.38 &#xb1; 2.14</td>
<td valign="middle" align="center">99.25 &#xb1; 1.45</td>
<td valign="middle" align="center"><bold>92.49 &#xb1; 1.87</bold></td>
</tr>
<tr>
<td valign="middle" align="center">5</td>
<td valign="middle" align="center">Narrow-leaf soybean</td>
<td valign="middle" align="center">94.07 &#xb1; 2.48</td>
<td valign="middle" align="center">89.59 &#xb1; 5.28</td>
<td valign="middle" align="center">92.67 &#xb1; 4.48</td>
<td valign="middle" align="center">94.51 &#xb1; 3.72</td>
<td valign="middle" align="center">94.36 &#xb1; 1.46</td>
<td valign="middle" align="center">95.24 &#xb1; 1.76</td>
<td valign="middle" align="center"><bold>96.51 &#xb1; 1.48</bold></td>
</tr>
<tr>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">Rice</td>
<td valign="middle" align="center">98.52 &#xb1; 1.45</td>
<td valign="middle" align="center">99.54 &#xb1; 0.32</td>
<td valign="middle" align="center">97.80 &#xb1; 0.42</td>
<td valign="middle" align="center">99.58 &#xb1; 0.22</td>
<td valign="middle" align="center">98.39 &#xb1; 1.04</td>
<td valign="middle" align="center">99.18 &#xb1; 0.92</td>
<td valign="middle" align="center"><bold>99.77 &#xb1; 0.20</bold></td>
</tr>
<tr>
<td valign="middle" align="center">7</td>
<td valign="middle" align="center">Others</td>
<td valign="middle" align="center">92.56 &#xb1; 2.04</td>
<td valign="middle" align="center">92.08 &#xb1; 2.05</td>
<td valign="middle" align="center">88.69 &#xb1; 2.31</td>
<td valign="middle" align="center">92.08 &#xb1; 2.32</td>
<td valign="middle" align="center">94.19 &#xb1; 1.93</td>
<td valign="middle" align="center">95.37 &#xb1; 0.76</td>
<td valign="middle" align="center"><bold>95.64 &#xb1; 1.99</bold></td>
</tr>
<tr>
<td valign="middle" colspan="2" align="center">OA</td>
<td valign="middle" align="center">93.84 &#xb1; 0.64</td>
<td valign="middle" align="center">91.91 &#xb1; 1.50</td>
<td valign="middle" align="center">90.69 &#xb1; 2.27</td>
<td valign="middle" align="center">92.29 &#xb1; 1.33</td>
<td valign="middle" align="center">94.25 &#xb1; 0.30</td>
<td valign="middle" align="center">94.72 &#xb1; 0.87</td>
<td valign="middle" align="center"><bold>95.70 &#xb1; 0.87</bold></td>
</tr>
<tr>
<td valign="middle" colspan="2" align="center">AA</td>
<td valign="middle" align="center">95.88 &#xb1; 0.54</td>
<td valign="middle" align="center">93.21 &#xb1; 1.62</td>
<td valign="middle" align="center">93.52 &#xb1; 2.43</td>
<td valign="middle" align="center">95.03 &#xb1; 0.47</td>
<td valign="middle" align="center">95.81 &#xb1; 0.45</td>
<td valign="middle" align="center">95.84 &#xb1; 0.56</td>
<td valign="middle" align="center"><bold>97.22 &#xb1; 0.27</bold></td>
</tr>
<tr>
<td valign="middle" colspan="2" align="center">K&#xd7;100</td>
<td valign="middle" align="center">91.63 &#xb1; 0.84</td>
<td valign="middle" align="center">89.07 &#xb1; 1.94</td>
<td valign="middle" align="center">87.50 &#xb1; 2.95</td>
<td valign="middle" align="center">89.58 &#xb1; 1.73</td>
<td valign="middle" align="center">92.15 &#xb1; 0.41</td>
<td valign="middle" align="center">93.54 &#xb1; 0.76</td>
<td valign="middle" align="center"><bold>94.11 &#xb1; 1.17</bold></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values indicate the highest accuracy results among the compared methods for each corresponding metric.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>In addition, the classification accuracies for broad-leaf soybean and narrow-leaf soybean are relatively lower than those of other classes. This behavior can be primarily attributed to the strong spectral similarity and morphological resemblance between these two soybean varieties, which often share overlapping spectral signatures due to similar biochemical composition and growth conditions. Although these classes are spectrally distinguishable from other crops, their intra-group confusion poses a significant challenge for fine-grained classification. Notably, compared with other competing models, MDPC-Net demonstrates a markedly improved capability to discriminate between these two confusing categories, suggesting that the proposed multi-dimensional feature coupling strategy enhances sensitivity to subtle spectral&#x2013;spatial variations. From a broader perspective, the remaining confusion between soybean subtypes reflects an intrinsic limitation of single-date hyperspectral imagery, where spectral similarity dominates class separability. Future extensions of the proposed framework could explore the integration of multi-temporal observations, phenology-aware representations, or class-specific adaptive refinement modules to further improve discrimination among closely related crop varieties. Across all evaluated models, MDPC-Net also exhibits the lowest standard deviation for most classes, indicating that its predictions are more stable and less sensitive to within-class variability, such as the same crop type appearing across different plots or under slightly varying environmental conditions. This stability highlights the robustness of MDPC-Net and suggests that the learned representations generalize well across spatially heterogeneous agricultural parcels.</p>
<p>Qualitative comparisons between the ground-truth map (<xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10a</bold></xref>) and the classification result produced by MDPC-Net (<xref ref-type="fig" rid="f10"><bold>Figure&#xa0;10b</bold></xref>) further support these findings. The overall color distribution and field boundaries are highly consistent with the reference labels. The &#x201c;others&#x201d; category exhibits clear and continuous boundaries, while the spatial locations and shapes of corn, cotton, rice, and sesame are accurately preserved, with no evident large-scale misclassification artifacts. Minor errors are mainly observed at the boundaries of broad-leaf soybean fields, where some pixels are misclassified as narrow-leaf soybean or cotton.</p>
<fig id="f10" position="float">
<label>Figure&#xa0;10</label>
<caption>
<p>Sample classification results of different models on the WHU-LongKou dataset. The numerical labels and their corresponding color patches indicate distinct crop categories, with the specific crop type for each label detailed in <xref ref-type="fig" rid="f7"><bold>Figure&#xa0;7</bold></xref>.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpls-17-1773924-g010.tif">
<alt-text content-type="machine-generated">Comparison of segmentation results with eight panels, labeled (a) to (h). Each panel represents a different method: (a) Ground Truth, (b) MDPC-Net, (c) WD-SSMamb, (d) SSFTT, (e) GAHT, (f) Morphformer, (g) GSC-Vit, (h) CTMixer. Various colors denote segments: tan (1), green (2), olive (3), yellow (4), cyan (5), magenta (6), and blue (7). All methods show similar patterns with variations in segmentation clarity.</alt-text>
</graphic></fig>
<p>In contrast, several competing methods suffer from noticeable loss of detail in small-area classes, particularly cotton, resulting in fragmented or incomplete field representations (<xref ref-type="fig" rid="f10"><bold>Figures 10c&#x2013;h</bold></xref>). These results indicate that MDPC-Net more effectively preserves global spatial structure, field morphology, and class distribution consistency, enabling more reliable modeling of crop associations in complex agricultural scenes.</p>
</sec>
</sec>
<sec id="s4_3">
<label>4.3</label>
<title>Ablation experiments</title>
<p>To thoroughly validate the effectiveness of the innovative components in our proposed method, we conducted ablation experiments on all three datasets. The ablation study comprises two parts: (1) evaluating the contribution of single-branch feature extraction versus the three-branch MDFEM module, and (2) assessing the effectiveness of the LPFPM module.</p>
<p>The results are summarized in <xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>. For each dataset, the first row reports the average accuracy obtained from the 1D, 2D, and 3D single-branch extractors combined with LPFPM; the second row presents results using MDFEM alone (without LPFPM); and the third row corresponds to MDFEM combined with LPFPM, i.e., the full MDPC-Net.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Ablation study of LPFPM versus MDFEM on three datasets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Datasets</th>
<th valign="middle" align="center">1D/2D/3D</th>
<th valign="middle" align="center">MDFEM</th>
<th valign="middle" align="center">LPFPM</th>
<th valign="middle" align="center">OA</th>
<th valign="middle" align="center">AA</th>
<th valign="middle" align="center">K&#xd7;100</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="3" align="center">Matiwan Village</td>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center">85.82 &#xb1; 2.49</td>
<td valign="middle" align="center">92.67 &#xb1; 0.88</td>
<td valign="middle" align="center">84.16 &#xb1; 2.61</td>
</tr>
<tr>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">86.11 &#xb1; 1.67</td>
<td valign="middle" align="center">93.25 &#xb1; 0.79</td>
<td valign="middle" align="center">85.63 &#xb1; 1.78</td>
</tr>
<tr>
<td valign="middle" align="center"/>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center"><bold>88.82 &#xb1; 0.75</bold></td>
<td valign="middle" align="center"><bold>94.20 &#xb1; 0.26</bold></td>
<td valign="middle" align="center"><bold>87.02 &#xb1; 0.84</bold></td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">WHU-HongHu</td>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center">84.90 &#xb1; 0.47</td>
<td valign="middle" align="center">82.03 &#xb1; 1.30</td>
<td valign="middle" align="center">81.00 &#xb1; 0.60</td>
</tr>
<tr>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">87.50 &#xb1; 0.70</td>
<td valign="middle" align="center">85.49 &#xb1; 0.46</td>
<td valign="middle" align="center">84.21 &#xb1; 0.83</td>
</tr>
<tr>
<td valign="middle" align="center"/>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center"><bold>89.95 &#xb1; 0.90</bold></td>
<td valign="middle" align="center"><bold>89.63 &#xb1; 0.76</bold></td>
<td valign="middle" align="center"><bold>87.30 &#xb1; 1.09</bold></td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">WHU-LongKou</td>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center">92.88 &#xb1; 1.05</td>
<td valign="middle" align="center">93.89 &#xb1; 0.86</td>
<td valign="middle" align="center">90.35 &#xb1; 1.37</td>
</tr>
<tr>
<td valign="middle" align="center"/>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center"/>
<td valign="middle" align="center">93.83 &#xb1; 0.67</td>
<td valign="middle" align="center">95.66 &#xb1; 0.48</td>
<td valign="middle" align="center">91.61 &#xb1; 0.88</td>
</tr>
<tr>
<td valign="middle" align="center"/>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center">&#x2714;</td>
<td valign="middle" align="center"><bold>95.70 &#xb1; 0.87</bold></td>
<td valign="middle" align="center"><bold>97.22 &#xb1; 0.27</bold></td>
<td valign="middle" align="center"><bold>94.11 &#xb1; 1.17</bold></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values indicate the highest accuracy results among the compared methods for each corresponding metric.</p></fn>
</table-wrap-foot>
</table-wrap>
<sec id="s4_3_1">
<label>4.3.1</label>
<title>Individual modules vs. combined modules</title>
<p>Across all three datasets (Matiwan Village, WHU-HongHu, and WHU-LongKou), each module proves independently effective: MDFEM without LPFPM consistently achieves higher OA, AA, and Kappa than the single-branch extractors paired with LPFPM, and the full configuration improves on both, demonstrating that each component contributes to the final performance.</p>
<p>Moreover, the improvement achieved by the combined MDFEM + LPFPM configuration exceeds what either partial configuration attains, by a margin larger than the individual contributions would suggest. This indicates that the two modules do not simply provide additive functional benefits; rather, they synergistically enhance feature representation.</p>
<p>Specifically, MDFEM strengthens the discriminability of multi-dimensional features (e.g., through enhanced spectral&#x2013;spatial fusion), while LPFPM refines the hierarchical transmission within the feature pyramid. When integrated, these two components enable the network to more accurately capture agricultural category variations&#x2014;such as crop-specific spectral differences and spatial distribution patterns&#x2014;leading to more precise recognition results.</p>
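<p>To make the multi-dimensional coupling idea concrete, the following minimal PyTorch sketch shows a three-branch extractor that applies 1D (spectral), 2D (spatial), and 3D (joint spectral&#x2013;spatial) convolutions to a hyperspectral patch and concatenates the results. The layer widths, kernel sizes, and fusion scheme are illustrative assumptions for exposition, not the published MDPC-Net configuration.</p>
<preformat>
import torch
import torch.nn as nn

class ThreeBranchExtractor(nn.Module):
    # Illustrative sketch of three-branch feature coupling: a 1-D branch
    # over the spectral axis, a 2-D branch over the spatial plane, and a
    # 3-D branch over the joint cube. Channel widths are assumptions.
    def __init__(self, bands, patch, width=16):
        super().__init__()
        self.spectral = nn.Sequential(             # input (B, 1, bands)
            nn.Conv1d(1, width, 7, padding=3), nn.ReLU(),
            nn.AdaptiveAvgPool1d(1))
        self.spatial = nn.Sequential(              # input (B, bands, H, W)
            nn.Conv2d(bands, width, 3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1))
        self.joint = nn.Sequential(                # input (B, 1, bands, H, W)
            nn.Conv3d(1, width, (7, 3, 3), padding=(3, 1, 1)), nn.ReLU(),
            nn.AdaptiveAvgPool3d(1))

    def forward(self, cube):                       # cube: (B, bands, H, W)
        b, c, h, w = cube.shape
        center = cube[:, :, h // 2, w // 2].unsqueeze(1)  # center spectrum
        f1 = self.spectral(center).flatten(1)
        f2 = self.spatial(cube).flatten(1)
        f3 = self.joint(cube.unsqueeze(1)).flatten(1)
        return torch.cat([f1, f2, f3], dim=1)      # coupled representation

x = torch.randn(4, 270, 9, 9)                      # e.g., 270-band 9x9 patches
print(ThreeBranchExtractor(270, 9)(x).shape)       # torch.Size([4, 48])
</preformat>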
</sec>
<sec id="s4_3_2">
<label>4.3.2</label>
<title>Robustness across heterogeneous datasets</title>
<p>The three datasets represent varying levels of scene complexity: Matiwan Village corresponds to large heterogeneous farmlands, WHU-HongHu to medium-complex mosaicked fields, and WHU-LongKou to small and relatively simple plots.</p>
<p>In all cases, the combined MDFEM + LPFPM configuration yields the best OA, AA, and Kappa, consistently outperforming either module alone (<xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>). This demonstrates that the proposed modules effectively adapt to different crop spectral variations and farmland scales across diverse agricultural landscapes.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Parameters and FLOPs of MDPC-Net and six comparison models on the three datasets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Datasets</th>
<th valign="middle" align="center">Methods</th>
<th valign="middle" align="center">Parameters/M</th>
<th valign="middle" align="center">FLOPs/M</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="7" align="center">Matiwan Village</td>
<td valign="middle" align="center">SSFTT</td>
<td valign="middle" align="center">1.190</td>
<td valign="middle" align="center">62.557</td>
</tr>
<tr>
<td valign="middle" align="center">GAHT</td>
<td valign="middle" align="center">1.498</td>
<td valign="middle" align="center">181.149</td>
</tr>
<tr>
<td valign="middle" align="center">Morphformer</td>
<td valign="middle" align="center">0.244</td>
<td valign="middle" align="center">45.698</td>
</tr>
<tr>
<td valign="middle" align="center">GSC-Vit</td>
<td valign="middle" align="center">0.708</td>
<td valign="middle" align="center">49.073</td>
</tr>
<tr>
<td valign="middle" align="center">CTMixer</td>
<td valign="middle" align="center">0.612</td>
<td valign="middle" align="center">74.314</td>
</tr>
<tr>
<td valign="middle" align="center">WD-SSMamba</td>
<td valign="middle" align="center"><bold>0.153</bold></td>
<td valign="middle" align="center"><bold>24.667</bold></td>
</tr>
<tr>
<td valign="middle" align="center"><bold>MDPC-Net</bold></td>
<td valign="middle" align="center">0.295</td>
<td valign="middle" align="center">33.506</td>
</tr>
<tr>
<td valign="middle" rowspan="7" align="center">WHU-HongHu</td>
<td valign="middle" align="center">SSFTT</td>
<td valign="middle" align="center">1.255</td>
<td valign="middle" align="center">65.999</td>
</tr>
<tr>
<td valign="middle" align="center">GAHT</td>
<td valign="middle" align="center">1.515</td>
<td valign="middle" align="center">183.100</td>
</tr>
<tr>
<td valign="middle" align="center">Morphformer</td>
<td valign="middle" align="center">0.256</td>
<td valign="middle" align="center">48.206</td>
</tr>
<tr>
<td valign="middle" align="center">GSC-Vit</td>
<td valign="middle" align="center">0.711</td>
<td valign="middle" align="center">49.506</td>
</tr>
<tr>
<td valign="middle" align="center">CTMixer</td>
<td valign="middle" align="center">0.613</td>
<td valign="middle" align="center">74.364</td>
</tr>
<tr>
<td valign="middle" align="center">WD-SSMamba</td>
<td valign="middle" align="center"><bold>0.152</bold></td>
<td valign="middle" align="center"><bold>24.712</bold></td>
</tr>
<tr>
<td valign="middle" align="center"><bold>MDPC-Net</bold></td>
<td valign="middle" align="center">0.306</td>
<td valign="middle" align="center">34.807</td>
</tr>
<tr>
<td valign="middle" rowspan="7" align="center">WHU-LongKou</td>
<td valign="middle" align="center">SSFTT</td>
<td valign="middle" align="center">1.254</td>
<td valign="middle" align="center">65.998</td>
</tr>
<tr>
<td valign="middle" align="center">GAHT</td>
<td valign="middle" align="center">1.514</td>
<td valign="middle" align="center">183.100</td>
</tr>
<tr>
<td valign="middle" align="center">Morphformer</td>
<td valign="middle" align="center">0.255</td>
<td valign="middle" align="center">48.205</td>
</tr>
<tr>
<td valign="middle" align="center">GSC-Vit</td>
<td valign="middle" align="center">0.711</td>
<td valign="middle" align="center">49.506</td>
</tr>
<tr>
<td valign="middle" align="center">CTMixer</td>
<td valign="middle" align="center">0.612</td>
<td valign="middle" align="center">74.362</td>
</tr>
<tr>
<td valign="middle" align="center">WD-SSMamba</td>
<td valign="middle" align="center"><bold>0.161</bold></td>
<td valign="middle" align="center"><bold>24.709</bold></td>
</tr>
<tr>
<td valign="middle" align="center"><bold>MDPC-Net</bold></td>
<td valign="middle" align="center">0.305</td>
<td valign="middle" align="center">34.806</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values indicate the lowest parameter counts and FLOPs among the compared methods; the name of the proposed MDPC-Net is also set in bold.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>The combined architecture therefore exhibits strong robustness in cross-region and cross-scale crop recognition, maintaining reliable performance despite substantial variations in field size, crop type, and scene composition.</p>
</sec>
</sec>
<sec id="s4_4">
<label>4.4</label>
<title>Computational complexity analysis</title>
<p>Beyond accuracy, MDPC-Net demonstrates clear advantages in computational efficiency. We computed the number of trainable parameters and the floating-point operations (FLOPs) of MDPC-Net and the six comparison models on all three datasets (<xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref>).</p>
<p>In terms of parameter count, WD-SSMamba is the most compact model on all three datasets: 0.153M parameters on Matiwan Village, roughly half of MDPC-Net&#x2019;s 0.295M, and 0.152M on WHU-HongHu and 0.161M on WHU-LongKou versus MDPC-Net&#x2019;s 0.306M and 0.305M, respectively. This compactness reflects the lightweight design of WD-SSMamba, which builds on wavelet convolutions and the Mamba architecture for efficient feature extraction and representation. WD-SSMamba also records the lowest FLOPs (24.667M, 24.712M, and 24.709M on the three datasets), with MDPC-Net second at 33.506M, 34.807M, and 34.806M FLOPs, well below the remaining five models (for example, GAHT exceeds 180M FLOPs on every dataset). Taken together with the accuracy results reported above, this comparison highlights MDPC-Net&#x2019;s favorable balance of computational efficiency and accuracy: it is only marginally heavier than WD-SSMamba yet consistently more accurate, making it well suited to large-scale remote sensing tasks that demand both high accuracy and low computational overhead. WD-SSMamba excels in compactness, but at the cost of lower classification accuracy, illustrating the trade-off between model efficiency and accuracy.</p>
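<p>Parameter counts such as those in <xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref> can be reproduced directly from a model, and FLOPs are typically estimated with an external profiler. The sketch below uses a small stand-in model and assumes the third-party thop package; it illustrates the measurement procedure rather than the exact values reported.</p>
<preformat>
import torch
import torch.nn as nn

def count_parameters(model):
    # Trainable parameters in millions ("Parameters/M" column).
    return sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6

# Stand-in model; substitute any of the networks listed in Table 5.
model = nn.Sequential(
    nn.Conv2d(270, 16, 3, padding=1), nn.ReLU(),
    nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(16, 9))
x = torch.randn(1, 270, 9, 9)
print(f"{count_parameters(model):.3f} M parameters")

# FLOPs are usually estimated with a profiling tool; thop is one common
# choice (assumed installed via: pip install thop). Note that thop counts
# multiply-accumulates (MACs); some papers report MACs as FLOPs.
from thop import profile
macs, params = profile(model, inputs=(x,))
print(f"{macs / 1e6:.3f} M MACs, {params / 1e6:.3f} M params")
</preformat>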
</sec>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusion</title>
<p>In this study, we proposed MDPC-Net, a hyperspectral image crop classification (HSICC) framework that achieves high classification accuracy while maintaining low computational complexity. By integrating multi-dimensional feature extraction, a linear-projection feature pyramid, and Transformer-based global modeling, MDPC-Net demonstrates strong and stable performance across diverse agricultural scenes.</p>
<p>First, the MDFEM module adopts a three-branch architecture with progressive dilated convolutions to jointly capture spectral, spatial, and spectral&#x2013;spatial characteristics. This design effectively alleviates long-standing challenges such as &#x201c;same spectrum, different objects&#x201d; (e.g., broad- vs. narrow-leaf soybeans) and &#x201c;same object, different spectra&#x201d; (e.g., rice vs. rice stubble). Second, the LPFPM module reorganizes and fuses multi-dimensional features through depthwise separable convolutions and linear projection, constructing a feature pyramid that reduces hyperspectral redundancy while enabling multi-scale information fusion. This mechanism supports accurate modeling across spatial scales, from fine-grained plot boundaries (e.g., sesame field edges) to large-scale semantic patterns (e.g., corn distribution). Finally, the Transformer component captures long-range contextual dependencies via self-attention, further enhancing global consistency and boundary delineation in complex agricultural environments.</p>
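<p>For readers implementing similar pipelines, the depthwise-separable plus linear-projection pattern referenced above can be sketched as follows. This is an illustrative PyTorch example under assumed layer widths, not the released MDPC-Net code.</p>
<preformat>
import torch
import torch.nn as nn

class DepthwiseSeparableProjection(nn.Module):
    # Illustrative building block: a depthwise convolution filters each
    # channel independently, a pointwise (1x1) convolution mixes channels,
    # and a linear projection compresses the pooled result to a
    # lower-dimensional pyramid level. Widths are assumptions.
    def __init__(self, channels, out_dim):
        super().__init__()
        self.depthwise = nn.Conv2d(channels, channels, 3,
                                   padding=1, groups=channels)
        self.pointwise = nn.Conv2d(channels, channels, 1)
        self.project = nn.Linear(channels, out_dim)

    def forward(self, x):                  # x: (B, C, H, W)
        x = torch.relu(self.pointwise(self.depthwise(x)))
        x = x.mean(dim=(2, 3))             # global average pooling
        return self.project(x)             # (B, out_dim)

feat = torch.randn(4, 64, 9, 9)
level = DepthwiseSeparableProjection(64, 32)(feat)
print(level.shape)                         # torch.Size([4, 32])
</preformat>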
<p>Importantly, the proposed framework is sensor-agnostic in design and does not rely on sensor-specific handcrafted features, making it readily adaptable to hyperspectral data acquired from different platforms, including airborne, UAV-based, and satellite-borne sensors, with varying spectral resolutions and band configurations. The use of linear projection and multi-dimensional feature coupling allows MDPC-Net to flexibly accommodate differences in spectral dimensionality and spatial resolution, which is critical for real-world deployment across heterogeneous data sources. Moreover, MDPC-Net is not tailored to a specific crop type or region, and its effectiveness across three benchmark datasets with distinct crop compositions and planting patterns suggests strong generalization potential to other agricultural regions. This makes the proposed framework particularly suitable for large-scale agricultural mapping and cross-regional applications, where crop types, field geometries, and management practices vary significantly. Experimental results on three representative benchmark datasets consistently demonstrate that MDPC-Net outperforms all comparison models, validating both its effectiveness and robustness under limited-sample conditions.</p>
<p>In future work, MDPC-Net may be further enhanced through additional lightweight optimization and the integration of spatiotemporal information, such as multi-temporal hyperspectral observations. These extensions would further improve adaptability to different sensors and support dynamic crop monitoring, thereby strengthening the practical value of MDPC-Net for precision agriculture and large-scale agricultural management.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material. Further inquiries can be directed to the corresponding author.</p></sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>MY: Conceptualization, Data curation, Formal analysis, Funding acquisition, Investigation, Methodology, Project administration, Resources, Software, Supervision, Validation, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &amp; editing.</p></sec>
<ack>
<title>Acknowledgments</title>
<p>The authors acknowledge all data contributors and platforms that provided the data, and thank the reviewers for their constructive comments and helpful suggestions.</p>
</ack>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s10" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Aasen</surname> <given-names>H.</given-names></name>
<name><surname>Honkavaara</surname> <given-names>E.</given-names></name>
<name><surname>Lucieer</surname> <given-names>A.</given-names></name>
<name><surname>Zarco-Tejada</surname> <given-names>P. J.</given-names></name>
</person-group> (<year>2018</year>). 
<article-title>Quantitative remote sensing at ultra-high resolution with UAV spectroscopy: A review of sensor technology, measurement procedures, and data correction workflows</article-title>. <source>Remote Sens.</source> <volume>10</volume>, <elocation-id>1091</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs10071091</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Agilandeeswari</surname> <given-names>L.</given-names></name>
<name><surname>Prabukumar</surname> <given-names>M.</given-names></name>
<name><surname>Radhesyam</surname> <given-names>V.</given-names></name>
<name><surname>Phaneendra</surname> <given-names>K. L. N. B.</given-names></name>
<name><surname>Farhan</surname> <given-names>A.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Crop classification for agricultural applications in hyperspectral remote sensing images</article-title>. <source>Appl. Sci.</source> <volume>12</volume>, <elocation-id>1670</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/app12031670</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Al Duhayyim</surname> <given-names>M.</given-names></name>
<name><surname>Alsolai</surname> <given-names>H.</given-names></name>
<name><surname>Ben Haj Hassine</surname> <given-names>S.</given-names></name>
<name><surname>Alzahrani</surname> <given-names>J. S.</given-names></name>
<name><surname>Salama</surname> <given-names>A. S.</given-names></name>
<name><surname>Motwakel</surname> <given-names>A.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Automated deep learning driven crop classification on hyperspectral remote sensing images</article-title>. <source>Comput. Mater. Contin.</source> <volume>74</volume>, <fpage>3167</fpage>&#x2013;<lpage>3181</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.32604/cmc.2023.033054</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ali</surname> <given-names>I.</given-names></name>
<name><surname>Mushtaq</surname> <given-names>Z.</given-names></name>
<name><surname>Arif</surname> <given-names>S.</given-names></name>
<name><surname>Algarni</surname> <given-names>A. D.</given-names></name>
<name><surname>Soliman</surname> <given-names>N. F.</given-names></name>
<name><surname>El-Shafai</surname> <given-names>W.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Hyperspectral images-based crop classification scheme for agricultural remote sensing</article-title>. <source>Comput. Syst. Sci. Eng.</source> <volume>46</volume>, <fpage>303</fpage>&#x2013;<lpage>319</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.32604/csse.2023.034374</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chandrasekharan</surname> <given-names>S.</given-names></name>
<name><surname>Gomez</surname> <given-names>K.</given-names></name>
<name><surname>Al-Hourani</surname> <given-names>A.</given-names></name>
<name><surname>Kandeepan</surname> <given-names>S.</given-names></name>
<name><surname>Rasheed</surname> <given-names>T.</given-names></name>
<name><surname>Goratti</surname> <given-names>L.</given-names></name>
<etal/>
</person-group>. (<year>2016</year>). 
<article-title>Designing and implementing future aerial communication networks</article-title>. <source>IEEE Commun. Mag.</source> <volume>54</volume>, <fpage>26</fpage>&#x2013;<lpage>34</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/MCOM.2016.7470932</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chen</surname> <given-names>M.</given-names></name>
<name><surname>Feng</surname> <given-names>S.</given-names></name>
<name><surname>Zhao</surname> <given-names>C.</given-names></name>
<name><surname>Qu</surname> <given-names>B.</given-names></name>
<name><surname>Su</surname> <given-names>N.</given-names></name>
<name><surname>Li</surname> <given-names>W.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Fractional fourier-based frequency-spatial&#x2013;spectral prototype network for agricultural hyperspectral image open-set classification</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>62</volume>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2024.3386566</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Fernandes</surname> <given-names>F. C.</given-names></name>
<name><surname>Faramarzi</surname> <given-names>E.</given-names></name>
<name><surname>Li</surname> <given-names>X.</given-names></name>
<name><surname>Ma</surname> <given-names>Z.</given-names></name>
<name><surname>Ducloux</surname> <given-names>X.</given-names></name>
</person-group> (<year>2019</year>). 
<article-title>Mobile display power reduction for video using standardized metadata</article-title>. <source>IEEE Trans. Mob. Comput.</source> <volume>18</volume>, <fpage>165</fpage>&#x2013;<lpage>178</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TMC.2018.2829185</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gallo</surname> <given-names>I.</given-names></name>
<name><surname>Ranghetti</surname> <given-names>L.</given-names></name>
<name><surname>Landro</surname> <given-names>N.</given-names></name>
<name><surname>La Grassa</surname> <given-names>R.</given-names></name>
<name><surname>Boschetti</surname> <given-names>M.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>In-season and dynamic crop mapping using 3D convolution neural networks and sentinel-2 time series</article-title>. <source>ISPRS J. Photogramm. Remote Sens.</source> <volume>195</volume>, <fpage>335</fpage>&#x2013;<lpage>352</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.isprsjprs.2022.12.005</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gao</surname> <given-names>F.</given-names></name>
<name><surname>Anderson</surname> <given-names>M. C.</given-names></name>
<name><surname>Zhang</surname> <given-names>X.</given-names></name>
<name><surname>Yang</surname> <given-names>Z.</given-names></name>
<name><surname>Alfieri</surname> <given-names>J. G.</given-names></name>
<name><surname>Kustas</surname> <given-names>W. P.</given-names></name>
<etal/>
</person-group>. (<year>2017</year>). 
<article-title>Toward mapping crop progress at field scales through fusion of Landsat and MODIS imagery</article-title>. <source>Remote Sens. Environ.</source> <volume>188</volume>, <fpage>9</fpage>&#x2013;<lpage>25</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.rse.2016.11.004</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Guerri</surname> <given-names>M. F.</given-names></name>
<name><surname>Distante</surname> <given-names>C.</given-names></name>
<name><surname>Spagnolo</surname> <given-names>P.</given-names></name>
<name><surname>Bougourzi</surname> <given-names>F.</given-names></name>
<name><surname>Taleb-Ahmed</surname> <given-names>A.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Deep learning techniques for hyperspectral image analysis in agriculture: A review</article-title>. <source>ISPRS Open J. Photogramm. Remote Sens.</source> <volume>12</volume>, <elocation-id>100062</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ophoto.2024.100062</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Guo</surname> <given-names>X.</given-names></name>
<name><surname>Feng</surname> <given-names>Q.</given-names></name>
<name><surname>Guo</surname> <given-names>F.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>CMTNet: a hybrid CNN-transformer network for UAV-based hyperspectral crop classification in precision agriculture</article-title>. <source>Sci. Rep.</source> <volume>15</volume>, <fpage>12383</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-025-97052-w</pub-id>, PMID: <pub-id pub-id-type="pmid">40216979</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hamidi</surname> <given-names>M.</given-names></name>
<name><surname>Safari</surname> <given-names>A.</given-names></name>
<name><surname>Homayouni</surname> <given-names>S.</given-names></name>
</person-group> (<year>2021</year>). 
<article-title>An auto-encoder based classifier for crop mapping from multitemporal multispectral imagery</article-title>. <source>Int. J. Remote Sens.</source> <volume>42</volume>, <fpage>986</fpage>&#x2013;<lpage>1016</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/01431161.2020.1820619</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Howard</surname> <given-names>A. G.</given-names></name>
<name><surname>Zhu</surname> <given-names>M.</given-names></name>
<name><surname>Chen</surname> <given-names>B.</given-names></name>
<name><surname>Kalenichenko</surname> <given-names>D.</given-names></name>
<name><surname>Wang</surname> <given-names>W.</given-names></name>
<name><surname>Weyand</surname> <given-names>T.</given-names></name>
<etal/>
</person-group>. (<year>2017</year>). 
<article-title>MobileNets: efficient convolutional neural networks for mobile vision applications</article-title>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1704.04861</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hu</surname> <given-names>W.</given-names></name>
<name><surname>Huang</surname> <given-names>Y.</given-names></name>
<name><surname>Wei</surname> <given-names>L.</given-names></name>
<name><surname>Zhang</surname> <given-names>F.</given-names></name>
<name><surname>Li</surname> <given-names>H.</given-names></name>
</person-group> (<year>2015</year>). 
<article-title>Deep convolutional neural networks for hyperspectral image classification</article-title>. <source>J. Sens.</source> <volume>2015</volume>, <elocation-id>258619</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2015/258619</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Iandola</surname> <given-names>F. N.</given-names></name>
<name><surname>Han</surname> <given-names>S.</given-names></name>
<name><surname>Moskewicz</surname> <given-names>M. W.</given-names></name>
<name><surname>Ashraf</surname> <given-names>K.</given-names></name>
<name><surname>Dally</surname> <given-names>W. J.</given-names></name>
<name><surname>Keutzer</surname> <given-names>K.</given-names></name>
</person-group> (<year>2016</year>). 
<article-title>SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and &lt;0.5MB model size</article-title>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.1602.07360</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Khan</surname> <given-names>H. R.</given-names></name>
<name><surname>Gillani</surname> <given-names>Z.</given-names></name>
<name><surname>Jamal</surname> <given-names>M. H.</given-names></name>
<name><surname>Athar</surname> <given-names>A.</given-names></name>
<name><surname>Chaudhry</surname> <given-names>M. T.</given-names></name>
<name><surname>Chao</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2023</year>). 
<article-title>Early identification of crop type for smallholder farming systems using deep learning on time-series sentinel-2 imagery</article-title>. <source>Sensors</source> <volume>23</volume>, <fpage>1779</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s23041779</pub-id>, PMID: <pub-id pub-id-type="pmid">36850377</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Khan</surname> <given-names>U.</given-names></name>
<name><surname>Khan</surname> <given-names>M.</given-names></name>
<name><surname>Latif</surname> <given-names>M.</given-names></name>
<name><surname>Naveed</surname> <given-names>M.</given-names></name>
<name><surname>Alam</surname> <given-names>M.</given-names></name>
<name><surname>Khan</surname> <given-names>S.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>A systematic literature review of machine learning and deep learning approaches for spectral image classification in agricultural applications using aerial photography</article-title>. <source>Comput. Mater. Contin.</source> <volume>78</volume>, <fpage>2967</fpage>&#x2013;<lpage>3000</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.32604/cmc.2024.045101</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Konduri</surname> <given-names>V. S.</given-names></name>
<name><surname>Kumar</surname> <given-names>J.</given-names></name>
<name><surname>Hargrove</surname> <given-names>W. W.</given-names></name>
<name><surname>Hoffman</surname> <given-names>F. M.</given-names></name>
<name><surname>Ganguly</surname> <given-names>A. R.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Mapping crops within the growing season across the United States</article-title>. <source>Remote Sens. Environ.</source> <volume>251</volume>, <elocation-id>112048</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.rse.2020.112048</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liang</surname> <given-names>J.</given-names></name>
<name><surname>Yang</surname> <given-names>Z.</given-names></name>
<name><surname>Bi</surname> <given-names>Y.</given-names></name>
<name><surname>Qu</surname> <given-names>B.</given-names></name>
<name><surname>Liu</surname> <given-names>M.</given-names></name>
<name><surname>Xue</surname> <given-names>B.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>A multitree genetic programming-based feature construction approach to crop classification using hyperspectral images</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>62</volume>, <fpage>1</fpage>&#x2013;<lpage>17</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2024.3415773</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lu</surname> <given-names>Q.</given-names></name>
<name><surname>Xie</surname> <given-names>Y.</given-names></name>
<name><surname>Wei</surname> <given-names>L.</given-names></name>
<name><surname>Wei</surname> <given-names>Z.</given-names></name>
<name><surname>Tian</surname> <given-names>S.</given-names></name>
<name><surname>Liu</surname> <given-names>H.</given-names></name>
<etal/>
</person-group>. (<year>2024</year>). 
<article-title>Extended attribute profiles for precise crop classification in UAV-borne hyperspectral imagery</article-title>. <source>IEEE Geosci. Remote Sens. Lett.</source> <volume>21</volume>, <fpage>1</fpage>&#x2013;<lpage>5</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/LGRS.2023.3348462</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mei</surname> <given-names>S.</given-names></name>
<name><surname>Song</surname> <given-names>C.</given-names></name>
<name><surname>Ma</surname> <given-names>M.</given-names></name>
<name><surname>Xu</surname> <given-names>F.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Hyperspectral image classification using group-aware hierarchical transformer</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>60</volume>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2022.3207933</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Michelon</surname> <given-names>G. K.</given-names></name>
<name><surname>Assun&#xe7;&#xe3;o</surname> <given-names>W. K. G.</given-names></name>
<name><surname>Gr&#xfc;nbacher</surname> <given-names>P.</given-names></name>
<name><surname>Egyed</surname> <given-names>A.</given-names></name>
</person-group> (<year>2023</year>). &#x201c;
<article-title>Analysis and propagation of feature revisions in preprocessor-based software product lines</article-title>,&#x201d; in <conf-name>2023 IEEE International Conference on Software Analysis, Evolution and Reengineering (SANER)</conf-name>. (<publisher-loc>Piscataway, NJ, USA</publisher-loc>: 
<publisher-name>IEEE</publisher-name>) <fpage>284</fpage>&#x2013;<lpage>295</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/SANER56733.2023.00035</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Nazeri</surname> <given-names>K.</given-names></name>
<name><surname>Ng</surname> <given-names>E.</given-names></name>
<name><surname>Joseph</surname> <given-names>T.</given-names></name>
<name><surname>Qureshi</surname> <given-names>F.</given-names></name>
<name><surname>Ebrahimi</surname> <given-names>M.</given-names></name>
</person-group> (<year>2019</year>). &#x201c;
<article-title>EdgeConnect: structure guided image inpainting using edge prediction</article-title>,&#x201d; in <conf-name>2019 IEEE/CVF International Conference on Computer Vision Workshop (ICCVW)</conf-name>. (<publisher-loc>Piscataway, NJ, USA</publisher-loc>: 
<publisher-name>IEEE</publisher-name>) <fpage>3265</fpage>&#x2013;<lpage>3274</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/ICCVW.2019.00408</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Roy</surname> <given-names>S. K.</given-names></name>
<name><surname>Deria</surname> <given-names>A.</given-names></name>
<name><surname>Shah</surname> <given-names>C.</given-names></name>
<name><surname>Haut</surname> <given-names>J. M.</given-names></name>
<name><surname>Du</surname> <given-names>Q.</given-names></name>
<name><surname>Plaza</surname> <given-names>A.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Spectral&#x2013;spatial morphological attention transformer for hyperspectral image classification</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>61</volume>, <fpage>1</fpage>&#x2013;<lpage>15</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2023.3242346</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Roy</surname> <given-names>S. K.</given-names></name>
<name><surname>Krishna</surname> <given-names>G.</given-names></name>
<name><surname>Dubey</surname> <given-names>S. R.</given-names></name>
<name><surname>Chaudhuri</surname> <given-names>B. B.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>HybridSN: exploring 3-D&#x2013;2-D CNN feature hierarchy for hyperspectral image classification</article-title>. <source>IEEE Geosci. Remote Sens. Lett.</source> <volume>17</volume>, <fpage>277</fpage>&#x2013;<lpage>281</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/LGRS.2019.2918719</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sun</surname> <given-names>L.</given-names></name>
<name><surname>Zhao</surname> <given-names>G.</given-names></name>
<name><surname>Zheng</surname> <given-names>Y.</given-names></name>
<name><surname>Wu</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Spectral&#x2013;spatial feature tokenization transformer for hyperspectral image classification</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>60</volume>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2022.3144158</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Tang</surname> <given-names>H.</given-names></name>
<name><surname>Yang</surname> <given-names>X.</given-names></name>
<name><surname>Tang</surname> <given-names>D.</given-names></name>
<name><surname>Dong</surname> <given-names>Y.</given-names></name>
<name><surname>Zhang</surname> <given-names>L.</given-names></name>
<name><surname>Xie</surname> <given-names>W.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Tensor-based few-shot learning for cross-domain hyperspectral image classification</article-title>. <source>Remote Sens.</source> <volume>16</volume>, <elocation-id>4149</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs16224149</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Tu</surname> <given-names>B.</given-names></name>
<name><surname>Liao</surname> <given-names>X.</given-names></name>
<name><surname>Li</surname> <given-names>Q.</given-names></name>
<name><surname>Peng</surname> <given-names>Y.</given-names></name>
<name><surname>Plaza</surname> <given-names>A.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Local semantic feature aggregation-based transformer for hyperspectral image classification</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>60</volume>, <fpage>1</fpage>&#x2013;<lpage>15</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2022.3201145</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ullah</surname> <given-names>F.</given-names></name>
<name><surname>Ullah</surname> <given-names>I.</given-names></name>
<name><surname>Khan</surname> <given-names>K.</given-names></name>
<name><surname>Khan</surname> <given-names>S.</given-names></name>
<name><surname>Amin</surname> <given-names>F.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Advances in deep neural network-based hyperspectral image classification and feature learning with limited samples: a survey</article-title>. <source>Appl. Intell.</source> <volume>55</volume>, <fpage>370</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10489-024-06139-w</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>S.</given-names></name>
<name><surname>Liu</surname> <given-names>Z.</given-names></name>
<name><surname>Chen</surname> <given-names>Y.</given-names></name>
<name><surname>Hou</surname> <given-names>C.</given-names></name>
<name><surname>Liu</surname> <given-names>A.</given-names></name>
<name><surname>Zhang</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2023</year>). 
<article-title>Expansion spectral&#x2013;spatial attention network for hyperspectral image classification</article-title>. <source>IEEE J. Sel. Top. Appl. Earth Obs. Remote Sens.</source> <volume>16</volume>, <fpage>6411</fpage>&#x2013;<lpage>6427</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JSTARS.2023.3288521</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>B.</given-names></name>
<name><surname>Chen</surname> <given-names>Y.</given-names></name>
<name><surname>Li</surname> <given-names>Z.</given-names></name>
<name><surname>Xiong</surname> <given-names>S.</given-names></name>
<name><surname>Lu</surname> <given-names>X.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>SANet: A self-attention network for agricultural hyperspectral image classification</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>62</volume>, <fpage>1</fpage>&#x2013;<lpage>15</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2023.3341473</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>J.</given-names></name>
<name><surname>Meng</surname> <given-names>Z.</given-names></name>
<name><surname>Zhao</surname> <given-names>F.</given-names></name>
<name><surname>Liu</surname> <given-names>H.</given-names></name>
<name><surname>Chang</surname> <given-names>Z.</given-names></name>
</person-group> (<year>2022</year>). 
<article-title>Convolution transformer mixer for hyperspectral image classification</article-title>. <source>IEEE Geosci. Remote Sens. Lett.</source> <volume>19</volume>, <fpage>1</fpage>&#x2013;<lpage>5</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/LGRS.2022.3208935</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>H.</given-names></name>
<name><surname>Xu</surname> <given-names>X.</given-names></name>
<name><surname>Li</surname> <given-names>S.</given-names></name>
<name><surname>Plaza</surname> <given-names>A.</given-names></name>
</person-group> (<year>2025</year>). 
<article-title>Wavelet decomposition-based spectral&#x2013;spatial mamba network for hyperspectral image classification</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>63</volume>, <fpage>1</fpage>&#x2013;<lpage>17</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2025.3590154</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhao</surname> <given-names>Z.</given-names></name>
<name><surname>Xu</surname> <given-names>X.</given-names></name>
<name><surname>Li</surname> <given-names>S.</given-names></name>
<name><surname>Plaza</surname> <given-names>A.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Hyperspectral image classification using groupwise separable convolutional vision transformer network</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>62</volume>, <fpage>1</fpage>&#x2013;<lpage>17</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2024.3377610</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zunair</surname> <given-names>H.</given-names></name>
<name><surname>Rahman</surname> <given-names>A.</given-names></name>
<name><surname>Mohammed</surname> <given-names>N.</given-names></name>
<name><surname>Cohen</surname> <given-names>J. P.</given-names></name>
</person-group> (<year>2020</year>). 
<article-title>Uniformizing techniques to process CT scans with 3D CNNs for tuberculosis prediction</article-title>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2007.13224</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3271544">Chaoya Dang</ext-link>, Nanjing Agricultural University, China</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2614311">Faxu Guo</ext-link>, Gansu Agricultural University, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3329005">Gui Cheng</ext-link>, Chinese Academy of Sciences (CAS), China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3329301">Beibei Yu</ext-link>, Wuhan University, China</p></fn>
</fn-group>
</back>
</article>