<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Mar. Sci.</journal-id>
<journal-title>Frontiers in Marine Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Mar. Sci.</abbrev-journal-title>
<issn pub-type="epub">2296-7745</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmars.2025.1613565</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Marine Science</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Enhanced hyperspectral image classification for coastal wetlands using a hybrid CNN-transformer approach with cross-attention mechanism</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Li</surname>
<given-names>Zhongmei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2944626/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Liu</surname>
<given-names>Tang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2672461/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lu</surname>
<given-names>Yuxiang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2672351/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Tian</surname>
<given-names>Jing</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhang</surname>
<given-names>Meng</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhou</surname>
<given-names>Chenghu</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>State Key Laboratory of Resources and Environmental Information System, Institute of Geographic Sciences and Natural Resources Research (IGSNRR), Chinese Academy of Sciences (CAS)</institution>, <addr-line>Beijing</addr-line>,&#xa0;<country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Beijing Institute of Remote Sensing Information</institution>, <addr-line>Beijing</addr-line>,&#xa0;<country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Bolin Fu, Guilin University of Technology, China</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Dingfeng Yu, Qilu University of Technology, China</p>
<p>Cunjin Xue, Aerospace Information Research Institute, Chinese Academy of Sciences (CAS), China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Tang Liu, <email xlink:href="mailto:liut@lreis.ac.cn">liut@lreis.ac.cn</email>
</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>26</day>
<month>06</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>12</volume>
<elocation-id>1613565</elocation-id>
<history>
<date date-type="received">
<day>17</day>
<month>04</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>06</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Li, Liu, Lu, Tian, Zhang and Zhou</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Li, Liu, Lu, Tian, Zhang and Zhou</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Coastal wetlands play a vital role in shoreline protection, material cycling, and biodiversity conservation. Utilizing hyperspectral remote sensing technology for wetland monitoring can enhance scientific management of these ecosystems. However, the complex water-land interactions and vegetation mixtures in wetlands often lead to significant spectral confusion and complicated spatial structures, posing challenges for fine classification. This paper proposes a novel hyperspectral image classification method that combines the strengths of Convolutional Neural Networks (CNNs) for local feature extraction and Transformers for modeling long-range dependencies. The method utilizes both 3D and 2D convolution operations to effectively capture spectral and spatial features of coastal wetlands. Additionally, dual-branch Transformers equipped with cross-attention mechanisms are employed to explore deep features from multiple perspectives and model the interrelationships between various characteristics. Comprehensive experiments conducted on two typical coastal wetland hyperspectral datasets demonstrate that the proposed method achieves overall accuracies (OA) of 96.52% and 85.72%, respectively, surpassing other benchmarks by 1.0-8.64%. Notably, challenging categories such as mudflats and mixed vegetation areas benefit significantly. This research provides valuable insights for the application of hyperspectral imagery in coastal wetland classification.</p>
</abstract>
<kwd-group>
<kwd>convolutional neural network</kwd>
<kwd>transformer</kwd>
<kwd>cross attention mechanism</kwd>
<kwd>hyperspectral image classification</kwd>
<kwd>coastal wetland classification</kwd>
</kwd-group>
<counts>
<fig-count count="8"/>
<table-count count="6"/>
<equation-count count="4"/>
<ref-count count="29"/>
<page-count count="17"/>
<word-count count="8420"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-in-acceptance</meta-name>
<meta-value>Marine Ecosystem Ecology</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Coastal wetlands play an irreplaceable role in maintaining ecological balance, protecting biodiversity, regulating climate, and purifying water quality (<xref ref-type="bibr" rid="B21">Santos et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B22">Sheaves et&#xa0;al., 2024</xref>). Situated at the transition zone between land and sea, coastal wetlands experience frequent water-land interactions, leading to unique hydrological, soil, and biological community structures. This transitional ecosystem is subject to the dual influence of the marine and terrestrial environments, with rapid ecological changes and rich biodiversity, but at the same time, it is also very fragile and easily disturbed by human activities and changes in the natural environment (<xref ref-type="bibr" rid="B12">Li et&#xa0;al., 2023</xref>; <xref ref-type="bibr" rid="B14">Man et&#xa0;al., 2023</xref>). Effective monitoring and precise categorization of coastal wetlands are of great significance for developing scientific conservation measures and sustainable management strategies (<xref ref-type="bibr" rid="B1">Agate et&#xa0;al., 2024</xref>).</p>
<p>However, coastal wetland ecosystems are characterized by high environmental heterogeneity, mixed vegetation communities, significant dynamics in surface cover, and sampling difficulties, posing serious challenges for wetland sample collection, large-scale dynamic monitoring and fine feature classification. Hyperspectral remote sensing technology, with its wide coverage and nanometer-scale spectral resolution, can obtain continuous spectral signatures of ground objects. This significantly reduces reliance on field surveys and improves data acquisition efficiency, thereby providing essential data support for fine identification and dynamic monitoring of coastal wetlands. It has gradually become an essential tool for scientific wetland management, including vegetation community structure analysis, intertidal zone dynamic monitoring and ecological parameters inversion (<xref ref-type="bibr" rid="B9">Ingalls et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B10">Jensen et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B19">Piaser et&#xa0;al., 2024</xref>; <xref ref-type="bibr" rid="B25">Yang et&#xa0;al., 2024</xref>). However, it should be emphasized that accurate classification of hyperspectral images is a fundamental prerequisite for these applications. Due to severe spectral mixing and high similarity between classes, achieving precise classification remains particularly challenging in coastal wetland monitoring.</p>
<p>Hyperspectral classification technology has undergone a paradigm shift from traditional machine learning to deep learning. Early research mainly relied on traditional machine learning methods such as support vector machine (SVM) (<xref ref-type="bibr" rid="B15">Melgani and Bruzzone, 2004</xref>) and random forest (RF) (<xref ref-type="bibr" rid="B2">Chan and Paelinckx, 2008</xref>), which primarily focused on spectral feature extraction to achieve initial feature classification. However, due to the curse of dimensionality of hyperspectral images, traditional methods suffer from overfitting and struggle to effectively exploit spatial contextual information. In recent years, deep learning technology has achieved great success in the field of image processing and has been widely used in hyperspectral image classification. Convolutional neural networks (CNNs) can automatically extract spectral signatures and local spatial features of the image, effectively alleviating dimensionality issues in hyperspectral data through hierarchical feature learning (<xref ref-type="bibr" rid="B8">Hu et&#xa0;al., 2015</xref>; <xref ref-type="bibr" rid="B27">Yue et&#xa0;al., 2015</xref>). Subsequently, recurrent neural networks (RNNs) (<xref ref-type="bibr" rid="B17">Mou et&#xa0;al., 2017</xref>; <xref ref-type="bibr" rid="B6">Hang et&#xa0;al., 2019</xref>) and generative adversarial networks (GANs) (<xref ref-type="bibr" rid="B28">Zhan et&#xa0;al., 2018</xref>; <xref ref-type="bibr" rid="B29">Zhu et&#xa0;al., 2018</xref>) have been introduced to hyperspectral classification, further enhancing robustness to noise and sample imbalance through spectral-temporal joint optimization and generative-discriminative co-training. Recently, Transformer models have also been successfully introduced into hyperspectral image classification. 
Leveraging their strong capability to capture long-range dependencies, Transformers effectively model spectral sequence features and global spatial structures in hyperspectral imagery (<xref ref-type="bibr" rid="B7">Hong et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B18">Peng et&#xa0;al., 2022</xref>; <xref ref-type="bibr" rid="B26">Yang et&#xa0;al., 2022</xref>).</p>
<p>Despite the success of deep learning in hyperspectral classification, most existing methods have been developed and evaluated on benchmark datasets representing agricultural (e.g., Indian Pines and WHU-Hi datasets) or urban areas (e.g., Washington DC Mall and Pavia University datasets), where the spatial and spectral distributions are relatively regular. In contrast, coastal wetlands exhibit highly heterogeneous spatial structures and significant intra-class spectral variability due to complex water-land interactions and vegetation mixtures. These characteristics pose substantial challenges for generalizing existing models to wetland ecosystems. Recent studies show that through the rational design of the hybrid architecture of CNN and Transformer, it is possible to fully utilize the local details and global contextual information and provide stronger feature representation capabilities in remote sensing classification, such as building outline extraction (<xref ref-type="bibr" rid="B3">Chang et&#xa0;al., 2024</xref>), change detection (<xref ref-type="bibr" rid="B11">Jiang et&#xa0;al., 2024</xref>), and crop classification (<xref ref-type="bibr" rid="B24">Xiang et&#xa0;al., 2023</xref>). Specifically in the field of hyperspectral image classification, SSFTT generates low-dimensional features through a lightweight CNN combination, converts the features into semantic information through a Gaussian-weighted tokenizer, and then feeds them into a Transformer encoder for global relationship modeling, balancing efficiency and accuracy (<xref ref-type="bibr" rid="B23">Sun et&#xa0;al., 2022</xref>).</p>
<p>Inspired by these developments, we propose a novel hyperspectral image classification method that integrates CNN and Transformer architectures. The method first employs 3D and 2D convolution operations to extract shallow spatial-spectral features. A dual-branch Transformer encoder then processes different feature subsets in parallel&#x2014;one focusing on spatial features, the other on channel-wise information&#x2014;thereby enhancing multi-dimensional feature representation. A cross-attention mechanism enables dynamic interaction and fusion between branches, allowing the model to learn complex inter-feature relationships and to reduce misclassification caused by spectral similarity. This design balances local detail extraction with global dependency modeling, improving classification robustness in heterogeneous environments and providing essential support for the scientific monitoring and management of coastal wetlands. To validate the effectiveness of our method, we conduct comprehensive experiments on two representative hyperspectral datasets: the Yancheng wetland dataset and the Yellow River Estuary wetland dataset.</p>
<p>The remainder of this paper is organized as follows. Section 2 provides a detailed description of the proposed hyperspectral image classification method. Section 3 presents the experimental setup, including dataset descriptions, evaluation metrics, and both quantitative and qualitative analysis of the results. Section 4 reports ablation studies to evaluate the contribution of each module within the proposed framework. Finally, Section 5 concludes the paper and outlines potential directions for future research.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Networks</title>
<p>To overcome the unique challenges inherent in wetland ecosystems&#x2014;characterized by pronounced environmental heterogeneity, intricate spectral-spatial interactions, and subtle inter-class variations&#x2014;we propose a hierarchical deep learning framework that systematically integrates local feature extraction with global contextual modeling, thereby enhancing discriminative capability for complex wetland land cover features. Our method integrates CNNs and Transformer structures to capture both low-level spectral-spatial features and high-level semantic representations. The overall framework consists of four key components: a spatial-spectral feature extractor, a dual-branch Transformer encoder, a cross-attention mechanism, and a Kolmogorov&#x2013;Arnold Network (KAN) (<xref ref-type="bibr" rid="B4">Cheon, 2024</xref>; <xref ref-type="bibr" rid="B13">Liu et&#xa0;al., 2025</xref>) module. Specifically, the spatial-spectral feature extractor combines a 3D convolutional layer and a 2D convolutional layer to preliminarily extract joint spatial and spectral features. The dual-branch Transformer processes different feature subsets in parallel to explore information from multiple perspectives, while the cross-attention mechanism facilitates interaction between the two branches and enhances the correlation modeling among features. Subsequently, the KAN block is employed to perform final classification by assigning a category label to each pixel, thereby accomplishing hyperspectral image segmentation. To reduce computational complexity, each image patch generates a single feature cube after the initial feature extraction stage. The overall architecture of the proposed method is illustrated in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>, and the structure and functionality of each component are described in detail below.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Schematic of the proposed algorithm framework. (1) Input hyperspectral patches undergo feature extraction to generate feature cubes. (2) The cubes are decomposed along spatial and channel dimensions for dual-branch Transformer processing. (3) Cross-attention modules enable feature interaction between branches. (4) Deep features are further extracted through additional dual-branch Transformer layers. (5) Final classification is achieved via KAN Layer after feature fusion.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1613565-g001.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a neural network architecture for image analysis. An input image undergoes feature extraction through 3D and 2D convolutions. The extracted features are processed by spatial and channel transformers. Cross-attention combines outputs, followed by concatenation. A KAN layer generates the final output. Arrows indicate data flow and transformation stages.</alt-text>
</graphic>
</fig>
<sec id="s2_1_1">
<label>2.1.1</label>
<title>Spatial-spectral feature extractor</title>
<p>Convolutional neural networks (CNNs) have demonstrated strong capabilities in hierarchical feature extraction. Wetland ecosystems exhibit complex spectral-spatial characteristics due to their diverse vegetation, water bodies, and transitional land cover. To effectively capture these features, we first employ a hybrid 3D-2D CNN feature extractor for preliminary spectral-spatial representation learning. The proposed feature extraction module integrates a sequential 3D convolutional block and a 2D convolutional block, each enhanced with Batch Normalization (BN) and nonlinear activation. The 3D convolution block primarily captures joint spectral and spatial information from each hyperspectral sample patch, while the 2D convolution block further refines spatial patterns from the output of the 3D convolution. This combination allows the model to effectively learn local spatial structures and retain spectral integrity. By leveraging the complementary strengths of both 3D and 2D convolutional operations, the proposed module fully exploits the multidimensional characteristics of hyperspectral imagery, providing a robust feature foundation for the subsequent classification task.</p>
<p>To prevent information loss at the image boundaries during patch extraction, zero-padding is applied to the hyperspectral images of coastal wetlands. The class label for each extracted patch is assigned based on the ground-truth label of its central pixel.</p>
</sec>
<sec id="s2_1_2">
<label>2.1.2</label>
<title>Dual-branch transformer encoder</title>
<p>Although the 3D-2D CNN feature extractor effectively captures low-level spectral-spatial patterns, wetland classification remains challenging due to the inherent spatial heterogeneity and subtle inter-class variations in coastal environments. To address these limitations and enhance the model&#x2019;s ability to model complex spectral-spatial relationships and long-range dependencies, we introduce a dual-branch Transformer architecture based on feature cube decomposition. The feature cube generated by the feature extraction module, whose dimensions are <inline-formula>
<mml:math display="inline" id="im1">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, is decomposed in this part along both spatial and channel directions. In the spatial direction, as shown in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2a</bold>
</xref>, the feature cube is decomposed into <inline-formula>
<mml:math display="inline" id="im2">
<mml:mi>L</mml:mi>
</mml:math>
</inline-formula> spatial tokens of size <inline-formula>
<mml:math display="inline" id="im4">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math display="inline" id="im5">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the total number of spatial locations. This approach facilitates the modeling of inter-channel relationships, as each spatial token aggregates all channel information at a specific spatial location, enabling the capture of local features. In the channel direction, as illustrated in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2b</bold>
</xref>, the feature cube is decomposed into <inline-formula>
<mml:math display="inline" id="im6">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> channel tokens of size <inline-formula>
<mml:math display="inline" id="im7">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. Each token focuses on a single channel and contains complete spatial information corresponding to that channel, helping preserve the spatial context.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Schematic comparison of dual-branch Transformer architectures for hyperspectral feature decomposition. <bold>(a)</bold> Spatial Transformer branch. <bold>(b)</bold> Channel Transformer branch.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1613565-g002.tif">
<alt-text content-type="machine-generated">Diagram comparing spatial and channel transformer encoders. (a) Spatial encoder uses spatial tokenization followed by positional encoding, transforming data with spatial transformer encoder. (b) Channel encoder uses channel tokenization, applying positional encoding, and processed by a channel transformer encoder. Both use extra learnable class embedding.</alt-text>
</graphic>
</fig>
<p>For the spatial and channel branches, the input features are processed through a 3D convolutional kernel to generate feature chunks <inline-formula>
<mml:math display="inline" id="im8">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mo>&#xa0;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>. As detailed in <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>, these chunks are then augmented with a learnable class token <inline-formula>
<mml:math display="inline" id="im9">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and position encoding parameters <inline-formula>
<mml:math display="inline" id="im10">
<mml:mrow>
<mml:msup>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> to construct the final token sequence <inline-formula>
<mml:math display="inline" id="im11">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, which serves as the input to each Transformer encoder.</p>
<disp-formula id="eq1">
<label>(1)</label>
<mml:math display="block" id="M1">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mtext>concat</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>;</mml:mo>
<mml:mi>F</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The main function of the Transformer encoder is feature extraction. It captures internal dependencies and high-level representations of the input data through multi-head attention mechanisms and a feed-forward neural network.</p>
</sec>
<sec id="s2_1_3">
<label>2.1.3</label>
<title>Cross-attention mechanism</title>
<p>To enable dynamic feature interactions within our dual-branch Transformer architecture, we propose a cross-attention mechanism to facilitate adaptive feature enhancement between the two branches. This mechanism automatically identifies and amplifies the most discriminative features for different wetland land-cover types. Specifically, the cross-attention layer enables each element in one feature sequence to dynamically attend to and aggregate relevant information from the other sequence. This enhances feature correlations, provides richer contextual information, and improves the discriminative capability of the extracted features, thereby boosting classification accuracy. As illustrated in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>, after feature transformation through the spatial and channel branches, cross-attention is applied to facilitate further interaction between the features extracted from the two branches. Specifically, one sequence is used to generate the query matrix, while the other provides the key and value matrices. The dot product between the query and all keys yields attention scores, which are normalized using the softmax function. The resulting weights are then used to compute a weighted sum of the values, forming the cross-attention output.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Schematic illustration of cross-attention between spatial and channel transformer branches: Dual-path feature interaction via bidirectional attention. CGSA (Channel-Guided Spatial Attention) enhances spatial features by querying channel branch KV, while SGCA (Spatial-Guided Channel Attention) refines channel features using spatial branch KV.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1613565-g003.tif">
<alt-text content-type="machine-generated">Diagram depicting a dual-branch cross attention mechanism. Spatial Branch Features and Channel Branch Features are projected into queries (Q), keys (K), and values (V), resulting in Channel-Guided Spatial Attention and Spatial-Guided Channel Attention. Outputs are Enhanced Spatial and Channel Branch Features respectively.</alt-text>
</graphic>
</fig>
<p>Specifically, prior to the cross-attention computation, this paper employs an asymmetric projection strategy to achieve spatial/channel feature space alignment. Spatial Transformer features <italic>X<sub>spa</sub>
</italic> are projected into Query <inline-formula>
<mml:math display="inline" id="im12">
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, Key <inline-formula>
<mml:math display="inline" id="im13">
<mml:mrow>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and Value <inline-formula>
<mml:math display="inline" id="im14">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, while the channel Transformer features <italic>X<sub>cha</sub>
</italic> are mapped to <inline-formula>
<mml:math display="inline" id="im15">
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, <italic>K<sub>cha</sub>
</italic>, and <inline-formula>
<mml:math display="inline" id="im16">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, respectively. The <inline-formula>
<mml:math display="inline" id="im17">
<mml:mrow>
<mml:mtext>CrossAttnProj</mml:mtext>
</mml:mrow>
</mml:math>
</inline-formula> (Cross Attention Projection) operation asymmetrically maps dual-branch features to Query/Key/Value tensors as shown in <xref ref-type="disp-formula" rid="eq2">Equation 2</xref>:</p>
<disp-formula id="eq2">
<label>(2)</label>
<mml:math display="block" id="M2">
<mml:mrow>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mrow>
<mml:mtable>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mtext>CrossAttnProj</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2192;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mtext>CrossAttnProj</mml:mtext>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2192;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
</disp-formula>
<p>The cross-attention is then computed bidirectionally, as expressed in <xref ref-type="disp-formula" rid="eq3">Equations 3</xref> and <xref ref-type="disp-formula" rid="eq4">4</xref>:</p>
<p>Channel-Guided Spatial Attention (<italic>CGSA</italic>): leveraging channel information to enhance spatial representations.</p>
<disp-formula id="eq3">
<label>(3)</label>
<mml:math display="block" id="M3">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>CA</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>G</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mtext>softmax</mml:mtext>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mi>K</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mtext>T</mml:mtext>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>Spatial-Guided Channel Attention (<italic>SGCA</italic>): utilizing spatial context to refine channel representations.</p>
<disp-formula id="eq4">
<label>(4)</label>
<mml:math display="block" id="M4">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>CA</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>G</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>A</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>K</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>=</mml:mo>
<mml:mtext>softmax</mml:mtext>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>Q</mml:mi>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mi>K</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mtext>T</mml:mtext>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>a</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</disp-formula>
<p>In Equations (3) and (4), Queries (<inline-formula>
<mml:math display="inline" id="im18">
<mml:mi>Q</mml:mi>
</mml:math>
</inline-formula>) are always derived from the target branch, Keys/Values (<inline-formula>
<mml:math display="inline" id="im19">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mo stretchy="false">/</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>) come from features of the complementary branch, and <inline-formula>
<mml:math display="inline" id="im20">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the feature dimension of the Key.</p>
<p>Following the cross-attention operation, deeper feature extraction is required to better distinguish fine-grained intra-class characteristics. To this end, we introduce an additional dual-branch Transformer module following the cross-attention layer to further enhance high-level semantic representation learning. This module effectively captures the hyperspectral-spatial coupling characteristics of wetland data, thereby generating more discriminative representations for downstream classification. Finally, the complementary features from both branches are integrated through concatenation-based fusion.</p>
</sec>
<sec id="s2_1_4">
<label>2.1.4</label>
<title>Classification layer</title>
<p>The classification layer serves as the final mapping module to transform the extracted hierarchical features into categorical labels. Given the intricate nonlinear relationships inherent in coastal wetland ecosystems&#x2014;interactions between different vegetation, soil types and hydrological conditions&#x2014;we employ a Kolmogorov-Arnold Network (KAN) as a superior alternative to conventional multilayer perceptron (MLP) classifiers. The design of KAN is grounded in the Kolmogorov-Arnold representation theorem, allowing the network to process and learn complex relationships in input data in a way that approximates the theorem. Similar to MLP, KAN has a fully connected structure. However, unlike MLPs, which assign fixed activation functions to neurons (nodes), KAN assigns learnable activation functions to the edges (weights) of the network. This edge-based activation design provides greater flexibility in capturing nonlinear relationships within high-dimensional data, allowing the network to better fit intricate classification boundaries and improve performance in complex ecological classification tasks.</p>
</sec>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Datasets</title>
<p>In order to verify the effectiveness of the proposed method in coastal wetland hyperspectral image classification, two typical coastal wetland hyperspectral datasets were selected in this study&#x2014;the Yancheng wetland dataset in Jiangsu Province and the Yellow River Estuary wetland dataset.</p>
<sec id="s2_2_1">
<label>2.2.1</label>
<title>Yancheng dataset</title>
<p>The Yancheng wetland is located in the eastern part of Jiangsu Province, China, and has a coastline of 582 kilometers, making it one of the largest coastal silt-flat wetlands on the west coast of the Pacific Ocean and on the edge of the Asian continent (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4a</bold>
</xref>). This wetland is highly valuable for ecological diversity, and it provides habitats for a variety of endangered species. In this study, the hyperspectral image dataset of the coastal wetland in Yancheng, Jiangsu Province, acquired by the GF-5 AHSI sensor, with an image size of 1175&#xd7;585 and containing 253 effective spectral bands, was used. This dataset follows the literature (<xref ref-type="bibr" rid="B5">Gao et&#xa0;al., 2022</xref>): the spectral image processing team of Beijing Institute of Technology (BIT) interpreted the image by integrating field survey data and high-spatial-resolution images, and labeled the image with a total of 18 land-cover classes, including salt fields, ponds, paddy fields, woodland, buildings, etc. <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref> presents the dataset partition, comprising 744 training samples and 7,150 testing samples, with the training set approximately accounting for 9.42% of the total samples, and 0.11% of the total pixels in the panoramic image.</p>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Location and false-color composite images of typical wetland datasets. <bold>(a)</bold> Yancheng wetland <bold>(b)</bold> Yellow River Estuary wetland.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1613565-g004.tif">
<alt-text content-type="machine-generated">Schematic of the location of 2 wetlands in Jiangsu and Shandong Provinces, China, with arrows pointing to the satellite images labeled (a) and (b). Image (a) is a false-color image of Yancheng coastal wetland, and Image (b) is a false-color image of Yellow River Estuary coastal wetland.</alt-text>
</graphic>
</fig>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Number of training and testing samples for coastal wetland datasets.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" colspan="4" align="center">The Yancheng dataset</th>
<th valign="middle" colspan="4" align="center">The Yellow River Estuary dataset</th>
</tr>
<tr>
<th valign="middle" align="center">No.</th>
<th valign="middle" align="center">Name</th>
<th valign="middle" align="center">Training</th>
<th valign="middle" align="center">Testing</th>
<th valign="middle" align="center">No.</th>
<th valign="middle" align="center">Name</th>
<th valign="middle" align="center">Training</th>
<th valign="middle" align="center">Testing</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">Sea</td>
<td valign="middle" align="center">209</td>
<td valign="middle" align="center">2186</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">Spartina alterniflora</td>
<td valign="middle" align="center">162</td>
<td valign="middle" align="center">15462</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">Offshore area</td>
<td valign="middle" align="center">140</td>
<td valign="middle" align="center">1448</td>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">Pond</td>
<td valign="middle" align="center">98</td>
<td valign="middle" align="center">6867</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">Salt field</td>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">104</td>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">Woodland</td>
<td valign="middle" align="center">159</td>
<td valign="middle" align="center">3298</td>
</tr>
<tr>
<td valign="middle" align="center">4</td>
<td valign="middle" align="center">Pond</td>
<td valign="middle" align="center">20</td>
<td valign="middle" align="center">173</td>
<td valign="middle" align="center">4</td>
<td valign="middle" align="center">Phragmites</td>
<td valign="middle" align="center">75</td>
<td valign="middle" align="center">7636</td>
</tr>
<tr>
<td valign="middle" align="center">5</td>
<td valign="middle" align="center">Spartina anglica</td>
<td valign="middle" align="center">7</td>
<td valign="middle" align="center">76</td>
<td valign="middle" align="center">5</td>
<td valign="middle" align="center">Typha orientalis presl</td>
<td valign="middle" align="center">9</td>
<td valign="middle" align="center">24</td>
</tr>
<tr>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">Mudflats</td>
<td valign="middle" align="center">25</td>
<td valign="middle" align="center">243</td>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">Intertidal phragmites</td>
<td valign="middle" align="center">9</td>
<td valign="middle" align="center">1407</td>
</tr>
<tr>
<td valign="middle" align="center">7</td>
<td valign="middle" align="center">Aquaculture pond</td>
<td valign="middle" align="center">25</td>
<td valign="middle" align="center">238</td>
<td valign="middle" align="center">7</td>
<td valign="middle" align="center">Ecological reservoir</td>
<td valign="middle" align="center">50</td>
<td valign="middle" align="center">3874</td>
</tr>
<tr>
<td valign="middle" align="center">8</td>
<td valign="middle" align="center">Paddy field</td>
<td valign="middle" align="center">87</td>
<td valign="middle" align="center">745</td>
<td valign="middle" align="center">8</td>
<td valign="middle" align="center">Arable land</td>
<td valign="middle" align="center">98</td>
<td valign="middle" align="center">10869</td>
</tr>
<tr>
<td valign="middle" align="center">9</td>
<td valign="middle" align="center">Estuarine area</td>
<td valign="middle" align="center">27</td>
<td valign="middle" align="center">248</td>
<td valign="middle" align="center">9</td>
<td valign="middle" align="center">Lotus pond</td>
<td valign="middle" align="center">50</td>
<td valign="middle" align="center">6448</td>
</tr>
<tr>
<td valign="middle" align="center">10</td>
<td valign="middle" align="center">River</td>
<td valign="middle" align="center">27</td>
<td valign="middle" align="center">272</td>
<td valign="middle" align="center">10</td>
<td valign="middle" align="center">Oilfield</td>
<td valign="middle" align="center">162</td>
<td valign="middle" align="center">7994</td>
</tr>
<tr>
<td valign="middle" align="center">11</td>
<td valign="middle" align="center">Woodland</td>
<td valign="middle" align="center">19</td>
<td valign="middle" align="center">196</td>
<td valign="middle" align="center">11</td>
<td valign="middle" align="center">Salt fields</td>
<td valign="middle" align="center">75</td>
<td valign="middle" align="center">8614</td>
</tr>
<tr>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">Barren</td>
<td valign="middle" align="center">25</td>
<td valign="middle" align="center">129</td>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">Suaeda salsa</td>
<td valign="middle" align="center">147</td>
<td valign="middle" align="center">10676</td>
</tr>
<tr>
<td valign="middle" align="center">13</td>
<td valign="middle" align="center">Building</td>
<td valign="middle" align="center">37</td>
<td valign="middle" align="center">489</td>
<td valign="middle" align="center">13</td>
<td valign="middle" align="center">River</td>
<td valign="middle" align="center">49</td>
<td valign="middle" align="center">1831</td>
</tr>
<tr>
<td valign="middle" align="center">14</td>
<td valign="middle" align="center">Fallow land</td>
<td valign="middle" align="center">26</td>
<td valign="middle" align="center">208</td>
<td valign="middle" align="center">14</td>
<td valign="middle" align="center">Mixed area 1</td>
<td valign="middle" align="center">25</td>
<td valign="middle" align="center">1604</td>
</tr>
<tr>
<td valign="middle" align="center">15</td>
<td valign="middle" align="center">Rainfed cropland</td>
<td valign="middle" align="center">28</td>
<td valign="middle" align="center">176</td>
<td valign="middle" align="center">15</td>
<td valign="middle" align="center">Mixed area 2</td>
<td valign="middle" align="center">81</td>
<td valign="middle" align="center">5455</td>
</tr>
<tr>
<td valign="middle" align="center">16</td>
<td valign="middle" align="center">Suaeda salsa</td>
<td valign="middle" align="center">14</td>
<td valign="middle" align="center">71</td>
<td valign="middle" align="center">16</td>
<td valign="middle" align="center">Mixed area 3</td>
<td valign="middle" align="center">9</td>
<td valign="middle" align="center">128</td>
</tr>
<tr>
<td valign="middle" align="center">17</td>
<td valign="middle" align="center">Irrigation canal</td>
<td valign="middle" align="center">10</td>
<td valign="middle" align="center">28</td>
<td valign="middle" align="center">17</td>
<td valign="middle" align="center">Mudflats</td>
<td valign="middle" align="center">81</td>
<td valign="middle" align="center">5879</td>
</tr>
<tr>
<td valign="middle" align="center">18</td>
<td valign="middle" align="center">Phragmites</td>
<td valign="middle" align="center">12</td>
<td valign="middle" align="center">120</td>
<td valign="middle" align="center">18</td>
<td valign="middle" align="center">Sea</td>
<td valign="middle" align="center">81</td>
<td valign="middle" align="center">5463</td>
</tr>
<tr>
<td valign="middle" colspan="2" align="center">Total</td>
<td valign="middle" align="center">744</td>
<td valign="middle" align="center">7150</td>
<td valign="middle" colspan="2" align="center">Total</td>
<td valign="middle" align="center">1420</td>
<td valign="middle" align="center">103529</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2_2_2">
<label>2.2.2</label>
<title>Yellow River Estuary dataset</title>
<p>The Yellow River Estuary wetland is located in the eastern part of the Yellow River Delta. It is rich in biological resources and provides habitat for many rare birds and plants (<xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4b</bold>
</xref>). In this study, the hyperspectral image dataset of the Yellow River Estuary area was obtained by the GF-5 AHSI sensor. The image size was 1185&#xd7;1342, including 285 effective bands. Like the Yancheng wetland dataset, this dataset was also obtained from the spectral image processing team of Beijing Institute of Technology, and a total of 18 types of ground objects were marked, including spartina alterniflora, suaeda salsa, arable land, etc. Among them, Mixed area 1 is the mixed area of phragmites and tamarix, Mixed area 2 is the mixed area of tamarix and spartina alterniflora, and Mixed area 3 is the mixed area of tamarix, phragmites and spartina alterniflora. <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref> summarizes the sample partitioning for model training and evaluation, with 1,420 samples allocated for training and 103,529 samples reserved for testing. The training samples represent 1.35% of the total samples and 0.089% of the panoramic pixels. This sparse training configuration intentionally challenges the model&#x2019;s generalization capability under limited supervision.</p>
</sec>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Experimental settings</title>
<p>In order to validate the effectiveness of the proposed method, we chose to conduct performance comparison experiments with several representative hyperspectral image classification related algorithms, including SVM (<xref ref-type="bibr" rid="B15">Melgani and Bruzzone, 2004</xref>), HE3DCNN (He, Li and Chen, 2017), HybridSN (<xref ref-type="bibr" rid="B20">Roy et&#xa0;al., 2020</xref>), SpectralFormer (<xref ref-type="bibr" rid="B7">Hong et&#xa0;al., 2022</xref>), SSFTT (<xref ref-type="bibr" rid="B23">Sun et&#xa0;al., 2022</xref>) and FactoFormer (<xref ref-type="bibr" rid="B16">Mohamed et&#xa0;al., 2024</xref>).</p>
<p>Specifically, this study employs the cross-entropy loss function (CrossEntropyLoss) as the optimization objective for the model, utilizing the Adam optimizer for parameter updates. To enhance the training process, we implement a stepwise learning rate decay strategy where the learning rate is multiplicatively reduced by a factor (set to 0.95 for both datasets in our experiments) after every <inline-formula>
<mml:math display="inline" id="im21">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mtext>&#xa0;epochs&#xa0;</mml:mtext>
<mml:mo stretchy="false">/</mml:mo>
<mml:mn>10</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> training epochs. This scheduling strategy enables refined parameter adjustment during later training stages, thereby significantly improving convergence stability. For the Yancheng dataset, the patchsize is set to 5, with an initial learning rate of 9.8e-5 and a weight_decay of 9.9e-5. In contrast, for the Yellow River Estuary dataset, the patchsize is configured as 3, with both the initial learning rate and weight_decay set to 9.9e-5. For the SVM, we chose the Gaussian radial basis function (RBF) kernel for the classification task, and the penalty parameter <inline-formula>
<mml:math display="inline" id="im22">
<mml:mrow>
<mml:mo>&#xa0;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mn>0.01</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>100</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and the kernel function parameter <inline-formula>
<mml:math display="inline" id="im23">
<mml:mrow>
<mml:mi>&#x3b3;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mn>0.001</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> were determined by random search. HE3DCNN is a hyperspectral image classification model that combines 3D convolution and a pyramid structure; the network settings refer to the literature, and the patchsize is set to 9 and 7 for the Yancheng and Yellow River Estuary datasets, respectively. HybridSN is a hybrid 2D and 3D convolutional approach; the dimensionality reduction procedure employs PCA with 15 principal components for both datasets, the patchsize is 11, the learning rate of the optimizer is set to 0.001, and the value of the weight_decay parameter is 1e-6. SpectralFormer is based on the Transformer architecture, which enhances the model&#x2019;s ability to capture and represent spectral features through the perspective of serialization processing. In the Yancheng dataset, the patchsize is set to 5 and the bandpatch is set to 3, while in the Yellow River Estuary dataset, the patchsize is set to 7 and the bandpatch is set to 3. Other parameters are the same for both datasets: the mode is CAF, the learning rate is set to 5e-4, and the weight_decay is set to 5e-3. SSFTT is a hybrid structure of CNN and Transformer. For the Yancheng dataset, PCA is applied with 21 retained principal components, the patchsize is set to 7, and the learning rate is set to 0.001, whereas in the Yellow River Estuary dataset, the number of PCA principal components is set to 15 and the patchsize is set to 11. In the FactoFormer method, for both datasets, the learning rate and the weight_decay are set to 1e-4, and the patchsize for the Yancheng and Yellow River Estuary datasets is set to 5 and 3, respectively. In all the experiments, the number of epochs for each method is set to 200, and an early termination mechanism is adopted to prevent overfitting.</p>
<p>All experiments are conducted in a PyTorch environment running on a Windows 11 64-bit system with the following hardware configuration: an Intel Core i9-10900K Ultra 9 1850H processor (2.3GHz), 32GB RAM, 1TB SSD, and an NVIDIA RTX 3080 GPU (10GB VRAM). The computational environment utilizes CUDA 12.4 and cuDNN 9.0 for accelerated processing.</p>
<p>In order to quantitatively analyze the effectiveness of the proposed method and other comparative methods, four quantitative assessment metrics were introduced, including overall accuracy (OA), average accuracy (AA), kappa coefficient (&#x3ba;), and classification accuracy for each land cover category. A larger value for each indicator indicates a better classification effect.</p>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Quantitative analysis</title>
<p>Through comprehensive comparison with representative hyperspectral image classification approaches (<xref ref-type="table" rid="T2">
<bold>Tables&#xa0;2</bold>
</xref>, <xref ref-type="table" rid="T3">
<bold>3</bold>
</xref>), our method demonstrates competitive performance on both coastal wetland datasets (Yancheng and Yellow River Estuary). The tabulated results highlight our method&#x2019;s superiority, with optimal and sub-optimal metrics indicated in bold and underlined text, respectively.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Class-specific classification accuracy (%) using different methods on the Yancheng dataset (bold and underlined values indicate optimal and suboptimal indicators respectively).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Class</th>
<th valign="middle" align="center">SVM</th>
<th valign="middle" align="center">HE3DCNN</th>
<th valign="middle" align="center">HybridSN</th>
<th valign="middle" align="center">SpectralFormer</th>
<th valign="middle" align="center">SSFTT</th>
<th valign="middle" align="center">FactoFormer</th>
<th valign="middle" align="center">Proposed method</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">1</td>
<td valign="top" align="center">81.71</td>
<td valign="top" align="center">92.91</td>
<td valign="top" align="center">92.50</td>
<td valign="top" align="center">
<underline>99.13</underline>
</td>
<td valign="top" align="center">94.74</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">98.12</td>
</tr>
<tr>
<td valign="top" align="center">2</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">98.90</td>
<td valign="top" align="center">
<underline>99.93</underline>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">99.31</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">3</td>
<td valign="top" align="center">58.65</td>
<td valign="top" align="center">25.00</td>
<td valign="top" align="center">56.73</td>
<td valign="top" align="center">
<bold>69.23</bold>
</td>
<td valign="top" align="center">
<underline>64.42</underline>
</td>
<td valign="top" align="center">48.08</td>
<td valign="top" align="center">54.81</td>
</tr>
<tr>
<td valign="top" align="center">4</td>
<td valign="top" align="center">
<bold>87.86</bold>
</td>
<td valign="top" align="center">71.68</td>
<td valign="top" align="center">77.46</td>
<td valign="top" align="center">80.35</td>
<td valign="top" align="center">
<underline>80.92</underline>
</td>
<td valign="top" align="center">80.35</td>
<td valign="top" align="center">79.19</td>
</tr>
<tr>
<td valign="top" align="center">5</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">82.89</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">86.84</td>
<td valign="top" align="center">90.79</td>
<td valign="top" align="center">
<underline>94.74</underline>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">6</td>
<td valign="top" align="center">83.13</td>
<td valign="top" align="center">77.78</td>
<td valign="top" align="center">87.65</td>
<td valign="top" align="center">78.60</td>
<td valign="top" align="center">
<underline>94.24</underline>
</td>
<td valign="top" align="center">87.65</td>
<td valign="top" align="center">
<bold>95.47</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">7</td>
<td valign="top" align="center">75.63</td>
<td valign="top" align="center">82.77</td>
<td valign="top" align="center">89.50</td>
<td valign="top" align="center">
<underline>90.76</underline>
</td>
<td valign="top" align="center">
<bold>92.02</bold>
</td>
<td valign="top" align="center">77.31</td>
<td valign="top" align="center">
<bold>92.02</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">8</td>
<td valign="top" align="center">97.32</td>
<td valign="top" align="center">95.44</td>
<td valign="top" align="center">
<underline>99.46</underline>
</td>
<td valign="top" align="center">96.38</td>
<td valign="top" align="center">97.18</td>
<td valign="top" align="center">98.26</td>
<td valign="top" align="center">
<bold>99.73</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">9</td>
<td valign="top" align="center">
<underline>98.39</underline>
</td>
<td valign="top" align="center">84.68</td>
<td valign="top" align="center">94.35</td>
<td valign="top" align="center">
<bold>98.79</bold>
</td>
<td valign="top" align="center">95.56</td>
<td valign="top" align="center">93.55</td>
<td valign="top" align="center">
<underline>98.39</underline>
</td>
</tr>
<tr>
<td valign="top" align="center">10</td>
<td valign="top" align="center">81.25</td>
<td valign="top" align="center">
<underline>95.96</underline>
</td>
<td valign="top" align="center">58.09</td>
<td valign="top" align="center">94.12</td>
<td valign="top" align="center">71.69</td>
<td valign="top" align="center">95.59</td>
<td valign="top" align="center">
<bold>97.79</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">11</td>
<td valign="top" align="center">
<underline>97.96</underline>
</td>
<td valign="top" align="center">84.69</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">12</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">62.79</td>
<td valign="top" align="center">96.90</td>
<td valign="top" align="center">94.57</td>
<td valign="top" align="center">
<underline>98.45</underline>
</td>
<td valign="top" align="center">
<underline>98.45</underline>
</td>
<td valign="top" align="center">
<underline>98.45</underline>
</td>
</tr>
<tr>
<td valign="top" align="center">13</td>
<td valign="top" align="center">80.37</td>
<td valign="top" align="center">85.48</td>
<td valign="top" align="center">93.46</td>
<td valign="top" align="center">83.03</td>
<td valign="top" align="center">
<bold>99.18</bold>
</td>
<td valign="top" align="center">86.91</td>
<td valign="top" align="center">
<underline>97.14</underline>
</td>
</tr>
<tr>
<td valign="top" align="center">14</td>
<td valign="top" align="center">
<bold>95.67</bold>
</td>
<td valign="top" align="center">85.10</td>
<td valign="top" align="center">82.69</td>
<td valign="top" align="center">87.98</td>
<td valign="top" align="center">
<underline>95.19</underline>
</td>
<td valign="top" align="center">92.31</td>
<td valign="top" align="center">91.35</td>
</tr>
<tr>
<td valign="top" align="center">15</td>
<td valign="top" align="center">93.75</td>
<td valign="top" align="center">95.45</td>
<td valign="top" align="center">68.18</td>
<td valign="top" align="center">78.98</td>
<td valign="top" align="center">
<bold>98.30</bold>
</td>
<td valign="top" align="center">96.02</td>
<td valign="top" align="center">
<underline>97.16</underline>
</td>
</tr>
<tr>
<td valign="top" align="center">16</td>
<td valign="top" align="center">80.28</td>
<td valign="top" align="center">66.20</td>
<td valign="top" align="center">
<underline>88.73</underline>
</td>
<td valign="top" align="center">84.51</td>
<td valign="top" align="center">
<bold>95.77</bold>
</td>
<td valign="top" align="center">67.61</td>
<td valign="top" align="center">76.06</td>
</tr>
<tr>
<td valign="top" align="center">17</td>
<td valign="top" align="center">78.57</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">57.14</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">82.14</td>
<td valign="top" align="center">64.29</td>
<td valign="top" align="center">
<underline>96.43</underline>
</td>
</tr>
<tr>
<td valign="top" align="center">18</td>
<td valign="top" align="center">68.33</td>
<td valign="top" align="center">59.17</td>
<td valign="top" align="center">57.50</td>
<td valign="top" align="center">58.33</td>
<td valign="top" align="center">65.83</td>
<td valign="top" align="center">
<bold>80.83</bold>
</td>
<td valign="top" align="center">
<underline>78.33</underline>
</td>
</tr>
<tr>
<td valign="top" align="center">OA(%)</td>
<td valign="top" align="center">88.59</td>
<td valign="top" align="center">89.51</td>
<td valign="top" align="center">91.12</td>
<td valign="top" align="center">94.01</td>
<td valign="top" align="center">94.24</td>
<td valign="top" align="center">
<underline>94.94</underline>
</td>
<td valign="top" align="center">
<bold>96.52</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">AA(%)</td>
<td valign="top" align="center">86.60</td>
<td valign="top" align="center">80.38</td>
<td valign="top" align="center">83.35</td>
<td valign="top" align="center">87.87</td>
<td valign="top" align="center">
<underline>89.76</underline>
</td>
<td valign="top" align="center">86.77</td>
<td valign="top" align="center">
<bold>91.69</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">&#x3ba;</td>
<td valign="top" align="center">0.8668</td>
<td valign="top" align="center">0.8761</td>
<td valign="top" align="center">0.8951</td>
<td valign="top" align="center">0.9289</td>
<td valign="top" align="center">0.9319</td>
<td valign="top" align="center">
<underline>0.9398</underline>
</td>
<td valign="top" align="center">
<bold>0.9587</bold>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Class-specific classification accuracy (%) using different methods on the Yellow River Estuary dataset (bold and underlined values indicate optimal and suboptimal indicators respectively).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Class</th>
<th valign="middle" align="center">SVM</th>
<th valign="middle" align="center">HE3DCNN</th>
<th valign="middle" align="center">HybridSN</th>
<th valign="middle" align="center">SpectralFormer</th>
<th valign="middle" align="center">SSFTT</th>
<th valign="middle" align="center">FactoFormer</th>
<th valign="middle" align="center">Proposed method</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">1</td>
<td valign="top" align="center">91.19</td>
<td valign="top" align="center">91.55</td>
<td valign="top" align="center">
<bold>96.98</bold>
</td>
<td valign="top" align="center">85.75</td>
<td valign="top" align="center">79.39</td>
<td valign="top" align="center">
<underline>95.87</underline>
</td>
<td valign="top" align="center">91.73</td>
</tr>
<tr>
<td valign="top" align="center">2</td>
<td valign="top" align="center">87.81</td>
<td valign="top" align="center">69.74</td>
<td valign="top" align="center">
<underline>92.46</underline>
</td>
<td valign="top" align="center">78.72</td>
<td valign="top" align="center">
<bold>99.33</bold>
</td>
<td valign="top" align="center">71.33</td>
<td valign="top" align="center">81.93</td>
</tr>
<tr>
<td valign="top" align="center">3</td>
<td valign="top" align="center">95.45</td>
<td valign="top" align="center">
<underline>97.18</underline>
</td>
<td valign="top" align="center">86.45</td>
<td valign="top" align="center">92.78</td>
<td valign="top" align="center">67.62</td>
<td valign="top" align="center">94.21</td>
<td valign="top" align="center">
<bold>97.73</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">4</td>
<td valign="top" align="center">66.41</td>
<td valign="top" align="center">
<underline>77.85</underline>
</td>
<td valign="top" align="center">
<bold>83.16</bold>
</td>
<td valign="top" align="center">53.41</td>
<td valign="top" align="center">77.30</td>
<td valign="top" align="center">74.16</td>
<td valign="top" align="center">62.70</td>
</tr>
<tr>
<td valign="top" align="center">5</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">6</td>
<td valign="top" align="center">77.19</td>
<td valign="top" align="center">46.34</td>
<td valign="top" align="center">76.83</td>
<td valign="top" align="center">
<underline>90.62</underline>
</td>
<td valign="top" align="center">
<bold>92.25</bold>
</td>
<td valign="top" align="center">87.42</td>
<td valign="top" align="center">84.65</td>
</tr>
<tr>
<td valign="top" align="center">7</td>
<td valign="top" align="center">89.18</td>
<td valign="top" align="center">89.21</td>
<td valign="top" align="center">89.24</td>
<td valign="top" align="center">79.84</td>
<td valign="top" align="center">84.23</td>
<td valign="top" align="center">
<underline>92.20</underline>
</td>
<td valign="top" align="center">
<bold>94.48</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">8</td>
<td valign="top" align="center">74.02</td>
<td valign="top" align="center">89.82</td>
<td valign="top" align="center">
<underline>90.46</underline>
</td>
<td valign="top" align="center">88.62</td>
<td valign="top" align="center">78.35</td>
<td valign="top" align="center">89.61</td>
<td valign="top" align="center">
<bold>98.64</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">9</td>
<td valign="top" align="center">71.25</td>
<td valign="top" align="center">71.68</td>
<td valign="top" align="center">70.46</td>
<td valign="top" align="center">72.83</td>
<td valign="top" align="center">74.12</td>
<td valign="top" align="center">
<underline>77.08</underline>
</td>
<td valign="top" align="center">
<bold>80.29</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">10</td>
<td valign="top" align="center">88.10</td>
<td valign="top" align="center">
<bold>98.86</bold>
</td>
<td valign="top" align="center">91.53</td>
<td valign="top" align="center">49.04</td>
<td valign="top" align="center">
<underline>94.43</underline>
</td>
<td valign="top" align="center">88.49</td>
<td valign="top" align="center">89.93</td>
</tr>
<tr>
<td valign="top" align="center">11</td>
<td valign="top" align="center">68.12</td>
<td valign="top" align="center">45.15</td>
<td valign="top" align="center">48.68</td>
<td valign="top" align="center">66.95</td>
<td valign="top" align="center">64.60</td>
<td valign="top" align="center">
<bold>82.34</bold>
</td>
<td valign="top" align="center">
<underline>70.10</underline>
</td>
</tr>
<tr>
<td valign="top" align="center">12</td>
<td valign="top" align="center">85.84</td>
<td valign="top" align="center">
<bold>95.06</bold>
</td>
<td valign="top" align="center">86.95</td>
<td valign="top" align="center">90.12</td>
<td valign="top" align="center">94.26</td>
<td valign="top" align="center">84.92</td>
<td valign="top" align="center">
<underline>95.04</underline>
</td>
</tr>
<tr>
<td valign="top" align="center">13</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<underline>99.13</underline>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">98.36</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">14</td>
<td valign="top" align="center">
<bold>51.12</bold>
</td>
<td valign="top" align="center">16.96</td>
<td valign="top" align="center">30.11</td>
<td valign="top" align="center">49.94</td>
<td valign="top" align="center">43.33</td>
<td valign="top" align="center">44.26</td>
<td valign="top" align="center">
<underline>50.94</underline>
</td>
</tr>
<tr>
<td valign="top" align="center">15</td>
<td valign="top" align="center">70.03</td>
<td valign="top" align="center">
<bold>95.29</bold>
</td>
<td valign="top" align="center">
<underline>90.89</underline>
</td>
<td valign="top" align="center">72.81</td>
<td valign="top" align="center">85.26</td>
<td valign="top" align="center">79.60</td>
<td valign="top" align="center">83.83</td>
</tr>
<tr>
<td valign="top" align="center">16</td>
<td valign="top" align="center">92.19</td>
<td valign="top" align="center">25.00</td>
<td valign="top" align="center">91.41</td>
<td valign="top" align="center">
<underline>99.22</underline>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">84.38</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">17</td>
<td valign="top" align="center">
<bold>67.89</bold>
</td>
<td valign="top" align="center">
<bold>67.89</bold>
</td>
<td valign="top" align="center">66.75</td>
<td valign="top" align="center">
<bold>67.89</bold>
</td>
<td valign="top" align="center">64.65</td>
<td valign="top" align="center">
<bold>67.89</bold>
</td>
<td valign="top" align="center">
<bold>67.89</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">18</td>
<td valign="top" align="center">61.08</td>
<td valign="top" align="center">95.31</td>
<td valign="top" align="center">89.99</td>
<td valign="top" align="center">96.01</td>
<td valign="top" align="center">97.79</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<underline>99.60</underline>
</td>
</tr>
<tr>
<td valign="top" align="center">OA(%)</td>
<td valign="top" align="center">78.77</td>
<td valign="top" align="center">82.17</td>
<td valign="top" align="center">83.56</td>
<td valign="top" align="center">77.08</td>
<td valign="top" align="center">81.82</td>
<td valign="top" align="center">
<underline>84.72</underline>
</td>
<td valign="top" align="center">
<bold>85.72</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">AA(%)</td>
<td valign="top" align="center">79.83</td>
<td valign="top" align="center">76.22</td>
<td valign="top" align="center">82.35</td>
<td valign="top" align="center">79.70</td>
<td valign="top" align="center">83.07</td>
<td valign="top" align="center">
<underline>84.10</underline>
</td>
<td valign="top" align="center">
<bold>86.08</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">&#x3ba;</td>
<td valign="top" align="center">0.7695</td>
<td valign="top" align="center">0.8059</td>
<td valign="top" align="center">0.8212</td>
<td valign="top" align="center">0.7513</td>
<td valign="top" align="center">0.8026</td>
<td valign="top" align="center">
<underline>0.8338</underline>
</td>
<td valign="top" align="center">
<bold>0.8446</bold>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<sec id="s3_2_1">
<label>3.2.1</label>
<title>Yancheng dataset</title>
<p>
<xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref> shows the performance comparison results of each classification algorithm on the Yancheng dataset. Experimental results reveal that the conventional SVM approach, relying exclusively on basic spectral feature processing, demonstrates classification deficiencies. The method underperforms notably for water-related categories, including sea (Class 1), aquaculture pond (Class 7), and irrigation canal (Class 17), achieving an overall accuracy (OA) of 88.59%. HE3DCNN employs 3D convolution for spectral-spatial feature extraction and incorporates a pyramid structure for multi-scale feature fusion, achieving an OA of 89.51% on the Yancheng dataset. The HybridSN method achieves a detailed joint spatial-spectral feature extraction process by combining the structural features of 2DCNN and 3DCNN and obtains an OA of 91.12%, but performs poorly on river, fallow land, rainfed cropland, etc. (Class 10/14/15/17/18). SpectralFormer learns spectrally localized sequence information from neighboring bands of hyperspectral images and designs cross-layer jump connections to significantly improve the robustness of feature representation, which achieves OA of 94.01% and AA of 87.87% for the classification task on the Yancheng dataset, but performs poorly on categories such as mudflats (Class 6) and phragmites (Class 18). The SSFTT achieves joint extraction of spatial and spectral features by combining the advantages of CNN and Transformer, showing competitiveness in OA, AA and Kappa, but poor performance in the river category (Class 10). The FactoFormer method employs a dual-branch spatial and spectral channel modeling process and introduces a self-supervised pre-training mechanism; it attains 94.94% OA on the Yancheng dataset, though performance degrades for aquaculture ponds (Class 7) and irrigation canals (Class 17).
The proposed method in this study integrates the advantages of CNN and Transformer, and adopts the dual-branch spatial and channel modeling design to ensure more comprehensive information acquisition. Cross-attention further realizes the fusion of different forms of features and a more detailed feature extraction process, which enables the proposed method to have a better classification performance on the Yancheng dataset, and outperforms other comparative methods in terms of OA, AA, and Kappa.</p>
</sec>
<sec id="s3_2_2">
<label>3.2.2</label>
<title>Yellow River Estuary dataset</title>
<p>
<xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref> demonstrates the performance comparison results of each classification algorithm on the Yellow River Estuary dataset. The distinctive feature of this dataset is that the vegetation mixing region of tamarisk, phragmites and spartina alterniflora is considered (class 14/15/16), and the proportion of training samples in the whole region is exceptionally limited. As can be seen from the results, the performances of all the classification methods on this dataset decreased, with the OA values dropping to the range of 77.08% to 85.72%. The SVM method, like the other methods in the paper, is accurate for the classification of typha orientalis presl (class 5) and river (class 13), but performs only moderately well in the recognition of most other features. The HE3DCNN method is the most effective for the extraction of the oil field (class 10), but its recognition efficacy for the mixed zone is significantly decreased, especially for mixed area 1 (class 14) and mixed area 3 (class 16). The HybridSN method excels in the extraction of spartina alterniflora (class 1) and mixed area 2 (class 15), but performs poorly in the identification of areas of phragmites and tamarisk mixing. The SpectralFormer method performs well for intertidal phragmites (class 6) and for mixed area 3 (class 16), but performs poorly on aquaculture pond (class 7), oil field (class 10), and mixed area 2 (class 15) categories. The SSFTT method performs best in pond (class 2) and intertidal phragmites (class 6) extraction, is second best for oil field (class 10), and is accurate for the identification of mixed area 3 (class 16). FactoFormer is accurate for the identification of sea (class 18), and has the best performance for the salt field (class 11), which is superior to the other methods, but does not perform well for mixed area 3 (class 16).
The proposed method demonstrated overall superior classification performance on the Yellow River Estuary dataset, with overall accuracy (OA=85.72%) and average accuracy (AA=86.08%) significantly better than all the comparative methods, and the Kappa coefficient (0.8446) reached the reliability level of &#x201c;almost perfect agreement&#x201d;. The method accurately recognizes three categories (class 5/13/16), performs best in five other categories (class 3/7/8/9/17), and achieves second-best performance in four categories (class 11/12/14/18). Overall, for the Yellow River Estuary dataset, this paper&#x2019;s method outperforms other comparative methods in classification and excels in mixed zone extraction.</p>
</sec>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Qualitative analysis</title>
<p>To visually evaluate the performance of different classification methods in coastal wetland scenarios, this section presents the complete, fully labeled classification maps for the two datasets, Yancheng and Yellow River Estuary, as shown in <xref ref-type="fig" rid="f5">
<bold>Figures&#xa0;5</bold>
</xref>, <xref ref-type="fig" rid="f6">
<bold>6</bold>
</xref>.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Full classification maps obtained by different models on the Yancheng dataset. <bold>(a)</bold> ground truth <bold>(b)</bold> SVM <bold>(c)</bold> HE3DCNN <bold>(d)</bold> HybridSN <bold>(e)</bold> SpectralFormer <bold>(f)</bold> SSFTT <bold>(g)</bold> FactoFormer <bold>(h)</bold> proposed method.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1613565-g005.tif">
<alt-text content-type="machine-generated">A sequence of eight images (a-h) shows sample maps of Yancheng wetland dataset and the classification results of different methods. The images are color-coded to represent different features: sea, offshore areas, rivers, ponds, aquaculture ponds, mudflats, estuarine areas, and various land uses like paddy fields, rainfed cropland, and buildings. Classification confusion areas are highlighted in red boxes in the classification maps of different methods. The key at the bottom correlates colors with categories like salt fields, woodland, and irrigation canals. This series of images presents the contrasting effects of the different classification methods.</alt-text>
</graphic>
</fig>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Full classification maps obtained by different models on the Yellow River Estuary dataset. <bold>(a)</bold> ground truth <bold>(b)</bold> SVM <bold>(c)</bold> HE3DCNN <bold>(d)</bold> HybridSN <bold>(e)</bold> SpectralFormer <bold>(f)</bold> SSFTT <bold>(g)</bold> FactoFormer <bold>(h)</bold> proposed method.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1613565-g006.tif">
<alt-text content-type="machine-generated">A sequence of eight images (a-h) shows sample maps of Yellow River Estuary wetland dataset and the classification results of different methods. Color-coded regions include various vegetation types, water bodies, and land use categories, with a legend indicating specific classifications such as marshlands, ponds, arable land, and oilfields.</alt-text>
</graphic>
</fig>
<sec id="s3_3_1">
<label>3.3.1</label>
<title>Yancheng dataset</title>
<p>From the fully labeled wetland classification maps, it can be seen that most classification methods suffer a significant loss of accuracy on specific feature types, constrained by the heterogeneity of complex wetland ecosystems and the limited separability between feature classes. For the Yancheng dataset, the SVM method is ineffective on aquaculture pond (class 7) and irrigation canal (class 17), and tends to misclassify aquaculture pond as pond and irrigation canal as fallow land. Similarly, the HE3DCNN method shows significant streaking noise on the sea surface and misclassifies aquaculture pond as phragmites. The HybridSN method shows more severe streaking on the sea surface and performs poorly in the extraction of river (class 10), fallow land (class 14), rainfed cropland (class 15), and irrigation canal (class 17), misclassifying rainfed cropland as salt field, possibly due to the similarity in SWIR reflectance properties of salt field crystals and arid rainfed cropland. The SpectralFormer method misclassifies features in the extraction of spartina anglica (class 5), mudflats (class 6), buildings (class 13), rainfed cropland (class 15), etc. The SSFTT method shows severe streaking on the sea surface, has poor accuracy for river (class 10) and phragmites (class 18), and misclassifies near-shore vegetation as salt field or sea water due to the influence of seawater impregnation of the intertidal vegetation. FactoFormer performs poorly for aquaculture ponds (class 7) and irrigation canals (class 17), and is prone to misclassifying aquaculture ponds (class 7) as ponds or rivers. In most of the misclassified categories (class 7/10/15/17/18), the proposed method performs stably and shows obvious advantages in the stability of classification of typical wetland features.</p>
</sec>
<sec id="s3_3_2">
<label>3.3.2</label>
<title>Yellow River Estuary dataset</title>
<p>For the Yellow River Estuary dataset, the SVM method misclassifies arable land (class 8) as oilfield, mixed area 2 (class 15) as other categories, suaeda salsa as salt fields, and the offshore area north of the Yellow River Estuary (class 18) as other water bodies such as pond. HE3DCNN produces regular streak noise on the sea surface (class 18), misclassifies intertidal phragmites (class 6) as oilfield or suaeda salsa, misclassifies salt fields (class 11) as pond or ecological reservoir, and performs poorly in mixed vegetation area (class 15/16). The HybridSN method demonstrates strong classification accuracy for spartina alterniflora (class 1), typha orientalis presl (class 5), and river (class 13). However, it shows notable misclassification issues in other categories, including weak sea detection and frequent confusion between intertidal phragmites (class 6) and suaeda salsa (class 12). The SpectralFormer method misclassifies suaeda salsa as salt fields and ecological reservoir (class 7) as pond or sea, and confuses oilfield (class 10) with mixed area 2 (class 15). Additionally, it erroneously labels the boundary between river and sea as salt fields. The SSFTT method shows significant large-scale misclassification in the sea area, and woodland (class 3) is misidentified as the tamarix-spartina alterniflora mixed growing area. Arable land (class 8) is confused with pond, lotus pond and other water bodies, and mixed area 1 (class 14) is misidentified as phragmites community. The self-supervised pre-training mechanism of FactoFormer effectively suppresses the misclassification of sea, but it also has some limitations: it misclassifies wetland water bodies as ocean types, misclassifies pond (class 2) as salt fields, and performs poorly in extracting the mixed region of tamarisk-phragmites-spartina alterniflora (class 16, mixed area 3). The method proposed in this paper confuses the offshore area north of the Yellow River Estuary with ponds.
Systematic comparative experiments demonstrate that this misclassification prevails across multiple benchmark methods (SVM, HE3DCNN, HybridSN, SpectralFormer, and SSFTT). We attribute this phenomenon to: (1) spatial adjacency between coastal waters and pond complexes, and (2) hydrological connectivity (e.g., tidal channels) inducing feature homogenization in spectral-spatial domains. Overall, our method exhibits superior robustness to other methods for the easily confounded categories (class 8/11) and mixed vegetation areas (class 14/15/16).</p>
</sec>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>To comprehensively evaluate the contribution of each module in the proposed method, we conducted systematic ablation studies on both the Yancheng and Yellow River Estuary datasets by examining different component combinations. The proposed framework consists of five key components: Feature Extractor (FE), Dual-Branch Transformer1 (DBT1), Cross Attention (CA), Dual-Branch Transformer2 (DBT2), and KAN modules. Through incremental removal of each module, we analyzed their individual and collective effects on model performance across the two datasets. Specifically, <xref ref-type="table" rid="T4">
<bold>Table&#xa0;4</bold>
</xref> presents the overall performance comparison of the Yancheng dataset under different ablation cases, while <xref ref-type="table" rid="T5">
<bold>Table&#xa0;5</bold>
</xref> details the classification accuracy of each feature category across these cases. The Yellow River Estuary dataset follows the same ablation approach, with its detailed classification results summarized in <xref ref-type="table" rid="T6">
<bold>Table&#xa0;6</bold>
</xref>. <xref ref-type="fig" rid="f7">
<bold>Figures&#xa0;7</bold>
</xref>, <xref ref-type="fig" rid="f8">
<bold>8</bold>
</xref> present the panoramic prediction results of the Yancheng and Yellow River Estuary datasets under different ablation study cases, respectively.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Ablation study configurations (&#x2713;: present; &#xd7;: absent) on Yancheng dataset (optimal results are bolded).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">Cases</th>
<th valign="middle" colspan="5" align="center">Components</th>
<th valign="middle" colspan="3" align="center">Indicators</th>
</tr>
<tr>
<th valign="middle" align="center">FE</th>
<th valign="middle" align="center">DBT1</th>
<th valign="middle" align="center">CA</th>
<th valign="middle" align="center">DBT2</th>
<th valign="middle" align="center">KAN</th>
<th valign="middle" align="center">OA(%)</th>
<th valign="middle" align="center">AA(%)</th>
<th valign="middle" align="center">&#x3ba;</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">&#xd7;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">63.47</td>
<td valign="middle" align="center">31.63</td>
<td valign="middle" align="center">0.5577</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#xd7;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">95.68</td>
<td valign="middle" align="center">86.31</td>
<td valign="middle" align="center">0.9486</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#xd7;</td>
<td valign="middle" align="center">&#xd7;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">95.30</td>
<td valign="middle" align="center">88.69</td>
<td valign="middle" align="center">0.9444</td>
</tr>
<tr>
<td valign="middle" align="center">4</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#xd7;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">96.06</td>
<td valign="middle" align="center">87.45</td>
<td valign="middle" align="center">0.9531</td>
</tr>
<tr>
<td valign="middle" align="center">5</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#xd7;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">93.80</td>
<td valign="middle" align="center">86.01</td>
<td valign="middle" align="center">0.9268</td>
</tr>
<tr>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">MLP</td>
<td valign="middle" align="center">91.69</td>
<td valign="middle" align="center">88.13</td>
<td valign="middle" align="center">0.9026</td>
</tr>
<tr>
<td valign="middle" align="center">7</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">&#x2713;</td>
<td valign="middle" align="center">
<bold>96.52</bold>
</td>
<td valign="middle" align="center">
<bold>91.69</bold>
</td>
<td valign="middle" align="center">
<bold>0.9587</bold>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Detailed categorization results of ablation experiments on the Yancheng dataset (optimal results are bolded).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Class</th>
<th valign="middle" align="center">Case 1</th>
<th valign="middle" align="center">Case 2</th>
<th valign="middle" align="center">Case 3</th>
<th valign="middle" align="center">Case 4</th>
<th valign="middle" align="center">Case 5</th>
<th valign="middle" align="center">Case 6</th>
<th valign="middle" align="center">Proposed method</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">1</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">98.99</td>
<td valign="top" align="center">95.56</td>
<td valign="top" align="center">99.27</td>
<td valign="top" align="center">93.55</td>
<td valign="top" align="center">86.18</td>
<td valign="top" align="center">98.12</td>
</tr>
<tr>
<td valign="top" align="center">2</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">99.93</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">3</td>
<td valign="top" align="center">
<bold>56.73</bold>
</td>
<td valign="top" align="center">23.08</td>
<td valign="top" align="center">38.46</td>
<td valign="top" align="center">29.81</td>
<td valign="top" align="center">33.65</td>
<td valign="top" align="center">55.77</td>
<td valign="top" align="center">54.81</td>
</tr>
<tr>
<td valign="top" align="center">4</td>
<td valign="top" align="center">0.00</td>
<td valign="top" align="center">
<bold>84.98</bold>
</td>
<td valign="top" align="center">80.92</td>
<td valign="top" align="center">83.24</td>
<td valign="top" align="center">80.35</td>
<td valign="top" align="center">75.14</td>
<td valign="top" align="center">79.19</td>
</tr>
<tr>
<td valign="top" align="center">5</td>
<td valign="top" align="center">2.63</td>
<td valign="top" align="center">30.26</td>
<td valign="top" align="center">50.00</td>
<td valign="top" align="center">34.21</td>
<td valign="top" align="center">26.32</td>
<td valign="top" align="center">56.58</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">6</td>
<td valign="top" align="center">95.47</td>
<td valign="top" align="center">95.06</td>
<td valign="top" align="center">94.65</td>
<td valign="top" align="center">91.36</td>
<td valign="top" align="center">
<bold>95.88</bold>
</td>
<td valign="top" align="center">90.53</td>
<td valign="top" align="center">95.47</td>
</tr>
<tr>
<td valign="top" align="center">7</td>
<td valign="top" align="center">0.42</td>
<td valign="top" align="center">88.24</td>
<td valign="top" align="center">91.18</td>
<td valign="top" align="center">89.08</td>
<td valign="top" align="center">89.08</td>
<td valign="top" align="center">89.50</td>
<td valign="top" align="center">
<bold>92.02</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">8</td>
<td valign="top" align="center">13.15</td>
<td valign="top" align="center">
<bold>99.87</bold>
</td>
<td valign="top" align="center">99.73</td>
<td valign="top" align="center">99.06</td>
<td valign="top" align="center">98.79</td>
<td valign="top" align="center">97.99</td>
<td valign="top" align="center">99.73</td>
</tr>
<tr>
<td valign="top" align="center">9</td>
<td valign="top" align="center">4.03</td>
<td valign="top" align="center">98.79</td>
<td valign="top" align="center">98.79</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">98.79</td>
<td valign="top" align="center">98.39</td>
</tr>
<tr>
<td valign="top" align="center">10</td>
<td valign="top" align="center">72.43</td>
<td valign="top" align="center">97.79</td>
<td valign="top" align="center">
<bold>99.26</bold>
</td>
<td valign="top" align="center">98.90</td>
<td valign="top" align="center">98.16</td>
<td valign="top" align="center">96.32</td>
<td valign="top" align="center">97.79</td>
</tr>
<tr>
<td valign="top" align="center">11</td>
<td valign="top" align="center">2.55</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">12</td>
<td valign="top" align="center">45.74</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">99.22</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">99.22</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">98.45</td>
</tr>
<tr>
<td valign="top" align="center">13</td>
<td valign="top" align="center">46.01</td>
<td valign="top" align="center">97.75</td>
<td valign="top" align="center">98.16</td>
<td valign="top" align="center">
<bold>98.77</bold>
</td>
<td valign="top" align="center">92.64</td>
<td valign="top" align="center">91.82</td>
<td valign="top" align="center">97.14</td>
</tr>
<tr>
<td valign="top" align="center">14</td>
<td valign="top" align="center">0.00</td>
<td valign="top" align="center">95.67</td>
<td valign="top" align="center">97.60</td>
<td valign="top" align="center">98.56</td>
<td valign="top" align="center">
<bold>99.04</bold>
</td>
<td valign="top" align="center">98.56</td>
<td valign="top" align="center">91.35</td>
</tr>
<tr>
<td valign="top" align="center">15</td>
<td valign="top" align="center">5.11</td>
<td valign="top" align="center">96.02</td>
<td valign="top" align="center">95.45</td>
<td valign="top" align="center">96.59</td>
<td valign="top" align="center">
<bold>97.16</bold>
</td>
<td valign="top" align="center">94.89</td>
<td valign="top" align="center">
<bold>97.16</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">16</td>
<td valign="top" align="center">0.00</td>
<td valign="top" align="center">74.65</td>
<td valign="top" align="center">
<bold>77.46</bold>
</td>
<td valign="top" align="center">76.06</td>
<td valign="top" align="center">70.42</td>
<td valign="top" align="center">71.83</td>
<td valign="top" align="center">76.06</td>
</tr>
<tr>
<td valign="top" align="center">17</td>
<td valign="top" align="center">0.25</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">96.43</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">96.43</td>
</tr>
<tr>
<td valign="top" align="center">18</td>
<td valign="top" align="center">0.00</td>
<td valign="top" align="center">72.50</td>
<td valign="top" align="center">80.00</td>
<td valign="top" align="center">79.17</td>
<td valign="top" align="center">77.50</td>
<td valign="top" align="center">
<bold>82.50</bold>
</td>
<td valign="top" align="center">78.33</td>
</tr>
<tr>
<td valign="top" align="center">OA(%)</td>
<td valign="top" align="center">63.47</td>
<td valign="top" align="center">95.68</td>
<td valign="top" align="center">95.30</td>
<td valign="top" align="center">96.06</td>
<td valign="top" align="center">93.80</td>
<td valign="top" align="center">91.69</td>
<td valign="top" align="center">
<bold>96.52</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">AA(%)</td>
<td valign="top" align="center">31.63</td>
<td valign="top" align="center">86.31</td>
<td valign="top" align="center">88.69</td>
<td valign="top" align="center">87.45</td>
<td valign="top" align="center">86.01</td>
<td valign="top" align="center">88.13</td>
<td valign="top" align="center">
<bold>91.69</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">&#x3ba;</td>
<td valign="top" align="center">0.5577</td>
<td valign="top" align="center">0.9486</td>
<td valign="top" align="center">0.9444</td>
<td valign="top" align="center">0.9531</td>
<td valign="top" align="center">0.9248</td>
<td valign="top" align="center">0.9026</td>
<td valign="top" align="center">
<bold>0.9587</bold>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Detailed categorization results of ablation experiments on the Yellow River Estuary dataset (optimal results are bolded).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">Class</th>
<th valign="middle" align="center">Case 1</th>
<th valign="middle" align="center">Case 2</th>
<th valign="middle" align="center">Case 3</th>
<th valign="middle" align="center">Case 4</th>
<th valign="middle" align="center">Case 5</th>
<th valign="middle" align="center">Case 6</th>
<th valign="middle" align="center">Proposed method</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="center">1</td>
<td valign="top" align="center">2.92</td>
<td valign="top" align="center">93.47</td>
<td valign="top" align="center">
<bold>93.82</bold>
</td>
<td valign="top" align="center">93.02</td>
<td valign="top" align="center">92.82</td>
<td valign="top" align="center">90.23</td>
<td valign="top" align="center">91.73</td>
</tr>
<tr>
<td valign="top" align="center">2</td>
<td valign="top" align="center">
<bold>88.93</bold>
</td>
<td valign="top" align="center">82.00</td>
<td valign="top" align="center">85.38</td>
<td valign="top" align="center">81.17</td>
<td valign="top" align="center">84.14</td>
<td valign="top" align="center">81.93</td>
<td valign="top" align="center">81.93</td>
</tr>
<tr>
<td valign="top" align="center">3</td>
<td valign="top" align="center">13.10</td>
<td valign="top" align="center">99.70</td>
<td valign="top" align="center">99.30</td>
<td valign="top" align="center">
<bold>99.82</bold>
</td>
<td valign="top" align="center">97.79</td>
<td valign="top" align="center">97.30</td>
<td valign="top" align="center">97.73</td>
</tr>
<tr>
<td valign="top" align="center">4</td>
<td valign="top" align="center">15.57</td>
<td valign="top" align="center">67.26</td>
<td valign="top" align="center">66.13</td>
<td valign="top" align="center">70.99</td>
<td valign="top" align="center">49.70</td>
<td valign="top" align="center">
<bold>73.11</bold>
</td>
<td valign="top" align="center">62.70</td>
</tr>
<tr>
<td valign="top" align="center">5</td>
<td valign="top" align="center">0.00</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">6</td>
<td valign="top" align="center">0.07</td>
<td valign="top" align="center">
<bold>87.92</bold>
</td>
<td valign="top" align="center">80.38</td>
<td valign="top" align="center">84.51</td>
<td valign="top" align="center">73.13</td>
<td valign="top" align="center">69.94</td>
<td valign="top" align="center">84.65</td>
</tr>
<tr>
<td valign="top" align="center">7</td>
<td valign="top" align="center">0.09</td>
<td valign="top" align="center">96.54</td>
<td valign="top" align="center">
<bold>96.62</bold>
</td>
<td valign="top" align="center">95.95</td>
<td valign="top" align="center">95.51</td>
<td valign="top" align="center">88.05</td>
<td valign="top" align="center">94.48</td>
</tr>
<tr>
<td valign="top" align="center">8</td>
<td valign="top" align="center">76.95</td>
<td valign="top" align="center">97.12</td>
<td valign="top" align="center">97.09</td>
<td valign="top" align="center">92.10</td>
<td valign="top" align="center">98.63</td>
<td valign="top" align="center">
<bold>98.74</bold>
</td>
<td valign="top" align="center">98.64</td>
</tr>
<tr>
<td valign="top" align="center">9</td>
<td valign="top" align="center">3.74</td>
<td valign="top" align="center">73.98</td>
<td valign="top" align="center">72.15</td>
<td valign="top" align="center">75.96</td>
<td valign="top" align="center">69.26</td>
<td valign="top" align="center">
<bold>80.97</bold>
</td>
<td valign="top" align="center">80.29</td>
</tr>
<tr>
<td valign="top" align="center">10</td>
<td valign="top" align="center">66.88</td>
<td valign="top" align="center">91.52</td>
<td valign="top" align="center">90.23</td>
<td valign="top" align="center">
<bold>92.98</bold>
</td>
<td valign="top" align="center">90.41</td>
<td valign="top" align="center">81.44</td>
<td valign="top" align="center">89.93</td>
</tr>
<tr>
<td valign="top" align="center">11</td>
<td valign="top" align="center">0.30</td>
<td valign="top" align="center">69.93</td>
<td valign="top" align="center">73.08</td>
<td valign="top" align="center">72.83</td>
<td valign="top" align="center">
<bold>77.15</bold>
</td>
<td valign="top" align="center">56.52</td>
<td valign="top" align="center">70.10</td>
</tr>
<tr>
<td valign="top" align="center">12</td>
<td valign="top" align="center">0.00</td>
<td valign="top" align="center">93.25</td>
<td valign="top" align="center">96.62</td>
<td valign="top" align="center">94.99</td>
<td valign="top" align="center">
<bold>98.00</bold>
</td>
<td valign="top" align="center">95.63</td>
<td valign="top" align="center">95.04</td>
</tr>
<tr>
<td valign="top" align="center">13</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">14</td>
<td valign="top" align="center">3.62</td>
<td valign="top" align="center">48.13</td>
<td valign="top" align="center">
<bold>62.22</bold>
</td>
<td valign="top" align="center">46.76</td>
<td valign="top" align="center">52.49</td>
<td valign="top" align="center">57.86</td>
<td valign="top" align="center">50.94</td>
</tr>
<tr>
<td valign="top" align="center">15</td>
<td valign="top" align="center">20.13</td>
<td valign="top" align="center">77.76</td>
<td valign="top" align="center">64.82</td>
<td valign="top" align="center">75.51</td>
<td valign="top" align="center">74.35</td>
<td valign="top" align="center">71.11</td>
<td valign="top" align="center">
<bold>83.83</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">16</td>
<td valign="top" align="center">37.50</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">49.22</td>
<td valign="top" align="center">92.19</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">47.66</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">17</td>
<td valign="top" align="center">13.64</td>
<td valign="top" align="center">
<bold>67.89</bold>
</td>
<td valign="top" align="center">67.31</td>
<td valign="top" align="center">67.43</td>
<td valign="top" align="center">37.32</td>
<td valign="top" align="center">67.55</td>
<td valign="top" align="center">
<bold>67.89</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">18</td>
<td valign="top" align="center">95.73</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">
<bold>100.00</bold>
</td>
<td valign="top" align="center">99.60</td>
</tr>
<tr>
<td valign="top" align="center">OA(%)</td>
<td valign="top" align="center">30.51</td>
<td valign="top" align="center">85.54</td>
<td valign="top" align="center">85.45</td>
<td valign="top" align="center">85.58</td>
<td valign="top" align="center">84.71</td>
<td valign="top" align="center">83.51</td>
<td valign="top" align="center">
<bold>85.72</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">AA(%)</td>
<td valign="top" align="center">30.47</td>
<td valign="top" align="center">85.91</td>
<td valign="top" align="center">83.02</td>
<td valign="top" align="center">85.34</td>
<td valign="top" align="center">84.48</td>
<td valign="top" align="center">81.00</td>
<td valign="top" align="center">
<bold>86.08</bold>
</td>
</tr>
<tr>
<td valign="top" align="center">&#x3ba;</td>
<td valign="top" align="center">0.2543</td>
<td valign="top" align="center">0.8427</td>
<td valign="top" align="center">0.8416</td>
<td valign="top" align="center">0.8431</td>
<td valign="top" align="center">0.8335</td>
<td valign="top" align="center">0.8205</td>
<td valign="top" align="center">
<bold>0.8446</bold>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>Full classification maps of the Yancheng dataset under different ablation study cases. <bold>(a)</bold> our model without FE module <bold>(b)</bold> without DBT1 module <bold>(c)</bold> without CA and DBT2 module <bold>(d)</bold> without CA module <bold>(e)</bold> without DBT2 module <bold>(f)</bold> with MLP as the classification head.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1613565-g007.tif">
<alt-text content-type="machine-generated">Six panels show colorful land classification maps of Yancheng wetland dataset under different ablation study cases. Each panel highlights various features such as sea, offshore areas, salt fields, ponds, and different land uses in distinct colors. Key identifies features like aquaculture ponds, paddy fields, estuarine areas, rivers, woodlands, and other land types with corresponding colors. Each panel also has red rectangles highlighting classification confusion areas.</alt-text>
</graphic>
</fig>
<fig id="f8" position="float">
<label>Figure&#xa0;8</label>
<caption>
<p>Full classification maps of the Yellow River Estuary dataset under different ablation study cases. <bold>(a)</bold> our model without FE module <bold>(b)</bold> without DBT1 module <bold>(c)</bold> without CA and DBT2 module <bold>(d)</bold> without CA module <bold>(e)</bold> without DBT2 module <bold>(f)</bold> with MLP as the classification head.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fmars-12-1613565-g008.tif">
<alt-text content-type="machine-generated">Six maps displaying different classification maps of Yellow River Estuary dataset under different ablation study cases. Each map uses distinct colors to represent various land types, such as spartina alterniflora, ponds, woodland, phragmite, rivers, oilfields, and sea. Red rectangles highlight classification confusion areas. A legend at the bottom lists all categories including ecological reservoir, arable land, salt fields, mudflats, and mixed areas, with corresponding colors.</alt-text>
</graphic>
</fig>
<p>The ablation study results demonstrate that each component of the proposed model contributes significantly to the final wetland classification performance. Specifically:</p>
<p>The FE module effectively captures joint spatial-spectral features of the wetland hyperspectral data through its 3D CNN architecture, while the subsequent 2D CNN further enhances spatial feature abstraction. This design proves particularly effective for characterizing environments with high spatial heterogeneity. When the FE module is removed (Case 1), the model experiences a substantial performance degradation, with OA dropping to 63.47% for the Yancheng dataset and merely 30.51% for the Yellow River Estuary dataset. These results not only confirm the module&#x2019;s critical role in wetland feature extraction but also underscore its particular importance in scenarios characterized by high environmental heterogeneity.</p>
<p>When the DBT1 module is ablated (Case 2), the overall classification accuracy remains high. However, performance deteriorates significantly for certain fine-grained categories. Specifically, the model exhibits notable deficiencies in Salt field and Spartina anglica in the Yancheng dataset, as well as the mixed vegetation community of Tamarix and Spartina alterniflora (Mixed Area 2) in the Yellow River Estuary dataset. This observation suggests that the DBT1 module plays a critical role in enhancing the model&#x2019;s ability to discriminate subtle inter-class variations, particularly in mixed wetland vegetation. By leveraging parallel spatial and channel long-range dependency modeling, DBT1 provides more discriminative feature representations for downstream processing.</p>
<p>The CA module improves the model&#x2019;s discriminative capacity for subtle variations by effectively integrating spatial and channel-wise interaction features. Ablation of the CA module (Case 3) leads to performance degradation in several key categories: classification accuracy decreases for Sea, Salt field and Spartina anglica in the Yancheng dataset, accompanied by a pronounced streak anomaly in the sea surface classification. Similarly, the model exhibits significantly reduced accuracy for Mixed Areas 2 and 3 in the Yellow River Estuary dataset compared to the full model. These results demonstrate that the CA module plays a crucial role in enhancing feature discrimination, particularly for challenging cases involving fuzzy boundaries and mixed vegetation communities, through its feature interaction capability.</p>
<p>The DBT2 module serves as a secondary refinement unit to enhance classification stability in complex land cover scenarios. Ablation studies (Case 5) reveal that the full model (Case 7) achieves consistent improvements in OA, AA, and Kappa metrics across both datasets, demonstrating the necessity of deep feature extraction following cross-attention. Comparative analysis between Cases 4 and 3 shows that DBT2 improves classification accuracy for Sea in the Yancheng dataset while eliminating the sea surface streak anomaly. Similarly, in the Yellow River Estuary dataset, DBT2 enhances classification performance for mixed vegetation communities (Mixed Areas 2 and 3). These results indicate that DBT2&#x2019;s dual-branch Transformer architecture, through its secondary refinement of spatial and channel features, improves the model&#x2019;s discriminative capacity for different water bodies and mixed vegetation environments.</p>
<p>Compared to using MLP as the classification head (Case 6), the KAN module (Case 7) strengthens model discriminability through nonlinear feature mapping, improving overall classification accuracy across both datasets. The KAN module&#x2019;s adaptive nonlinear learning capability enables more effective modeling of dynamic wetland cover variations, demonstrating strong compatibility with the heterogeneous nature of wetland environments.</p>
<p>The ablation studies comprehensively validate the efficacy and synergistic integration of the proposed modular architecture: (1) The FE module establishes fundamental feature representations to address environmental heterogeneity; (2) The DBT1 module refines feature expression to capture inter-class variations; (3) The CA module enhances feature interactions for improved characterization of complex vegetation communities; (4) The DBT2 module enables deeper feature abstraction, particularly for discriminating distinct water bodies and mixed vegetation features; and (5) The KAN module&#x2019;s nonlinear classification head adapts to fuzzy boundaries and dynamic surface cover changes. Collectively, this hierarchical framework provides an effective solution for wetland hyperspectral image classification, by synergistically integrating hierarchical feature extraction with multidimensional feature interaction.</p>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusion</title>
<p>In this paper, we propose a hyperspectral image classification method tailored for coastal wetlands. The method integrates the advantages of convolutional neural networks (CNNs) and Transformer architectures, and progressively extracts low-, middle-, and high-level features through a hierarchical framework. Specifically, 3D and 2D convolutional layers are employed to fully capture low-level spectral and spatial features, while the combination of dual-branch Transformers with a cross-attention mechanism enables multi-dimensional feature fusion and the extraction of high-level semantic representations. Experimental results demonstrate that the proposed method significantly enhances classification performance on hyperspectral images of coastal wetlands, particularly for typical land cover types such as mudflats and mixed vegetation areas. In future work, strategies to improve model performance under small-sample conditions will be explored. These may include the application of semi-supervised learning, self-supervised learning, and domain adaptation techniques to effectively utilize both limited labeled samples and large volumes of unlabeled data, thereby enhancing the generalization ability of the model.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: The data analyzed in this study was obtained from the 2024 Chinese Workshop on Hyperspectral Earth Observation (<uri xlink:href="https://hsi.ecnu.edu.cn">https://hsi.ecnu.edu.cn</uri>), processed by the spectral image processing team of Beijing Institute of Technology. Requests to access these datasets should be directed to the workshop organizing committee at committee@ce.ecnu.edu.cn.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>ZL: Formal Analysis, Methodology, Software, Validation, Visualization, Writing &#x2013; original draft. TL: Conceptualization, Methodology, Writing &#x2013; review &amp; editing, Funding acquisition. YL: Methodology, Software, Writing &#x2013; review &amp; editing. JT: Conceptualization, Writing &#x2013; review &amp; editing. MZ: Writing &#x2013; review &amp; editing, Conceptualization, Supervision. CZ: Conceptualization, Writing &#x2013; review &amp; editing, Project administration, Resources.</p>
</sec>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. This study is financially supported by the International Partnership Program by the Chinese Academy of Sciences under Grant 121311KYSB20190029.</p>
</sec>
<ack>
<title>Acknowledgments</title>
<p>The authors extend sincere gratitude to Dr. Wanli Zhang for his expert guidance in methodological discussions and generous provision of high-performance computing resources. The coastal wetland hyperspectral datasets employed were obtained from the 2024 China Hyperspectral Earth Observation Symposium and were meticulously preprocessed by the Spectral Image Processing Team at Beijing Institute of Technology. We extend our sincere gratitude to the laboratory for their open-access provision of these mission-critical datasets. Finally, the code implementation draws upon methodologies from SpectralFormer and FactoFormer; we gratefully acknowledge Danfeng Hong, Mohamed et&#xa0;al. for making their open-source codebase publicly available.</p>
</ack>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The reviewer CX declared a shared affiliation with the authors ZL, TL, YL, CZ to the handling editor at the time of review.</p>
</sec>
<sec id="s10" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declare that Generative AI was used in the creation of this manuscript. Generative AI was used for linguistic enhancement without altering substantive content.</p>
</sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Agate</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Ballinger</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Ward</surname> <given-names>R. D.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Satellite remote sensing can provide semi-automated monitoring to aid coastal decision-making</article-title>. <source>Estuar. Coast. Shelf Sci.</source> <volume>298</volume>, <elocation-id>108639</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ecss.2024.108639</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chan</surname> <given-names>J. C.-W.</given-names>
</name>
<name>
<surname>Paelinckx</surname> <given-names>D.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Evaluation of Random Forest and Adaboost tree-based ensemble classification and spectral band selection for ecotope mapping using airborne hyperspectral imagery</article-title>. <source>Remote Sens. Environ.</source> <volume>112</volume>, <fpage>2999</fpage>&#x2013;<lpage>3011</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.rse.2008.02.011</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Cen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Cen</surname> <given-names>G.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Asymmetric network combining CNN and transformer for building extraction from remote sensing images</article-title>. <source>Sensors</source> <volume>24</volume>, <elocation-id>6198</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/s24196198</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheon</surname> <given-names>M.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Kolmogorov-arnold network for satellite image classification in remote sensing</article-title>. <source>arXiv</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2406.00600</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Tao</surname> <given-names>R.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>Hyperspectral and multispectral classification for coastal wetland using depthwise feature interaction network</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>60</volume>, <fpage>5512615</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2021.3097093</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hang</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Hong</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Ghamisi</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Cascaded recurrent neural networks for hyperspectral image classification</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>57</volume>, <fpage>5384</fpage>&#x2013;<lpage>5394</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2019.2899129</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hong</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Han</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Yao</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Plaza</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2022</year>). <article-title>SpectralFormer: rethinking hyperspectral image classification with transformers</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>60</volume>, <fpage>5518615</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2021.3130716</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Deep convolutional neural networks for hyperspectral image classification</article-title>. <source>J. Sensors</source> <volume>2015</volume>, <fpage>258619</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1155/2015/258619</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ingalls</surname> <given-names>T. C.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Sawall</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Martin</surname> <given-names>R. E.</given-names>
</name>
<name>
<surname>Thompson</surname> <given-names>D. R.</given-names>
</name>
<name>
<surname>Asner</surname> <given-names>G. P.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Imaging spectroscopy investigations in wet carbon ecosystems: A review of the literature from 1995 to 2022 and future directions</article-title>. <source>Remote Sens. Environ.</source> <volume>305</volume>, <fpage>114051</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.rse.2024.114051</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jensen</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Thompson</surname> <given-names>D. R.</given-names>
</name>
<name>
<surname>Simard</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Solohin</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Castaneda-Moya</surname> <given-names>E.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Imaging spectroscopy-based estimation of aboveground biomass in Louisiana&#x2019;s coastal wetlands: toward consistent spectroscopic retrievals across atmospheric states</article-title>. <source>J. Geophys. Res. Biogeosci.</source> <volume>129</volume>, <elocation-id>e2024JG008112</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1029/2024JG008112</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jiang</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Multiscale fusion CNN-transformer network for high-resolution remote sensing image change detection</article-title>. <source>IEEE J. Sel. Top. Appl. Earth Obs. Remote Sens.</source> <volume>17</volume>, <fpage>5280</fpage>&#x2013;<lpage>5293</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/JSTARS.2024.3361507</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Leng</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Yuguda</surname> <given-names>T. K.</given-names>
</name>
<name>
<surname>Wei</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Xia</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhuo</surname> <given-names>C.</given-names>
</name>
<etal/>
</person-group>. (<year>2023</year>). <article-title>Increasing coastal reclamation by invasive alien plants and coastal armoring threatens the ecological sustainability of coastal wetlands</article-title>. <source>Front. Mar. Sci.</source> <volume>10</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fmars.2023.1118894</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Vaidya</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Ruehle</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Halverson</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Solja&#x10d;i&#x107;</surname> <given-names>M.</given-names>
</name>
<etal/>
</person-group>. (<year>2025</year>). <article-title>KAN: Kolmogorov-Arnold networks</article-title>. <source>arXiv</source>. doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2404.19756</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Man</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Cui</surname> <given-names>B.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Quantitative evaluation of sea reclamation activities on tidal creek connectivity</article-title>. <source>Front. Mar. Sci.</source> <volume>10</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fmars.2023.1164065</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Melgani</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Bruzzone</surname> <given-names>L.</given-names>
</name>
</person-group> (<year>2004</year>). <article-title>Classification of hyperspectral remote sensing images with support vector machines</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>42</volume>, <fpage>1778</fpage>&#x2013;<lpage>1790</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2004.831865</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mohamed</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Haghighat</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Fernando</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Sridharan</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Fookes</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Moghadam</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>FactoFormer: factorized hyperspectral transformers with self-supervised pretraining</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>62</volume>, <fpage>5501614</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2023.3343392</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mou</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Ghamisi</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Zhu</surname> <given-names>X. X.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Deep recurrent neural networks for hyperspectral image classification</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>55</volume>, <fpage>3639</fpage>&#x2013;<lpage>3655</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2016.2636241</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Tu</surname> <given-names>B.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Li</surname> <given-names>W.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Spatial-spectral transformer with cross-attention for hyperspectral image classification</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>60</volume>, <fpage>5537415</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2022.3203476</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Piaser</surname> <given-names>E.</given-names>
</name>
<name>
<surname>Berton</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Caccia</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Gallivanone</surname> <given-names>F.</given-names>
</name>
<name>
<surname>Sona</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Villa</surname> <given-names>P.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Effects of functional type and angular configuration on reflectance anisotropy of aquatic vegetation in ultra-high resolution hyperspectral imagery</article-title>. <source>Int. J. Remote Sens.</source> <volume>46</volume>, <fpage>909</fpage>&#x2013;<lpage>929</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/01431161.2024.2438915</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Roy</surname> <given-names>S. K.</given-names>
</name>
<name>
<surname>Krishna</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Dubey</surname> <given-names>S. R.</given-names>
</name>
<name>
<surname>Chaudhuri</surname> <given-names>B. B.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>HybridSN: exploring 3-D-2-D CNN feature hierarchy for hyperspectral image classification</article-title>. <source>IEEE Geosci. Remote Sens. Lett.</source> <volume>17</volume>, <fpage>277</fpage>&#x2013;<lpage>281</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/LGRS.2019.2918719</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Santos</surname> <given-names>C. D.</given-names>
</name>
<name>
<surname>Catry</surname> <given-names>T.</given-names>
</name>
<name>
<surname>Dias</surname> <given-names>M. P.</given-names>
</name>
<name>
<surname>Granadeiro</surname> <given-names>J. P.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Global changes in coastal wetlands of importance for non-breeding shorebirds</article-title>. <source>Sci. Total Environ.</source> <volume>858</volume>, <elocation-id>159707</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.scitotenv.2022.159707</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sheaves</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Baker</surname> <given-names>R.</given-names>
</name>
<name>
<surname>Abrantes</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Barnett</surname> <given-names>A.</given-names>
</name>
<name>
<surname>Bradley</surname> <given-names>M.</given-names>
</name>
<name>
<surname>Dubuc</surname> <given-names>A.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>Consequences for nekton of the nature, dynamics, and ecological functioning of tropical tidally dominated ecosystems</article-title>. <source>Estuar. Coast. Shelf Sci.</source> <volume>304</volume>, <elocation-id>108825</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.ecss.2024.108825</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sun</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Zheng</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>Z.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Spectral-spatial feature tokenization transformer for hyperspectral image classification</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>60</volume>, <fpage>5522214</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2022.3144158</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiang</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Xiong</surname> <given-names>Q.</given-names>
</name>
<name>
<surname>Deng</surname> <given-names>C.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>CTFuseNet: A multi-scale CNN-transformer feature fused network for crop type segmentation on UAV remote sensing imagery</article-title>. <source>Remote Sens.</source> <volume>15</volume>, <elocation-id>1151</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3390/rs15041151</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>G.</given-names>
</name>
<name>
<surname>Shao</surname> <given-names>C.</given-names>
</name>
<name>
<surname>Zuo</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>L.</given-names>
</name>
<etal/>
</person-group>. (<year>2024</year>). <article-title>MFI: A mudflat index based on hyperspectral satellite images for mapping coastal mudflats</article-title>. <source>Int. J. Appl. Earth Obs. Geoinf.</source> <volume>133</volume>, <elocation-id>104140</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jag.2024.104140</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname> <given-names>X.</given-names>
</name>
<name>
<surname>Cao</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Lu</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname> <given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Hyperspectral image transformer classification networks</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>60</volume>, <fpage>5528715</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2022.3171551</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yue</surname> <given-names>J.</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>W.</given-names>
</name>
<name>
<surname>Mao</surname> <given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>H.</given-names>
</name>
</person-group> (<year>2015</year>). <article-title>Spectral-spatial classification of hyperspectral images using deep convolutional neural networks</article-title>. <source>Remote Sens. Lett.</source> <volume>6</volume>, <fpage>468</fpage>&#x2013;<lpage>477</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/2150704X.2015.1047045</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhan</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>D.</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Yu</surname> <given-names>X.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Semisupervised hyperspectral image classification based on generative adversarial networks</article-title>. <source>IEEE Geosci. Remote Sens. Lett.</source> <volume>15</volume>, <fpage>212</fpage>&#x2013;<lpage>216</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/LGRS.2017.2780890</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname> <given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>Y.</given-names>
</name>
<name>
<surname>Ghamisi</surname> <given-names>P.</given-names>
</name>
<name>
<surname>Benediktsson</surname> <given-names>J. A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Generative adversarial networks for hyperspectral image classification</article-title>. <source>IEEE Trans. Geosci. Remote Sens.</source> <volume>56</volume>, <fpage>5046</fpage>&#x2013;<lpage>5063</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/TGRS.2018.2805286</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>