<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Hum. Neurosci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Human Neuroscience</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Hum. Neurosci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1662-5161</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fnhum.2026.1755549</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Dynamic graph based attention spectral network for motor imagery-brain computer interface</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Shao</surname> <given-names>Zexiong</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<uri xlink:href="https://loop.frontiersin.org/people/3399653"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Gu</surname> <given-names>Zhenghui</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/2103140"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Che</surname> <given-names>Le</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Yu</surname> <given-names>Zhuliang</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/524109"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Li</surname> <given-names>Yuanqing</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/88107"/>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>The School of Automation Science and Engineering, South China University of Technology</institution>, <city>Guangzhou</city>, <country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>The Institute for Super Robotics (Huangpu)</institution>, <city>Guangzhou</city>, <country country="cn">China</country></aff>
<aff id="aff3"><label>3</label><institution>The Pazhou Laboratory</institution>, <city>Guangzhou</city>, <country country="cn">China</country></aff>
<aff id="aff4"><label>4</label><institution>The School of Architecture, South China University of Technology</institution>, <city>Guangzhou</city>, <country country="cn">China</country></aff>
<aff id="aff5"><label>5</label><institution>Shien-Ming Wu School of Intelligent Engineering, South China University of Technology</institution>, <city>Guangzhou</city>, <country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>&#x0002A;</label>Correspondence: Zhuliang Yu, <email xlink:href="mailto:zlyu@scut.edu.cn">zlyu@scut.edu.cn</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-03-04">
<day>04</day>
<month>03</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>20</volume>
<elocation-id>1755549</elocation-id>
<history>
<date date-type="received">
<day>18</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>04</day>
<month>02</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>10</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2026 Shao, Gu, Che, Yu and Li.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Shao, Gu, Che, Yu and Li</copyright-holder>
<license>
<ali:license_ref start_date="2026-03-04">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Motor imagery-based brain-computer interfaces (MI-BCIs) have been increasingly adopted in neurorehabilitation and related fields. The performance of MI-electroencephalogram (MI-EEG) decoding algorithms is central to the advancement of MI-BCIs. However, current studies often lack rigorous investigation into the brain&#x00027;s complex network organization. Moreover, most existing methods do not incorporate the cross-frequency coupling (CFC) phenomena that occur during MI into their algorithmic designs, nor do they adequately account for how temporal dynamics across different MI stages influence decoding outcomes. To address these limitations, we propose the Dynamic Spectral-Spatial Interaction Convolution Neural Network (DSSICNN), a parameter-efficient MI-EEG decoding framework that jointly extracts temporal-spectral-spatial features. DSSICNN adopts a dual-branch parallel architecture to concurrently learn spatial representations in both Euclidean and non-Euclidean domains. It further integrates a CFC-inspired attention module to model cross-spectral interactions, followed by an additional attention mechanism that quantifies the contributions of distinct MI stages to decoding performance. DSSICNN achieves decoding performance on two public datasets that surpasses the current state-of-the-art (SOTA) under both session-dependent and session-independent settings. Beyond its empirical advantages, DSSICNN offers design insights for developing Graph Neural Network (GNN)-based MI-EEG decoding algorithms and provides a network neuroscience-inspired perspective for understanding the neurophysiological mechanisms underlying MI.</p></abstract>
<kwd-group>
<kwd>brain-computer interface (BCI)</kwd>
<kwd>convolution neural network (CNN)</kwd>
<kwd>cross-spectro interaction</kwd>
<kwd>electroencephalogram (EEG)</kwd>
<kwd>graph neural network (GNN)</kwd>
<kwd>motor imagery (MI)</kwd>
</kwd-group>
<funding-group>
  <funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by the Qinghai Province&#x00027;s Third Batch of Science and Technology Programme Projects for 2024 (2024-NK-141S-4) and the STI 2030-Major project (2022ZD0211700 to Xinhong Zhu).</funding-statement>
</funding-group>
<counts>
<fig-count count="9"/>
<table-count count="8"/>
<equation-count count="18"/>
<ref-count count="52"/>
<page-count count="18"/>
<word-count count="10416"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Brain-Computer Interfaces</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Brain-computer interfaces (BCIs) constitute an emerging framework for human-machine communication by converting neural activity directly into control signals for external systems (<xref ref-type="bibr" rid="B12">Edelman et al., 2025</xref>). Among the available neural sensing modalities, electroencephalography (EEG) has gained substantial prominence owing to its non-invasiveness, affordability, and excellent temporal resolution. Motor imagery (MI), defined as the internal simulation of voluntary limb movements in the absence of overt motor output, induces characteristic modulations in the sensorimotor cortex, particularly within the mu (8&#x02013;12 Hz) and beta (18&#x02013;26 Hz) frequency bands, observed as event-related synchronization/event-related desynchronization (ERS/ERD). Because MI enables users to initiate control voluntarily without depending on external cues, it has become a focal point of contemporary research. MI-EEG-BCIs have been applied across a wide range of clinical and rehabilitative contexts (<xref ref-type="bibr" rid="B1">AL-Quraishi et al., 2018</xref>). Consequently, the effectiveness of MI-EEG decoding strategies is central to the overall reliability and utility of MI-BCIs.</p>
<p>MI-EEG decoding techniques are generally grouped into two major classes: machine learning (ML) algorithms and deep learning (DL) algorithms. ML algorithms derive discriminative representations from MI-EEG data by quantifying signal characteristics in temporal, spectral, and spatial domains. Notable examples include Filter Bank Common Spatial Patterns (FBCSP) (<xref ref-type="bibr" rid="B4">Ang et al., 2008</xref>) and the Continuous Wavelet Transform (CWT) (<xref ref-type="bibr" rid="B19">Hsu and Sun, 2009</xref>). Although widely adopted, these ML-based strategies rely substantially on handcrafted feature engineering, which demands considerable neurophysiological expertise. Such dependence limits their capacity to achieve further gains in decoding accuracy (<xref ref-type="bibr" rid="B2">Altaheri et al., 2023</xref>).</p>
<p>In recent years, the rapid advancement of DL has encouraged the exploration of DL-based strategies for MI-EEG decoding (Wang X. et al., <xref ref-type="bibr" rid="B46">2023</xref>; <xref ref-type="bibr" rid="B6">Bhambare and Jain, 2024</xref>; <xref ref-type="bibr" rid="B18">Gu et al., 2025</xref>), yielding substantial improvements. DL models support end-to-end representation learning, enabling the automatic derivation of informative latent features directly from raw EEG signals. As a result, DL-driven decoding frameworks mitigate the need for extensive neurophysiological domain expertise. Prominent examples include Shallow ConvNet and Deep ConvNet (<xref ref-type="bibr" rid="B37">Schirrmeister et al., 2017</xref>), as well as EEGNet (<xref ref-type="bibr" rid="B27">Lawhern et al., 2018</xref>) and FBCNet (<xref ref-type="bibr" rid="B32">Mane et al., 2021</xref>).</p>
<p>In recent years, graph neural networks (GNNs) have attracted increasing attention for EEG decoding, motivated by the need to more effectively characterize the spatial properties of EEG signals (<xref ref-type="bibr" rid="B38">Song et al., 2020</xref>; <xref ref-type="bibr" rid="B11">Du et al., 2022</xref>; <xref ref-type="bibr" rid="B13">Feng et al., 2022</xref>). By representing EEG recordings as graph-structured data, GNNs explicitly model relationships among electrode signals and offer more expressive modeling of spatial heterogeneity than traditional convolution-based techniques. Despite these advantages, existing GNN-driven MI-EEG decoding approaches encounter several persistent limitations. First, adjacency matrices derived from predefined similarity measures may inadequately represent functional relationships among cortical regions, potentially omitting salient neural interaction patterns. When the adjacency structure is learned instead, the number of parameters grows quadratically with the number of electrodes, resulting in substantial computational overhead. In addition, these adjacency matrices are typically fixed during inference, even though MI tasks may induce dynamic changes in functional connectivity, thereby constraining the model&#x00027;s ability to capture task-specific variations. Furthermore, many current frameworks incorporate GNN modules directly into the decoding pipeline, and the computational burden associated with GNN operations further amplifies parameter growth. Finally, variations in cognitive states, such as mental fatigue, can influence the relative contributions of different experimental sessions to overall decoding performance, yet this factor is frequently disregarded in contemporary models.</p>
<p>To address the aforementioned limitations, we propose the Dynamic Spectral-Spatial Interaction Convolution Neural Network (DSSICNN). The primary contributions of this work are summarized as follows:</p>
<list list-type="bullet">
<list-item><p>We propose a dual-branch parallel framework that concurrently derives spatial semantic features associated with MI.</p></list-item>
<list-item><p>To account for the differential contributions of distinct phases within the MI process to decoding outcomes, we introduce an attention-guided module for spectrally informed temporal aggregation.</p></list-item>
<list-item><p>We performed extensive comparative evaluations to assess the performance of DSSICNN. The findings indicate that DSSICNN achieves substantial improvements over state-of-the-art (SOTA) methods.</p></list-item>
</list></sec>
<sec id="s2">
<label>2</label>
<title>Related work</title>
<sec>
<label>2.1</label>
<title>Attention mechanism</title>
<p>Over the past decade, attention mechanisms have experienced significant advancements. SENet (<xref ref-type="bibr" rid="B20">Hu et al., 2018</xref>) introduced the principle of channel-wise attention, which has subsequently influenced the development of numerous architectures, including <italic>A</italic><sup>2</sup>-Net (<xref ref-type="bibr" rid="B8">Chen et al., 2018</xref>), CBAM (<xref ref-type="bibr" rid="B48">Woo et al., 2018</xref>), ECANet (<xref ref-type="bibr" rid="B45">Wang et al., 2020</xref>), and FcaNet (<xref ref-type="bibr" rid="B35">Qin et al., 2021</xref>). These mechanisms have also been extensively applied in MI-EEG classification (<xref ref-type="bibr" rid="B39">Song et al., 2023</xref>), where their integration into decoding frameworks frequently results in marked performance enhancements. Inspired by these developments, we integrate attention mechanisms within DSSICNN.</p>
</sec>
<sec>
<label>2.2</label>
<title>Cross-frequency coupling</title>
<p>Cross-frequency coupling (CFC) denotes the modulation of neural oscillatory activity in one frequency band by activity in another band. Empirical studies have demonstrated that during MI tasks, CFC occurs across multiple frequency bands (<xref ref-type="bibr" rid="B14">Feng et al., 2020</xref>). Moreover, distinct MI tasks induce characteristic CFC patterns across both spectral and spatial dimensions. Consequently, CFC constitutes a vital source of discriminative features relevant to MI decoding.</p></sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Methods</title>
<p>The detailed architecture of DSSICNN is illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>. DSSICNN mainly consists of three components: Dynamic Spatial Interaction Graph-guided Spatio-Temporal Feature Extraction (DSIGSTFE), which comprises a Temporal-Spectral-Spatial Branch and a Spatial Guidance Branch; the Attentive Cross-Spectro Interaction Module (ACSIM); and Attentive Spectro-Dependent Temporal Aggregation (ASDTA).</p>
<fig position="float" id="F1">
<label>Figure 1</label>
<caption><p>The overall architecture of DSSICNN.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnhum-20-1755549-g0001.tif">
<alt-text content-type="machine-generated">Diagram depicting a neural network model architecture
for processing raw EEG signals, highlighting three main modules (DSIGSTFE,
ACSIM, and ASDTA), separate branches for temporal-spectral-spatial and
spatial guidance processing, and a fusion mechanism leading to final
classification results. Symbols denote operations like convolution, pooling,
and addition.</alt-text>
</graphic>
</fig>
<sec>
<label>3.1</label>
<title>Two forms of EEG</title>
<sec>
<label>3.1.1</label>
<title>Multi-view EEG</title>
<p>Owing to inter-subject variability in the frequency bands associated with ERD/ERS, decomposing EEG signals into multiple sub-bands via band-pass filter banks has been shown to improve MI-EEG decoding performance (<xref ref-type="bibr" rid="B4">Ang et al., 2008</xref>; <xref ref-type="bibr" rid="B32">Mane et al., 2021</xref>; <xref ref-type="bibr" rid="B31">Liu et al., 2023</xref>). Specifically, let <inline-formula><mml:math id="M1"><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>w</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> represent the raw EEG signal, where <italic>C</italic> denotes the number of scalp electrodes and <italic>T</italic> denotes the number of temporal samples. The raw MI-EEG data are then decomposed into <inline-formula><mml:math id="M2"><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mi>b</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:mi>C</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> using a band-pass filter bank, where <italic>N</italic><sub><italic>b</italic></sub> corresponds to the number of filters applied. The band-pass filter bank consists of nine band-pass filters with frequency ranges of 4&#x02013;8 Hz, 8&#x02013;12 Hz, 12&#x02013;16 Hz, 16&#x02013;20 Hz, 20&#x02013;24 Hz, 24&#x02013;28 Hz, 28&#x02013;32 Hz, 32&#x02013;36 Hz, and 36&#x02013;40 Hz, respectively.</p>
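<p>A minimal sketch of this decomposition is given below. The nine 4 Hz-wide sub-bands follow the description above, whereas the Butterworth filter family, the filter order, and the sampling rate are illustrative assumptions, since the filter design is not specified here.</p>
<code language="python" xml:space="preserve">
# Sketch of the multi-view (filter-bank) decomposition. The nine sub-bands
# (4-8 Hz, ..., 36-40 Hz) follow the text; the Butterworth design, the filter
# order, and the 250 Hz sampling rate are assumptions for illustration.
import numpy as np
from scipy.signal import butter, filtfilt

def multi_view_eeg(x_raw, fs=250, order=4):
    """Map raw EEG of shape (C, T) to multi-view EEG of shape (N_b, C, T)."""
    bands = [(f, f + 4) for f in range(4, 40, 4)]     # 4-8, 8-12, ..., 36-40 Hz
    views = []
    for lo, hi in bands:
        b, a = butter(order, [lo, hi], btype="bandpass", fs=fs)
        views.append(filtfilt(b, a, x_raw, axis=-1))  # zero-phase filtering
    return np.stack(views)                            # (N_b = 9, C, T)

x_fb = multi_view_eeg(np.random.randn(22, 1000))      # e.g., 22 electrodes, 4 s at 250 Hz
print(x_fb.shape)                                     # (9, 22, 1000)
</code>
</sec>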
<sec>
<label>3.1.2</label>
<title>Graph EEG</title>
<p>As described in <xref ref-type="bibr" rid="B49">Wu et al. (2021)</xref>, a graph can be formally defined as <italic>G</italic> &#x0003D; (<italic>V, E, A, H</italic>), where <italic>V</italic> represents the set of nodes, <italic>E</italic> the set of edges, <italic>A</italic> the adjacency matrix, and <italic>H</italic> the node feature matrix. In the context of EEG, nodes correspond to scalp electrodes, while edges and the adjacency matrix capture the functional or structural connectivity between the brain regions associated with electrode pairs. The matrix <italic>H</italic> encodes the features corresponding to each EEG electrode.</p>
<p>EEG-derived brain networks frequently include spurious connections arising from noise, which can interfere with inter-regional information transfer and impair MI-EEG decoding performance. To mitigate this issue, spurious links are reduced by estimating functional connectivity (FC) between brain regions, which measures their statistical interdependencies. In the present study, FC is evaluated using the phase locking value (PLV) (<xref ref-type="bibr" rid="B26">Lachaux et al., 1999</xref>), and the resulting PLV measurements are utilized to construct the adjacency matrix based on the following criteria:</p>
<disp-formula id="EQ1"><mml:math id="M3"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mi>L</mml:mi><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:mfrac><mml:mo>|</mml:mo><mml:mrow><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>&#x003C6;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003C6;</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mo>|</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(1)</label></disp-formula>
<disp-formula id="EQ2"><mml:math id="M4"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>A</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x02003;</mml:mtext><mml:mi>P</mml:mi><mml:mi>L</mml:mi><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0003E;</mml:mo><mml:mi>t</mml:mi><mml:mi>h</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>h</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi><mml:mi>d</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mtext>&#x02003;</mml:mtext><mml:mi>P</mml:mi><mml:mi>L</mml:mi><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0003C;</mml:mo><mml:mi>t</mml:mi><mml:mi>h</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>h</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi><mml:mi>d</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(2)</label></disp-formula>
<p>Here, <italic>PLV</italic><sub><italic>ij</italic></sub> denotes the PLV between the signals of the <italic>i</italic><sup><italic>th</italic></sup> and <italic>j</italic><sup><italic>th</italic></sup> EEG electrodes, and &#x003C6;[<italic>t</italic>] denotes the phase of the analytic signal, which is derived from <italic>x</italic><sub><italic>raw</italic></sub> through the Hilbert transform. To facilitate model convergence and ensure consistent graph sparsity, the threshold is set to the 100<sup><italic>th</italic></sup> largest PLV value.</p>
<p>In summary, FC between brain regions is quantified using the PLV. Connections exhibiting lower PLV values, which are regarded as spurious links introduced by noise, are subsequently removed (<xref ref-type="bibr" rid="B25">Klepl et al., 2022</xref>).</p>
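<p>A minimal sketch of this construction, following Equations 1, 2, is given below; removing self-connections before selecting the 100th largest PLV value is an assumption made for illustration.</p>
<code language="python" xml:space="preserve">
# Sketch of the PLV-based adjacency construction (Equations 1, 2). Electrode
# phases are obtained from the Hilbert transform of the raw signals, and the
# threshold is set to the 100th largest PLV value, as described above.
import numpy as np
from scipy.signal import hilbert

def plv_adjacency(x_raw, k=100):
    """Map raw EEG of shape (C, T) to a binary (C, C) adjacency matrix."""
    phase = np.angle(hilbert(x_raw, axis=-1))      # phi_i[t] for each electrode
    z = np.exp(1j * phase)
    # PLV_ij = (1/T) |sum_t exp(i * (phi_i[t] - phi_j[t]))|   (Equation 1)
    plv = np.abs(z @ z.conj().T) / phase.shape[1]
    np.fill_diagonal(plv, 0.0)                     # drop self-connections (assumed)
    threshold = np.sort(plv, axis=None)[-k]        # 100th largest PLV value
    return (plv &gt; threshold).astype(np.float32)    # Equation 2

A = plv_adjacency(np.random.randn(22, 1000))       # e.g., 22 electrodes
</code>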
</sec>
</sec>
<sec>
<label>3.2</label>
<title>Dynamic spatial interaction graph-guided spatio-temporal feature extraction</title>
<p>The core concept of DSIGSTFE is to incorporate principles from network neuroscience (<xref ref-type="bibr" rid="B5">Bassett and Sporns, 2017</xref>) into the MI-EEG decoding pipeline to improve the extraction of spatial semantic features. Specifically, DSIGSTFE captures spatial features across both Euclidean and non-Euclidean domains simultaneously. While numerous studies (<xref ref-type="bibr" rid="B31">Liu et al., 2023</xref>) have utilized spatial convolutions to extract spatial information with considerable decoding accuracy, insights from network neuroscience indicate that EEG signals possess graph-structured characteristics, and spatial convolution alone is insufficient to comprehensively model interactions among brain regions, often referred to as spatial interactions. To address this limitation, a dynamic GNN is employed to model these spatial interactions and extract spatial guidance features, thereby enhancing the spatial representations derived from convolutional operations. Furthermore, in comparison with previous approaches, the proposed dynamic GNN is integrated into a parameter-efficient MI-EEG decoding framework, resulting in a novel architecture that demonstrates notable strengths in both methodological innovation and computational efficiency. The parameter-efficient MI-EEG decoding framework we designed effectively mitigates overfitting during training, while simultaneously ensuring that DSSICNN possesses strong potential for deployment in MI-BCI.</p>
<sec>
<label>3.2.1</label>
<title>Temporal-spectral-spatial branch</title>
<p>A spatial convolutional layer comprising <italic>m</italic> spatial filters with a receptive field of (<italic>C</italic>, 1) is utilized to extract spatial features. Given that the input consists of multi-band signals, information is concurrently aggregated across the spectral dimension, facilitating the extraction of spectral features. To map the input signals into a latent space with increased representational capacity and to enhance the spectral resolution of the resulting features for subsequent ACSIM processing, the condition <italic>m</italic>&#x0003E;<italic>N</italic><sub><italic>b</italic></sub> is imposed. Following the spatial convolution operation, a Batch Normalization (BN) layer (<xref ref-type="bibr" rid="B21">Ioffe and Szegedy, 2015</xref>) and an Exponential Linear Unit (ELU) activation function (<xref ref-type="bibr" rid="B10">Clevert et al., 2015</xref>) are applied. Additionally, the associated parameters are regularized by constraining the maximum L2 norm of the filter weights in the spatial convolutional layers.</p>
<p>In summary, the computation for the spatial convolutional layer is given by the following formulation:</p>
<disp-formula id="EQ3"><mml:math id="M5"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>v</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mi>b</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(3)</label></disp-formula>
<p>where <italic>F</italic><sub><italic>conv</italic></sub>(&#x000B7;) is the spatial convolution layer, <italic>F</italic><sub><italic>bn</italic></sub>(&#x000B7;) is the BN layer, &#x003C3;(&#x000B7;) is the ELU activation function, <inline-formula><mml:math id="M6"><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mi>b</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:mi>C</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> is the multi-view EEG, and the <inline-formula><mml:math id="M7"><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> is the output feature of the spatial convolution layer.</p>
<p>During MI trials, participants inevitably experience mental fatigue (<xref ref-type="bibr" rid="B23">Jacquet et al., 2021</xref>), which can lead to substantial fluctuations in MI-related temporal patterns over the course of a trial. To mitigate this effect, each trial is divided into non-overlapping temporal segments, and temporal features are extracted from each segment. Although convolution neural networks (CNNs) are frequently employed for temporal feature extraction, prior studies in electrophysiology and nonlinear dynamical neuroscience have shown that EEG signals are inherently non-stationary, limiting the capacity of CNNs to fully capture temporal dynamics (<xref ref-type="bibr" rid="B22">Izhikevich, 2007</xref>). Therefore, CNNs are not applied for temporal feature extraction in this study.</p>
<p>Motivated by the approach in <xref ref-type="bibr" rid="B32">Mane et al. (2021)</xref>, a temporal log-variance layer is employed to extract temporal features from each time window. Specifically, <italic>x</italic><sub><italic>ssf</italic></sub> is partitioned along the temporal dimension using non-overlapping time windows of length <italic>w</italic>, and the logarithmic variance is computed for each time window. The computation of the temporal log-variance layer is given as follows:</p>
<disp-formula id="EQ4"><mml:math id="M8"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>v</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(4)</label></disp-formula>
<disp-formula id="EQ5"><mml:math id="M9"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>v</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>w</mml:mi></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mi>w</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>w</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:munderover></mml:mstyle><mml:msup><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>t</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mi>&#x003BC;</mml:mi><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(5)</label></disp-formula>
<p>where <inline-formula><mml:math id="M10"><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> represents the output feature of the temporal-spectral-spatial branch, <inline-formula><mml:math id="M11"><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>&#x0230A;</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi>T</mml:mi></mml:mrow><mml:mrow><mml:mi>w</mml:mi></mml:mrow></mml:mfrac></mml:mrow><mml:mo>&#x0230B;</mml:mo></mml:mrow></mml:math></inline-formula> represents the number of time window, &#x003C3;(&#x000B7;) represents the logarithmic function, <italic>F</italic><sub><italic>var</italic></sub>(&#x000B7;) represents the temporal log-variance layer, <inline-formula><mml:math id="M12"><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> represents the output feature of the spatial convolution layer, <italic>i</italic> represents the <italic>i</italic><sup><italic>th</italic></sup> feature map, <italic>j</italic> represents the <italic>j</italic><sup><italic>th</italic></sup> time window, and &#x003BC;[<italic>i, j</italic>] represents the mean of the <italic>j</italic><sup><italic>th</italic></sup> time window within the <italic>i</italic><sup><italic>th</italic></sup> feature map.</p></sec>
<sec>
<label>3.2.2</label>
<title>Spatial guidance branch</title>
<p>Within the Spatial Guidance Branch, a dynamic GNN is utilized to generate spatial guidance features, which are subsequently employed to enhance the representations produced by the Temporal-Spectral-Spatial Branch. To ensure that these spatial guidance features effectively complement the outputs of the Temporal-Spectral-Spatial Branch, temporal alignment between the two branches is maintained. This is accomplished by first fusing the multi-spectral features from multi-view EEG using a (1, 1) pointwise convolution (<xref ref-type="bibr" rid="B9">Chollet, 2017</xref>). The fused features are then processed through a temporal log-variance layer, calculated as follows:</p>
<disp-formula id="EQ6"><mml:math id="M13"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x003D5;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>v</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>v</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mi>b</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(6)</label></disp-formula>
<p>where <inline-formula><mml:math id="M14"><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>C</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> is the spectral-fused feature and serves as the node feature matrix used as input to the subsequent dynamic GNN, <inline-formula><mml:math id="M15"><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mi>b</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:mi>C</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> is the multi-view EEG, <italic>F</italic><sub><italic>conv</italic></sub>(&#x000B7;) is the (1, 1) pointwise convolution, <italic>F</italic><sub><italic>bn</italic></sub>(&#x000B7;) is the BN layer, &#x003C3;(&#x000B7;) is the ELU activation function, <italic>F</italic><sub><italic>var</italic></sub>(&#x000B7;) is the temporal log-variance layer, and &#x003D5;(&#x000B7;) is the logarithmic function.</p>
<p>Subsequently, a dynamic GNN is employed to model interactions among brain regions. In this study, GATv2 (<xref ref-type="bibr" rid="B7">Brody et al., 2022</xref>) is adopted as the dynamic GNN, with its design principles largely derived from the original GAT architecture (<xref ref-type="bibr" rid="B43">Velickovi&#x00107; et al., 2018</xref>). GATv2 is a convolutional-style GNN that leverages a self-attention mechanism, enabling the dynamic aggregation of node features by adaptively adjusting attention weights based on the properties of each node and its neighbors. Since its feature aggregation relies exclusively on local node-neighborhood relationships, GATv2 is well suited for applications where the underlying graph structure is incomplete or partially unknown. As noted in Section 3.1.2, accurately estimating connectivity using predefined statistical measures is challenging, often resulting in incomplete structural information in graph-based EEG representations. Therefore, GATv2 is particularly appropriate for integration into DSSICNN. The computational procedure of GATv2 is as follows:</p>
<disp-formula id="EQ7"><mml:math id="M16"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mover class="overrightarrow"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:msubsup></mml:mrow><mml:mo>&#x020D7;</mml:mo></mml:mover><mml:mo>=</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>h</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:munderover></mml:mstyle><mml:mstyle displaystyle="true"><mml:munder class="msub"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>&#x02208;</mml:mo><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:munder></mml:mstyle><mml:msubsup><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msubsup><mml:msup><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>W</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msup><mml:mover class="overrightarrow"><mml:mrow><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>&#x020D7;</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(7)</label></disp-formula>
<p>where <inline-formula><mml:math id="M17"><mml:mover class="overrightarrow"><mml:mrow><mml:msubsup><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi>&#x02032;</mml:mi></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo>&#x020D7;</mml:mo></mml:mover><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> indicates the output feature of node <italic>i</italic>, &#x003C3;(&#x000B7;) indicates the activation function, <italic>N</italic><sub><italic>heads</italic></sub> indicates the number of attention heads [defined similarly to Transformer (<xref ref-type="bibr" rid="B42">Vaswani et al., 2017</xref>)], <italic>N</italic><sub><italic>i</italic></sub> indicates the neighborhood of node <italic>i</italic>, <inline-formula><mml:math id="M18"><mml:msubsup><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msubsup></mml:math></inline-formula> indicates the normalized attention coefficients (computed based on the node features of node <italic>i</italic> and node <italic>j</italic>), <bold>W</bold><sup><italic>k</italic></sup>&#x02208;<italic>R</italic><sup><italic>t</italic>&#x000D7;<italic>t</italic></sup> indicates the learnable linear transformation, and <inline-formula><mml:math id="M19"><mml:mover class="overrightarrow"><mml:mrow><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>&#x020D7;</mml:mo></mml:mover><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> indicates the input feature of node <italic>j</italic>.</p>
<p>Once the node-level features are obtained, a readout function is necessary to generate a graph-level embedding, since the task entails classifying EEG signals from individual MI trials (i.e., graph classification). In this work, global sum pooling is employed as the readout function, as previous studies (<xref ref-type="bibr" rid="B50">Xu et al., 2019</xref>) have shown that it frequently yields more discriminative graph embeddings.</p>
<p>In summary, GATv2, in conjunction with global sum pooling, is applied to the spectrally fused features to generate the spatial guidance representations.</p>
<disp-formula id="EQ8"><mml:math id="M20"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>g</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>b</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>v</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>G</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(8)</label></disp-formula>
<p>where <inline-formula><mml:math id="M21"><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>g</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> is the spatial guidance feature, <italic>F</italic><sub><italic>readout</italic></sub>(&#x000B7;) is the global sum pooling, &#x003C3;(&#x000B7;) is the ELU activation function, <italic>F</italic><sub><italic>bn</italic></sub>(&#x000B7;) is the BN layer, <italic>F</italic><sub><italic>gatv</italic>2</sub>(&#x000B7;) is the GATv2 layer, and <italic>G</italic> &#x0003D; (<italic>V, E, A, H</italic>) is the graph EEG.</p></sec>
<sec>
<label>3.2.3</label>
<title>Enhancing spatial domain representation using spatial guidance features</title>
<p>Taking into account the temporal and spectral compatibility of <italic>x</italic><sub><italic>tssf</italic></sub> and <italic>x</italic><sub><italic>sgf</italic></sub>, as well as the need to control parameter complexity, a simple element-wise addition is utilized to augment the spatial representation within the overall model, as shown below:</p>
<disp-formula id="EQ9"><mml:math id="M22"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>g</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(9)</label></disp-formula>
<p>where <inline-formula><mml:math id="M23"><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> represents the enhanced spatio-temporal feature. Since <italic>x</italic><sub><italic>sgf</italic></sub> was derived from analyses grounded in network neuroscience, this process effectively integrates network neuroscience prior into MI-EEG decoding framework.</p>
</sec>
</sec>
<sec>
<label>3.3</label>
<title>Attentive cross-spectro interaction module</title>
<p>At present, most DL-based MI-EEG decoding algorithms tend to introduce, in the shallow layers of the network, a convolutional layer with multiple filters to transform raw input signals into more expressive latent representations (e.g., the spatial convolution layer in DSSICNN). Such latent representations typically consist of multiple spectral (frequency) channels. Inspired by the phenomenon of CFC, we posit that explicitly modeling the interactions among spectral channels can facilitate the extraction of MI-related discriminative features. However, the majority of existing DL-based MI-EEG decoding approaches largely overlook the modeling of interactions among spectral channels. Given that prior studies have rarely calibrated features according to the relative importance of individual spectral channels, this work designs an attention-based module inspired by CFC to further enhance decoding performance. This module adaptively recalibrates spectral features, thereby promoting the effective representation of cross-spectral interactions. In particular, spectro-wise features are explicitly adjusted via attention mechanisms to capture interactions across spectral bands. Achieving this recalibration necessitates modeling the correlations among distinct frequency components.</p>
<p>To characterize the relationships among spectral bands, global spectro-wise information is initially aggregated. Both max pooling and average pooling are considered effective for capturing critical global spectro-wise features. The computation of this spectro-wise global information is expressed as follows:</p>
<disp-formula id="EQ10"><mml:math id="M24"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(10)</label></disp-formula>
<disp-formula id="EQ11"><mml:math id="M25"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>v</mml:mi><mml:mi>g</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>F</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>v</mml:mi><mml:mi>g</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(11)</label></disp-formula>
<p>where <inline-formula><mml:math id="M26"><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> is the spectro-wise global information generated by max pooling, <italic>F</italic><sub><italic>maxpool</italic></sub>(&#x000B7;) is the max pooling layer, <inline-formula><mml:math id="M27"><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>v</mml:mi><mml:mi>g</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> is the spectro-wise global information generated by average pooling, and <italic>F</italic><sub><italic>avgpool</italic></sub>(&#x000B7;) is the average pooling layer.</p>
<p>Subsequently, the aggregated spectro-wise global information is utilized to systematically capture correlations across spectral bands. The spectro-wise global information obtained in the previous step serves as the initial set of coefficients, which are then employed to model cross-spectral interactions through coefficient reconstruction. Inspired by cross-correlation methods, a procedure is devised for reconstructing these coefficients. The computation for obtaining the reconstructed coefficients is defined as follows:</p>
<disp-formula id="EQ12"><mml:math id="M28"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac></mml:mrow><mml:mrow><mml:mo>-</mml:mo><mml:mfrac><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac></mml:mrow></mml:munderover></mml:mstyle><mml:mi>w</mml:mi><mml:mi>e</mml:mi><mml:mi>i</mml:mi><mml:mi>g</mml:mi><mml:mi>h</mml:mi><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac><mml:mo>-</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>-</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(12)</label></disp-formula>
<disp-formula id="EQ13"><mml:math id="M29"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>v</mml:mi><mml:mi>g</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac></mml:mrow><mml:mrow><mml:mo>-</mml:mo><mml:mfrac><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac></mml:mrow></mml:munderover></mml:mstyle><mml:mi>w</mml:mi><mml:mi>e</mml:mi><mml:mi>i</mml:mi><mml:mi>g</mml:mi><mml:mi>h</mml:mi><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>v</mml:mi><mml:mi>g</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac><mml:mo>-</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>v</mml:mi><mml:mi>g</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>-</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(13)</label></disp-formula>
<p>where <inline-formula><mml:math id="M30"><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> and <inline-formula><mml:math id="M31"><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>v</mml:mi><mml:mi>g</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> are the spectral context descriptors, <inline-formula><mml:math id="M32"><mml:mi>w</mml:mi><mml:mi>e</mml:mi><mml:mi>i</mml:mi><mml:mi>g</mml:mi><mml:mi>h</mml:mi><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> and <inline-formula><mml:math id="M33"><mml:mi>w</mml:mi><mml:mi>e</mml:mi><mml:mi>i</mml:mi><mml:mi>g</mml:mi><mml:mi>h</mml:mi><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>v</mml:mi><mml:mi>g</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> are the trainable weight vectors, and <italic>l</italic> is the size of the cross-correlation window.</p>
<p>Once the spectral context descriptors have been obtained, they are fused through element-wise addition followed by a sigmoid activation:</p>
<disp-formula id="EQ14"><mml:math id="M34"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mi>u</mml:mi><mml:mo>=</mml:mo><mml:mi>&#x003D5;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>u</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>v</mml:mi><mml:mi>g</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>o</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(14)</label></disp-formula>
<p>where <italic>u</italic>&#x02208;<italic>R</italic><sup><italic>m</italic></sup> is the fused spectral context descriptor, and &#x003D5;(&#x000B7;) is the sigmoid activation function.</p>
<p>Finally, spectro-wise feature recalibration is accomplished by performing element-wise multiplication between the ACSIM input features and the spectral context descriptors. This operation further enhances cross-spectral interactions, as illustrated below:</p>
<disp-formula id="EQ15"><mml:math id="M35"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>u</mml:mi><mml:mo>&#x02297;</mml:mo><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(15)</label></disp-formula>
<p>where &#x02297; represents the spectro-wise multiplication, and <inline-formula><mml:math id="M36"><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> represents the output features of ACSIM.</p>
<p>It is worth noting that several recent studies have begun to introduce attention-based spectral feature recalibration modules. Among them, M-FANet (<xref ref-type="bibr" rid="B34">Qin et al., 2024</xref>) is a representative example, which directly employs SENet (<xref ref-type="bibr" rid="B20">Hu et al., 2018</xref>) to achieve spectral feature recalibration. However, the two linear layers inherent in SENet, which perform dimensionality reduction followed by dimensionality expansion, may lead to the loss of information in the original spectral channels. In contrast, ACSIM models interactions among spectral channels directly via cross-correlation without relying on fully connected layers, thereby avoiding information loss in the original spectral channels. Moreover, SENet extracts spectral channel information solely through average pooling, whereas ACSIM jointly utilizes average pooling and max pooling, enabling the capture of richer and potentially more informative spectral channel representations.</p>
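<p>To make the above computation concrete, the following is a minimal PyTorch sketch of an ACSIM-style module implementing Equations 10&#x02013;15. All names are illustrative assumptions rather than the authors&#x00027; released code; the cross-correlations of Equations 12, 13 are realized as single-channel 1D convolutions sliding over the spectral axis.</p>
<preformat>import torch
import torch.nn as nn

class ACSIM(nn.Module):
    """Sketch of an attentive cross-spectro interaction module.

    Input x has shape [batch, m, t] (m spectral channels, t time windows).
    """
    def __init__(self, window=5):
        super().__init__()
        assert window % 2 == 1, "an odd window keeps the output length equal to m"
        # Trainable weight vectors of length l (Eqs. 12, 13), realized as
        # one-channel 1D convolutions sliding over the spectral axis.
        self.corr_max = nn.Conv1d(1, 1, window, padding=window // 2, bias=False)
        self.corr_avg = nn.Conv1d(1, 1, window, padding=window // 2, bias=False)

    def forward(self, x):
        z_max = x.amax(dim=2)                        # Eq. 10: [B, m]
        z_avg = x.mean(dim=2)                        # Eq. 11: [B, m]
        u_max = self.corr_max(z_max.unsqueeze(1))    # Eq. 12: [B, 1, m]
        u_avg = self.corr_avg(z_avg.unsqueeze(1))    # Eq. 13: [B, 1, m]
        u = torch.sigmoid(u_max + u_avg)             # Eq. 14: [B, 1, m]
        return u.transpose(1, 2) * x                 # Eq. 15: broadcast over t</preformat>
<p>With <italic>m</italic> &#x0003D; 64 spectral channels, an input of shape [batch, 64, <italic>t</italic>] is returned with the same shape, rescaled channel-wise by the attention vector <italic>u</italic>.</p>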
</sec>
<sec>
<label>3.4</label>
<title>Attentive spectro-dependent temporal aggregate</title>
<p>Mental fatigue can lead to significant variations in the quality of MI-related temporal fluctuation patterns within the time windows described in Section 3.2.1. This phenomenon occurs because fatigue may compromise a participant&#x00027;s ability to consistently perform MI tasks at certain stages of the experiment. Accordingly, accurately modeling the quality of temporal fluctuation patterns throughout the MI process, in order to assess the contributions of different MI stages to decoding performance, is crucial for enhancing overall decoding accuracy.</p>
<p>To address this challenge, this study proposes a novel attention-based framework that models the quality of MI fluctuation patterns across different stages of MI task execution by subjects, thereby aggregating feature representations from multiple time windows along the temporal axis. This strategy enhances the model&#x00027;s capacity to capture dynamic temporal dependencies. Specifically, attention weights are assigned to quantify the relative significance of features within each time window, and a weighted sum of features across all windows is subsequently computed based on these weights. Furthermore, because the spatial convolution in the Temporal-Spectral-Spatial Branch encodes spectral information within each time window (as detailed in Section 3.2.1), temporal fluctuation patterns may vary across different spectral bands. To account for this variability, independent attention weights are employed to aggregate temporal features for each spectral band individually.</p>
<p>In summary, the computation of ASDTA can be formally expressed as follows:</p>
<disp-formula id="EQ16"><mml:math id="M37"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>d</mml:mi><mml:mi>t</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(16)</label></disp-formula>
<disp-formula id="EQ17"><mml:math id="M38"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>w</mml:mi><mml:mi>e</mml:mi><mml:mi>i</mml:mi><mml:mi>g</mml:mi><mml:mi>h</mml:mi><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>d</mml:mi><mml:mi>t</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>w</mml:mi><mml:mi>e</mml:mi><mml:mi>i</mml:mi><mml:mi>g</mml:mi><mml:mi>h</mml:mi><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>d</mml:mi><mml:mi>t</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(17)</label></disp-formula>
<p>where <inline-formula><mml:math id="M39"><mml:msub><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>d</mml:mi><mml:mi>t</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> is the output feature of ASDTA, &#x003B1;<sub><italic>ij</italic></sub> is the attention weight of the <italic>j</italic><sup><italic>th</italic></sup> time window in the <italic>i</italic><sup><italic>th</italic></sup> spectral channel, and <inline-formula><mml:math id="M40"><mml:mi>w</mml:mi><mml:mi>e</mml:mi><mml:mi>i</mml:mi><mml:mi>g</mml:mi><mml:mi>h</mml:mi><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>d</mml:mi><mml:mi>t</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mo>&#x000D7;</mml:mo><mml:mi>t</mml:mi></mml:mrow></mml:msup></mml:math></inline-formula> is the trainable matrix.</p>
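<p>A minimal sketch of ASDTA, under the same illustrative naming assumptions as above, is given below: one trainable weight per (spectral channel, time window) pair is normalized with a per-channel softmax (Equation 17) and then used to aggregate the windows along the temporal axis (Equation 16).</p>
<preformat>import torch
import torch.nn as nn

class ASDTA(nn.Module):
    """Sketch of attentive spectro-dependent temporal aggregation (Eqs. 16, 17).

    A separate attention weight is learned for every (spectral channel,
    time window) pair; features are then summed over the t windows.
    """
    def __init__(self, m, t):
        super().__init__()
        self.weight = nn.Parameter(torch.zeros(m, t))   # weight_asdtf in Eq. 17

    def forward(self, x):                               # x: [B, m, t]
        alpha = torch.softmax(self.weight, dim=1)       # Eq. 17: per-channel softmax
        return (alpha * x).sum(dim=2)                   # Eq. 16: [B, m]</preformat>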
</sec>
<sec>
<label>3.5</label>
<title>Classification and training</title>
<p>Finally, a linear layer is employed to compute the probability distribution over the classes. The cross-entropy loss function is used to guide the training process. In addition, the Adam optimizer (<xref ref-type="bibr" rid="B24">Kingma and Ba, 2014</xref>) is adopted to update the parameters of DSSICNN. The hyperparameters are configured as follows: the number of spatial filters <italic>m</italic> is set to 64, the time window size <italic>w</italic> is set to 250, the dropout rate is set to 0.1, the weight decay is set to 0.001, and the learning rate is set to 0.001.</p>
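<p>As a rough illustration of this configuration, the sketch below wires the stated loss, optimizer, and hyperparameter values into a single PyTorch training step. The model is a trivial stand-in rather than the DSSICNN architecture, and the batch is synthetic.</p>
<preformat>import torch
import torch.nn as nn

# Trivial stand-in for DSSICNN (assumption; four classes as in Dataset A).
model = nn.Sequential(nn.Flatten(), nn.Dropout(0.1), nn.LazyLinear(4))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-3)

x = torch.randn(16, 22, 1000)    # dummy batch: 22 channels, 4 s at 250 Hz
y = torch.randint(0, 4, (16,))   # dummy labels for the four MI classes
optimizer.zero_grad()
loss = criterion(model(x), y)
loss.backward()
optimizer.step()</preformat>
</sec></sec>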
<sec id="s4">
<label>4</label>
<title>Experiments</title>
<sec>
<label>4.1</label>
<title>Datasets</title>
<p>The effectiveness of DSSICNN was evaluated using two publicly available MI-EEG datasets, namely BCIC-IV-2a (Dataset A) (<xref ref-type="bibr" rid="B40">Tangermann et al., 2012</xref>) and OpenBMI (Dataset B) (<xref ref-type="bibr" rid="B28">Lee et al., 2019</xref>). The MI-EEG data were preprocessed following the methodology described in <xref ref-type="bibr" rid="B31">Liu et al. (2023)</xref>. No data augmentation was employed to increase the size of the two datasets mentioned above.</p>
<sec>
<label>4.1.1</label>
<title>BCIC-IV-2a dataset</title>
<p>Dataset A comprises MI-EEG data collected from nine subjects. The MI-BCI paradigm involves four MI tasks: left hand, right hand, tongue, and feet. During data acquisition, signals were recorded from 22 Ag/AgCl electrodes positioned according to the international 10&#x02013;20 system, with a sampling rate of 250 Hz. The recorded signals were subsequently preprocessed using bandpass filtering between 0.5 Hz and 100 Hz. Each subject participated in two separate sessions conducted on different days, with each session consisting of 288 trials (72 trials per class), and each trial lasting 4 s. We directly used the officially provided dataset without performing additional artifact processing, excluding only the three EOG channels. The signals within the 4 s interval following cue onset in the entire paradigm were selected as trials. In the officially provided dataset, the left mastoid served as the reference electrode and the right mastoid was used as the ground electrode. No normalization methods were applied.</p></sec>
<sec>
<label>4.1.2</label>
<title>OpenBMI dataset</title>
<p>Dataset B consists of EEG data collected from 54 subjects. The MI-BCI paradigm comprises two MI tasks: left hand and right hand. EEG signals were recorded using 62 Ag/AgCl electrodes positioned according to the international 10&#x02013;20 system, with a sampling rate of 1,000 Hz. Each subject participated in two separate sessions conducted on different days. Each session included 200 trials (100 trials per class), with each trial lasting 4 s. The recorded MI-EEG signals were subsequently downsampled to 250 Hz. Additionally, following <xref ref-type="bibr" rid="B28">Lee et al. (2019)</xref>, 20 electrodes located over the motor cortex were selected for analysis (FC-5/3/1/2/4/6, C-5/3/1/z/2/4/6, and CP-5/3/1/z/2/4/6). We directly used the officially provided dataset without performing additional artifact processing, excluding only the four EMG channels. A band-pass filter in the range of 8&#x02013;30 Hz was applied, and the signals within the 4 s interval following cue onset in the entire paradigm were selected as trials. In the officially provided dataset, the nasion served as the reference electrode and AFz was used as the ground electrode. No normalization methods were applied.</p>
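<p>The following is a minimal sketch of the trial-extraction steps described above for Dataset B (downsampling to 250 Hz, 8&#x02013;30 Hz band-pass filtering, and 4 s epochs after cue onset). The array contents and cue positions are synthetic placeholders, not part of the released dataset tooling.</p>
<preformat>import numpy as np
from scipy.signal import butter, filtfilt, decimate

fs_raw, fs = 1000, 250
rng = np.random.default_rng(0)
eeg_raw = rng.standard_normal((20, 60 * fs_raw))   # dummy 20-channel, 60 s recording
cue_onsets = [5 * fs, 20 * fs, 35 * fs]            # dummy cue positions (250 Hz samples)

eeg = decimate(eeg_raw, fs_raw // fs, axis=1)      # downsample 1,000 Hz to 250 Hz
b, a = butter(4, [8, 30], btype="bandpass", fs=fs) # 8-30 Hz band-pass
eeg = filtfilt(b, a, eeg, axis=1)                  # zero-phase filtering

trials = np.stack([eeg[:, o:o + 4 * fs] for o in cue_onsets])  # [3, 20, 1000]</preformat>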
</sec>
</sec>
<sec>
<label>4.2</label>
<title>Baseline models</title>
<p>DSSICNN was evaluated against a range of baseline models, including Deep ConvNet (<xref ref-type="bibr" rid="B37">Schirrmeister et al., 2017</xref>), Shallow ConvNet (<xref ref-type="bibr" rid="B37">Schirrmeister et al., 2017</xref>), EEGNet (<xref ref-type="bibr" rid="B27">Lawhern et al., 2018</xref>), FBCNet (<xref ref-type="bibr" rid="B32">Mane et al., 2021</xref>), SHNN (<xref ref-type="bibr" rid="B29">Liu et al., 2022</xref>), EEGConformer (<xref ref-type="bibr" rid="B39">Song et al., 2023</xref>), FBMSNet (<xref ref-type="bibr" rid="B31">Liu et al., 2023</xref>), IFNet (<xref ref-type="bibr" rid="B44">Wang J. et al., 2023</xref>), ADFCNN (<xref ref-type="bibr" rid="B41">Tao et al., 2024</xref>), MI-BMInet (<xref ref-type="bibr" rid="B47">Wang et al., 2024</xref>), and CLTNet (<xref ref-type="bibr" rid="B18">Gu et al., 2025</xref>). To ensure the rigor and fairness of the comparison, most baseline models were implemented using publicly available open-source code.</p>
</sec>
<sec>
<label>4.3</label>
<title>Experimental settings</title>
<p>To systematically evaluate the performance of DSSICNN, we conducted experiments under both subject-dependent and subject-independent settings. The subject-dependent setting further includes session-dependent and session-independent configurations. Specifically, the subject-dependent setting indicates that both the training and testing sets are drawn from the same subject, whereas the subject-independent setting denotes that the testing set is obtained from a single subject and the training set is constructed from all remaining subjects. The session-dependent setting refers to the case where both the training and testing sets are derived from the first session, with a training-to-testing ratio of 9:1, whereas the session-independent setting indicates that all data from the first session are used as the training set and all data from the second session are used as the testing set.</p>
<p>The Wilcoxon signed-rank test, the non-parametric counterpart of the paired t-test, was applied at the subject level to evaluate the statistical significance of performance differences between DSSICNN and the baseline models. All models were trained on an NVIDIA Tesla A100 GPU.</p>
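<p>As an illustration of this subject-level test, the sketch below compares two hypothetical per-subject accuracy vectors with SciPy; the numbers are invented for demonstration only.</p>
<preformat>from scipy.stats import wilcoxon

# Invented per-subject accuracies (%) for DSSICNN and one baseline.
dssicnn  = [80.1, 75.3, 91.2, 68.4, 77.9, 85.0, 72.6, 88.3, 81.7]
baseline = [77.9, 73.0, 89.5, 65.2, 76.1, 82.4, 70.8, 85.9, 79.3]

stat, p = wilcoxon(dssicnn, baseline)   # paired, non-parametric test
print(f"W = {stat:.1f}, p = {p:.4f}")</preformat>
</sec></sec>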
<sec id="s5">
<label>5</label>
<title>Results and discussion</title>
<sec>
<label>5.1</label>
<title>Comparison of decoding performance</title>
<p>The highest decoding performance is indicated in bold, with &#x0002A; denoting <italic>p</italic> &#x0003C; 0.1 and &#x0002A;&#x0002A; denoting <italic>p</italic> &#x0003C; 0.05. It should be noted that the accuracy of Dataset A refers to the classification accuracy across the four classes.</p>
<p>As presented in <xref ref-type="table" rid="T1">Table 1</xref>, under the session-independent setting, DSSICNN attains an accuracy of 80.09% and an F1-score of 79.79%, the best results among all compared models except CLTNet (discussed below), and significantly outperforms most baseline models (<italic>p</italic> &#x0003C; 0.05). Similarly, in the session-dependent setting, DSSICNN records the highest accuracy and F1-score, reaching 86.71% and 86.19%, respectively, significantly surpassing all baseline models. Moreover, in the session-independent setting, DSSICNN outperforms the leading baseline model (ADFCNN) by 2.2% in accuracy and 2.29% in F1-score. In the session-dependent setting, it achieves gains of 4.65% in accuracy and 4.81% in F1-score relative to the best baseline model (FBMSNet). Importantly, DSSICNN delivers these substantial performance improvements while maintaining a parameter-efficient architecture. CLTNet does achieve higher decoding accuracy than DSSICNN; however, CLTNet relies on data augmentation during practical deployment, whereas DSSICNN introduces no data augmentation, which partly offsets its slightly lower decoding performance relative to CLTNet. In addition, CLTNet employs stacked Transformer and LSTM modules: the self-attention mechanism within Transformers substantially increases both the spatial and temporal computational complexity of CLTNet, while the inherently sequential computation of the LSTM further increases its time complexity. These factors impose certain limitations on the practical deployment of CLTNet in MI-BCI.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Comparison of decoding performance (average &#x000B1; std) in % on Dataset A.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center" colspan="2"><bold>Session-independent</bold></th>
<th valign="top" align="center" colspan="2"><bold>Session-dependent</bold></th>
</tr>
<tr>
<th/>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>F1-score</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>F1-score</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">CLTNet</td>
<td valign="top" align="center">83.02 &#x000B1; 9.50</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">-</td>
</tr>
<tr>
<td valign="top" align="left">Shallow ConvNet</td>
<td valign="top" align="center">66.47 &#x000B1; 12.60<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">65.66 &#x000B1; 12.44<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">59.63 &#x000B1; 14.36<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">57.54 &#x000B1; 15.01<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">Deep ConvNet</td>
<td valign="top" align="center">60.15 &#x000B1; 18.33<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">57.51 &#x000B1; 20.94<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">57.07 &#x000B1; 14.16<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">54.45 &#x000B1; 14.61<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">EEGNet</td>
<td valign="top" align="center">71.10 &#x000B1; 15.19<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">70.70 &#x000B1; 15.33<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">73.04 &#x000B1; 14.42<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">72.56 &#x000B1; 14.71<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">FBCNet</td>
<td valign="top" align="center">74.11 &#x000B1; 14.70<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">73.84 &#x000B1; 14.96<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">77.56 &#x000B1; 16.02<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">77.04 &#x000B1; 16.47<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">EEGConformer</td>
<td valign="top" align="center">71.60 &#x000B1; 19.55<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">68.94 &#x000B1; 23.43<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">74.63 &#x000B1; 13.01<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">72.89 &#x000B1; 14.49<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">SHNN</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">74.36 &#x000B1; 14.77<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">-</td>
</tr>
<tr>
<td valign="top" align="left">FBMSNet</td>
<td valign="top" align="center">77.28 &#x000B1; 12.10<sup>&#x0002A;</sup></td>
<td valign="top" align="center">76.60 &#x000B1; 12.68<sup>&#x0002A;</sup></td>
<td valign="top" align="center">82.06 &#x000B1; 11.84<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">81.38 &#x000B1; 12.39<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">IFNet</td>
<td valign="top" align="center">77.70 &#x000B1; 14.42<sup>&#x0002A;</sup></td>
<td valign="top" align="center">77.11 &#x000B1; 15.07<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">81.51 &#x000B1; 13.88<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">81.10 &#x000B1; 14.23<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">ADFCNN</td>
<td valign="top" align="center">77.89 &#x000B1; 10.66<sup>&#x0002A;</sup></td>
<td valign="top" align="center">77.50 &#x000B1; 10.75<sup>&#x0002A;</sup></td>
<td valign="top" align="center">79.01 &#x000B1; 11.27<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">78.42 &#x000B1; 11.62<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">MI-BMInet</td>
<td valign="top" align="center">77.18 &#x000B1; 11.52<sup>&#x0002A;</sup></td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">-</td>
</tr>
<tr>
<td valign="top" align="left">DSSICNN</td>
<td valign="top" align="center">80.09 &#x000B1; 13.72</td>
<td valign="top" align="center">79.79 &#x000B1; 14.07</td>
<td valign="top" align="center">86.71 &#x000B1; 10.02</td>
<td valign="top" align="center">86.19 &#x000B1; 10.57</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p><sup>&#x0002A;</sup><italic>p</italic> &#x0003C; 0.1 and <sup>&#x0002A;&#x0002A;</sup><italic>p</italic> &#x0003C; 0.05. Bold values indicate the optimal decoding performance.</p>
</table-wrap-foot>
</table-wrap>
<p>The comparative results in <xref ref-type="table" rid="T2">Table 2</xref> demonstrate that DSSICNN achieves superior performance in both accuracy and F1-score under session-independent and session-dependent settings on Dataset B. In the session-independent setting, DSSICNN attains an accuracy of 77.88% and an F1-score of 77.56%, significantly outperforming all baseline models (<italic>p</italic> &#x0003C; 0.05). Similarly, in the session-dependent setting, DSSICNN achieves an accuracy of 89.00% and an F1-score of 88.72%, significantly surpassing all baseline models (<italic>p</italic> &#x0003C; 0.05). Furthermore, in the session-independent setting, DSSICNN improves upon the leading baseline model (IFNet) by 4.57% in accuracy and 5.90% in F1-score. In the session-dependent setting, it outperforms the top-performing baseline model (IFNet) by 12.53% in accuracy and 13.39% in F1-score.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Comparison of decoding performance (average &#x000B1; std) in % on Dataset B.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center" colspan="2"><bold>Session-independent</bold></th>
<th valign="top" align="center" colspan="2"><bold>Session-dependent</bold></th>
</tr>
<tr>
<th/>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>F1-score</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>F1-score</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Shallow ConvNet</td>
<td valign="top" align="center">63.96 &#x000B1; 16.29<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">62.16 &#x000B1; 17.65<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">66.61 &#x000B1; 16.70<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">65.27 &#x000B1; 17.35<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">Deep ConvNet</td>
<td valign="top" align="center">67.71 &#x000B1; 15.27<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">66.26 &#x000B1; 16.59<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">68.67 &#x000B1; 17.28<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">67.02 &#x000B1; 17.97<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">EEGNet</td>
<td valign="top" align="center">69.34 &#x000B1; 16.27<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">68.01 &#x000B1; 17.54<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">70.73 &#x000B1; 17.46<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">70.07 &#x000B1; 17.87<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">FBCNet</td>
<td valign="top" align="center">66.72 &#x000B1; 13.84<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">64.82 &#x000B1; 15.75<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">74.57 &#x000B1; 13.96<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">73.64 &#x000B1; 14.70<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">EEGConformer</td>
<td valign="top" align="center">71.14 &#x000B1; 15.56<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">68.82 &#x000B1; 17.77<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">-</td>
</tr>
<tr>
<td valign="top" align="left">FBMSNet</td>
<td valign="top" align="center">69.27 &#x000B1; 13.65<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">67.64 &#x000B1; 15.14<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">75.06 &#x000B1; 14.94<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">74.03 &#x000B1; 15.72<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">IFNet</td>
<td valign="top" align="center">73.31 &#x000B1; 15.17<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">71.66 &#x000B1; 16.98<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">76.47 &#x000B1; 15.57<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">75.33 &#x000B1; 16.61<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">ADFCNN</td>
<td valign="top" align="center">69.95 &#x000B1; 13.57<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">69.43 &#x000B1; 14.04<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">73.21 &#x000B1; 13.96<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">72.43 &#x000B1; 14.50<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">DSSICNN</td>
<td valign="top" align="center">77.88 &#x000B1; 12.99</td>
<td valign="top" align="center">77.56 &#x000B1; 13.32</td>
<td valign="top" align="center">89.00 &#x000B1; 8.53</td>
<td valign="top" align="center">88.72 &#x000B1; 8.83</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p><sup>&#x0002A;</sup><italic>p</italic> &#x0003C; 0.1 and <sup>&#x0002A;&#x0002A;</sup><italic>p</italic> &#x0003C; 0.05. Bold values indicate the optimal decoding performance.</p>
</table-wrap-foot>
</table-wrap>
<p>The outcomes of the comparative experiments indicate that integrating network neuroscience priors into the MI-EEG decoding framework substantially enhances decoding performance relative to conventional CNN and Transformer architectures. Notably, DSSICNN exhibits a more pronounced improvement on Dataset B, which may be attributable to the relatively high incidence of &#x0201C;BCI illiteracy&#x0201D; within this dataset. These findings suggest that DSSICNN is particularly effective in improving decoding performance for individuals with lower inherent BCI proficiency compared with the baseline models.</p>
<p>In addition, as a supplement to the comparison of subject-dependent decoding performance, ADFCNN, FBCNet, EEGNet, Multi-branch 3D CNN (<xref ref-type="bibr" rid="B52">Zhao et al., 2019</xref>), CCNN (<xref ref-type="bibr" rid="B3">Amin et al., 2019</xref>), CRAM (<xref ref-type="bibr" rid="B51">Zhang et al., 2019</xref>) and FBCSPCNN (<xref ref-type="bibr" rid="B6">Bhambare and Jain, 2024</xref>) were selected as baselines to evaluate subject-independent decoding performance on Dataset A. The experimental results are presented in <xref ref-type="table" rid="T3">Table 3</xref>. Under the subject-independent setting, DSSICNN achieved the best decoding performance among all baselines except FBCSPCNN (discussed below). Its decoding performance was significantly superior to that of most baselines, with the exceptions of ADFCNN and EEGNet. Moreover, the accuracy and F1-score of DSSICNN exceeded those of ADFCNN, the strongest of the remaining baselines, by 1.52% and 3.23%, respectively. These results demonstrate that DSSICNN exhibits strong generalization capability and has substantial potential for deployment in MI-BCI. FBCSPCNN does achieve higher decoding accuracy than DSSICNN; however, FBCSPCNN relies on data augmentation in practical deployment, whereas DSSICNN introduces no data augmentation, which partly offsets its lower decoding performance relative to FBCSPCNN.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Comparison of subject-independent decoding performance (average &#x000B1; std) in % on Dataset A.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>F1-score</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">FBCSPCNN</td>
<td valign="top" align="center">92.66</td>
<td valign="top" align="center">-</td>
</tr>
<tr>
<td valign="top" align="left">CLTNet</td>
<td valign="top" align="center">58.28 &#x000B1; 16.56<sup>&#x0002A;</sup></td>
<td valign="top" align="center">-</td>
</tr>
<tr>
<td valign="top" align="left">EEGNet</td>
<td valign="top" align="center">59.74 &#x000B1; 6.78</td>
<td valign="top" align="center">58.39 &#x000B1; 7.54</td>
</tr>
<tr>
<td valign="top" align="left">CRAM</td>
<td valign="top" align="center">59.10 &#x000B1; 10.85<sup>&#x0002A;</sup></td>
<td valign="top" align="center">-</td>
</tr>
<tr>
<td valign="top" align="left">CCNN</td>
<td valign="top" align="center">55.35 &#x000B1; 10.66<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">-</td>
</tr>
<tr>
<td valign="top" align="left">Multi-branch 3D CNN</td>
<td valign="top" align="center">52.17 &#x000B1; 8.76<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">-</td>
</tr>
<tr>
<td valign="top" align="left">FBCNet</td>
<td valign="top" align="center">50.25 &#x000B1; 12.95<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">46.45 &#x000B1; 15.32<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">ADFCNN</td>
<td valign="top" align="center">61.27 &#x000B1; 13.07</td>
<td valign="top" align="center">59.32 &#x000B1; 15.15</td>
</tr>
<tr>
<td valign="top" align="left">DSSICNN</td>
<td valign="top" align="center">62.79 &#x000B1; 15.01</td>
<td valign="top" align="center">62.55 &#x000B1; 15.13</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p><sup>&#x0002A;</sup><italic>p</italic> &#x0003C; 0.1 and <sup>&#x0002A;&#x0002A;</sup><italic>p</italic> &#x0003C; 0.05. Bold values indicate the optimal decoding performance.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec>
<label>5.2</label>
<title>Comparison of efficiency</title>
<p>To further discuss the feasibility of deploying DSSICNN in MI-BCI, <xref ref-type="table" rid="T4">Table 4</xref> presents a comparison between DSSICNN and the baselines on Dataset A in terms of the number of trainable parameters, the average training and testing time per epoch, the memory footprint, and the FLOPs. Analysis indicates a strong correlation between the number of trainable parameters and the memory footprint: models with more trainable parameters generally exhibit larger memory footprints. Based on the comprehensive analysis of <xref ref-type="table" rid="T1">Tables 1</xref>, <xref ref-type="table" rid="T4">4</xref>, DSSICNN achieves a relatively favorable balance among decoding accuracy, spatial complexity, and temporal complexity. Therefore, DSSICNN can be regarded as &#x0201C;parameter-efficient.&#x0201D; Although ADFCNN and IFNet have smaller memory footprints than DSSICNN, DSSICNN exhibits faster training/inference speeds and superior decoding performance compared with both. While FBCNet and EEGNet require less memory and achieve faster training/inference than DSSICNN, the decoding performance of DSSICNN is substantially superior to that of both models. Notably, despite having one of the smallest memory footprints, ADFCNN demonstrates the slowest training/inference speeds, which significantly limits its applicability to MI-BCI. We hypothesize that this limitation arises from the inclusion of multi-scale CNN components that cannot be efficiently parallelized, as well as a computationally intensive self-attention mechanism. Among the compared models, only EEGNet and EEGConformer exhibit higher FLOPs than DSSICNN. Notably, DSSICNN exhibits higher FLOPs than FBMSNet, even though FBMSNet has greater spatial and temporal complexity. We attribute this difference to the spatial convolution layers in DSSICNN, which have a relatively large number of output channels and full connectivity between input and output channels. In addition, the Spatial Guidance Branch of DSSICNN involves matrix multiplications of relatively high dimensionality, which further increases the FLOPs. Owing to the optimized convolution and matrix-multiplication operations of the PyTorch framework adopted in this study, DSSICNN nevertheless maintains favorable training and inference speeds. The number of trainable parameters in FBCSPCNN is substantially higher than that of DSSICNN, which suggests that FBCSPCNN relies, to a certain extent, on data augmentation to expand the training data in order to train such a large network effectively.</p>
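<p>The reported correspondence between trainable parameters and memory footprint can be checked with a few lines of PyTorch; the module below is an arbitrary stand-in, and 32-bit floating-point weights are assumed.</p>
<preformat>import torch.nn as nn

model = nn.Linear(22, 4)  # arbitrary stand-in module
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(n_params, "trainable parameters,",
      round(n_params * 4 / 1024, 1), "KB at 4 bytes per float32 weight")</preformat>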
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Comparison of efficiency on Dataset A.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold>Trainable parameters</bold></th>
<th valign="top" align="center"><bold>Training/inference time (s)</bold></th>
<th valign="top" align="center"><bold>Memory footprint (KB)</bold></th>
<th valign="top" align="center"><bold>FLOPs (M)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">EEGNet</td>
<td valign="top" align="center">4,028</td>
<td valign="top" align="center">1.03</td>
<td valign="top" align="center">23</td>
<td valign="top" align="center">22.93</td>
</tr>
<tr>
<td valign="top" align="left">FBCNet</td>
<td valign="top" align="center">11,812</td>
<td valign="top" align="center">1.19</td>
<td valign="top" align="center">51</td>
<td valign="top" align="center">7.78</td>
</tr>
<tr>
<td valign="top" align="left">EEGConformer</td>
<td valign="top" align="center">789,572</td>
<td valign="top" align="center">5.69</td>
<td valign="top" align="center">3,125</td>
<td valign="top" align="center">65.60</td>
</tr>
<tr>
<td valign="top" align="left">FBMSNet</td>
<td valign="top" align="center">16,267</td>
<td valign="top" align="center">5.52</td>
<td valign="top" align="center">72</td>
<td valign="top" align="center">8.50</td>
</tr>
<tr>
<td valign="top" align="left">IFNet</td>
<td valign="top" align="center">10,884</td>
<td valign="top" align="center">4.09</td>
<td valign="top" align="center">50</td>
<td valign="top" align="center">2.50</td>
</tr>
<tr>
<td valign="top" align="left">ADFCNN</td>
<td valign="top" align="center">4,852</td>
<td valign="top" align="center">9.94</td>
<td valign="top" align="center">31</td>
<td valign="top" align="center">2.45</td>
</tr>
<tr>
<td valign="top" align="left">FBCSPCNN</td>
<td valign="top" align="center">131,025,284</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">-</td>
<td valign="top" align="center">-</td>
</tr>
<tr>
<td valign="top" align="left">DSSICNN</td>
<td valign="top" align="center">13,458</td>
<td valign="top" align="center">3.37</td>
<td valign="top" align="center">58</td>
<td valign="top" align="center">12.80</td>
</tr></tbody>
</table>
</table-wrap>
<p>By achieving a favorable balance among decoding accuracy, spatial complexity, and temporal complexity, DSSICNN exhibits strong potential for deployment in MI-BCI. First, prior studies have demonstrated that excessively long recalibration times can reduce user engagement with MI-BCI and thereby degrade MI-BCI performance (<xref ref-type="bibr" rid="B12">Edelman et al., 2025</xref>). DSSICNN does not employ data augmentation and thus does not significantly increase data preprocessing time, and it further exhibits favorable training and inference speeds. Consequently, DSSICNN can effectively shorten recalibration time and improve MI-BCI performance. Second, as reported in <xref ref-type="bibr" rid="B15">Forenzo et al. (2025)</xref>, MI-BCI is often implemented on hardware with limited computational resources, and DSSICNN can effectively reduce hardware memory consumption.</p>
</sec>
<sec>
<label>5.3</label>
<title>Effect of hyperparameter settings</title>
<p>To evaluate the sensitivity of DSSICNN to hyperparameter selection, experiments were conducted on two datasets under the session-independent setting, investigating the impact of varying hyperparameter configurations on decoding performance.</p>
<sec>
<label>5.3.1</label>
<title>Effect of the number of spatial filters</title>
<p><xref ref-type="fig" rid="F2">Figure 2</xref> presents the decoding performance and the corresponding number of trainable parameters under different values of <italic>m</italic>. Decoding performance exhibits notable variability as <italic>m</italic> is adjusted. DSSICNN achieves optimal performance on both datasets when <italic>m</italic> &#x0003D; 64. Additionally, the relationship between the number of trainable parameters and the number of spatial filters follows an approximately linear trend, indicating that the quantity of spatial filters substantially influences the total parameter count. For <italic>m</italic>&#x02208;{16, 32, 128, 256}, decoding performance declines to varying degrees, suggesting that neither increasing nor decreasing the number of spatial filters consistently enhances performance. When the number of spatial filters is too small, although computational demands are reduced, the model&#x00027;s generalization capacity is limited, leading to underfitting. Moreover, the number of spatial filters directly impacts the spectral resolution of the subsequent ACSIM module. An insufficient number of spatial filters reduces ACSIM&#x00027;s spectral resolution, resulting in incomplete cross-spectral interactions and diminished decoding accuracy. Conversely, an excessive number of spatial filters compromises the parameter efficiency of DSSICNN, potentially causing overfitting and generating redundant spectral bands. These redundant bands not only increase the number of trainable parameters but may also interfere with effective cross-spectral interactions within ACSIM.</p>
<fig position="float" id="F2">
<label>Figure 2</label>
<caption><p>Effect of hyperparameter setting in the session-independent setting.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnhum-20-1755549-g0002.tif">
<alt-text content-type="machine-generated">Line chart comparing average accuracy percentage and number of trainable parameters for BCIC-IV-2a and OpenBMI datasets, plotted against the number of spatial filters and size of time windows, with six colored lines and a legend identifying each metric.</alt-text>
</graphic>
</fig>
</sec>
<sec>
<label>5.3.2</label>
<title>Effect of the size of time windows</title>
<p>As noted in <xref ref-type="bibr" rid="B17">Gabor (1946)</xref>, an inherent trade-off exists between temporal and frequency resolution in signals, implying that both cannot be simultaneously maximized. Consequently, adjusting the size of the time window alters the amount of temporal information captured, thereby affecting decoding performance. The experimental results shown in <xref ref-type="fig" rid="F2">Figure 2</xref> indicate that variations in <italic>w</italic> produce substantial fluctuations in decoding accuracy. Optimal performance is observed with <italic>w</italic> &#x0003D; 250 on both datasets, whereas for <italic>w</italic>&#x02208;{100, 125, 200, 500}, decoding performance declines markedly. These findings suggest that the relationship between time window size and decoding performance is neither strictly positive nor negative. An excessively small time window diminishes frequency resolution, providing insufficient spectral information for cross-spectral interactions within ACSIM, thereby reducing decoding accuracy. In contrast, an overly large time window inadequately captures fluctuations in subjects&#x00027; attention during the MI task, failing to appropriately segment periods of focus and distraction, which significantly impairs the effectiveness of ASDTA.</p></sec>
<sec>
<label>5.3.3</label>
<title>Effect of PLV threshold selection</title>
<p>As outlined in Section 3.1.2, the threshold was initially set to the 100<sup><italic>th</italic></sup> highest PLV value. To examine the influence of threshold selection on decoding performance, experiments were conducted on Dataset A using thresholds corresponding to the 50<sup><italic>th</italic></sup> and 150<sup><italic>th</italic></sup> highest PLV values. The results, summarized in <xref ref-type="table" rid="T5">Table 5</xref>, indicate that both excessively sparse and overly dense EEG graphs lead to notable performance degradation. In overly sparse graphs, inter-node information flow is severely restricted, and certain nodes may become isolated, preventing the model from effectively capturing interactions among brain regions during the MI task. Conversely, overly dense graphs result in nearly all nodes being interconnected, which, after GNN processing, produces an over-smoothing effect where node features converge to similar values. This redundancy in non-Euclidean spatial information increases the risk of model overfitting. Therefore, the careful selection of the PLV threshold is critical for optimizing decoding performance.</p>
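<p>For reference, a minimal NumPy sketch of PLV-based graph construction under this thresholding scheme is given below; the function is illustrative and assumes a [channels &#x000D7; samples] array rather than the exact pipeline of Section 3.1.2.</p>
<preformat>import numpy as np
from scipy.signal import hilbert

def plv_adjacency(eeg, k=100):
    """Binary EEG graph keeping the k highest-PLV electrode pairs."""
    phase = np.angle(hilbert(eeg, axis=1))          # instantaneous phase per channel
    diff = phase[:, None, :] - phase[None, :, :]    # pairwise phase differences
    plv = np.abs(np.exp(1j * diff).mean(axis=2))    # [C, C] phase locking values
    iu = np.triu_indices(eeg.shape[0], k=1)
    thresh = np.sort(plv[iu])[-k]                   # k-th highest off-diagonal PLV
    adj = np.logical_and(plv >= thresh, ~np.eye(eeg.shape[0], dtype=bool))
    return adj.astype(float)

adjacency = plv_adjacency(np.random.default_rng(0).standard_normal((22, 1000)))</preformat>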
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>Comparison of decoding performance under different PLV thresholds (average &#x000B1; std) in %.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Rank</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">50<sup><italic>th</italic></sup></td>
<td valign="top" align="center">76.39 &#x000B1; 12.85</td>
</tr>
<tr>
<td valign="top" align="left">100<sup><italic>th</italic></sup></td>
<td valign="top" align="center">80.09 &#x000B1; 13.72</td>
</tr>
<tr>
<td valign="top" align="left">150<sup><italic>th</italic></sup></td>
<td valign="top" align="center">77.12 &#x000B1; 13.13</td>
</tr></tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec>
<label>5.4</label>
<title>Ablation experiments</title>
<p>To evaluate the individual contributions of DSSICNN components to decoding performance, ablation experiments were performed on two datasets under the session-independent setting. The configurations of the DSSICNN variants employed in these studies are detailed in <xref ref-type="table" rid="T6">Table 6</xref>. In this framework, Branch 1 refers to the Temporal-Spectral-Spatial Branch, while Branch 2 corresponds to the Spatial Guidance Branch.</p>
<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>Design of DSSICNN variants.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center" colspan="2"><bold>DSIGSTFE</bold></th>
<th valign="top" align="center"><bold>ACSIM</bold></th>
<th valign="top" align="center"><bold>ASDTA</bold></th>
</tr>
<tr>
<th/>
<th valign="top" align="center"><bold>Branch 1</bold></th>
<th valign="top" align="center"><bold>Branch 2</bold></th>
<th/>
<th/>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">DSSICNN-1</td>
<td valign="top" align="center">&#x02713;</td>
<td valign="top" align="center">&#x02717;</td>
<td valign="top" align="center">&#x02713;</td>
<td valign="top" align="center">&#x02713;</td>
</tr>
<tr>
<td valign="top" align="left">DSSICNN-2</td>
<td valign="top" align="center">&#x02713;</td>
<td valign="top" align="center">&#x02713;</td>
<td valign="top" align="center">&#x02717;</td>
<td valign="top" align="center">&#x02713;</td>
</tr>
<tr>
<td valign="top" align="left">DSSICNN-3</td>
<td valign="top" align="center">&#x02713;</td>
<td valign="top" align="center">&#x02713;</td>
<td valign="top" align="center">&#x02713;</td>
<td valign="top" align="center">&#x02717;</td>
</tr></tbody>
</table>
</table-wrap>
<p>The results reported in <xref ref-type="table" rid="T7">Table 7</xref> demonstrate that DSSICNN significantly outperforms its variants (<italic>p</italic> &#x0003C; 0.05) across both datasets, underscoring the substantial contributions of each component to overall decoding performance. <xref ref-type="fig" rid="F3">Figure 3</xref> displays the confusion matrices of DSSICNN and its variants for Subject 3 from Dataset A, showing that DSSICNN consistently achieves higher classification accuracy across all classes. These observations further highlight the essential role of each DSSICNN component in MI-EEG decoding. Examination of the DSSICNN confusion matrix indicates that the feet and tongue classes exhibit the highest misclassification rates across most tasks. This finding aligns with the results reported in <xref ref-type="bibr" rid="B30">Liu et al. (2025)</xref> and suggests that DSSICNN still has a limited capacity to extract sufficiently discriminative MI-related features for these two classes.</p>
<table-wrap position="float" id="T7">
<label>Table 7</label>
<caption><p>Comparison of decoding performance (average &#x000B1; std) in % between DSSICNN and DSSICNN variants.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center" colspan="2"><bold>Dataset A</bold></th>
<th valign="top" align="center" colspan="2"><bold>Dataset B</bold></th>
</tr>
<tr>
<th/>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>F1-score</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>F1-score</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">DSSICNN-1</td>
<td valign="top" align="center">76.81 &#x000B1; 14.32<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">75.99 &#x000B1; 15.43<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">70.22 &#x000B1; 14.39<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">68.72 &#x000B1; 15.76<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">DSSICNN-2</td>
<td valign="top" align="center">77.39 &#x000B1; 14.29<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">76.83 &#x000B1; 14.92<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">69.79 &#x000B1; 14.55<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">67.67 &#x000B1; 16.73<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">DSSICNN-3</td>
<td valign="top" align="center">73.42 &#x000B1; 17.44<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">72.65 &#x000B1; 18.36<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">69.74 &#x000B1; 14.92<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">67.65 &#x000B1; 16.88<sup>&#x0002A;&#x0002A;</sup></td>
</tr>
<tr>
<td valign="top" align="left">DSSICNN</td>
<td valign="top" align="center">80.09 &#x000B1; 13.72</td>
<td valign="top" align="center">79.79 &#x000B1; 14.07</td>
<td valign="top" align="center">77.88 &#x000B1; 12.99</td>
<td valign="top" align="center">77.56 &#x000B1; 13.32</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p><sup>&#x0002A;</sup><italic>p</italic> &#x0003C; 0.1 and <sup>&#x0002A;&#x0002A;</sup><italic>p</italic> &#x0003C; 0.05. Bold values indicate the optimal decoding performance.</p>
</table-wrap-foot>
</table-wrap>
<fig position="float" id="F3">
<label>Figure 3</label>
<caption><p>Confusion matrices of DSSICNN and its variants. <bold>(a)</bold> DSSICNN. <bold>(b)</bold> DSSICNN-1. <bold>(c)</bold> DSSICNN-2. <bold>(d)</bold> DSSICNN-3.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnhum-20-1755549-g0003.tif">
<alt-text content-type="machine-generated">Four labeled confusion matrix heatmaps arranged in a two-by-two grid, each showing classification accuracy for four classes: LH, RH, F, and T, with darker blue indicating higher accuracy; panels are labeled a, b, c, and d in bold, corresponding to each subplot.</alt-text>
</graphic>
</fig>
<p>For Dataset A, ASDTA emerges as the component exerting the greatest impact on DSSICNN&#x00027;s decoding performance. This effect is likely due to substantial variability in subjects&#x00027; attention levels during MI task execution, which induces pronounced changes in the temporal fluctuation patterns associated with MI. Further examination of the confusion matrix in <xref ref-type="fig" rid="F3">Figure 3</xref> elucidates the specific contributions of each DSSICNN component to the classification of distinct MI tasks. In particular, the Spatial Guidance Branch of DSIGSTFE enhances the discriminative capacity of high-level semantic features associated with left-hand movements, whereas ACSIM amplifies the representational power of high-level features related to right-hand movements. In contrast, ASDTA primarily influences decoding accuracy for MI tasks other than tongue MI. Notably, for Dataset B, all DSSICNN components contribute approximately equally to improvements in decoding performance.</p>
</sec>
<sec>
<label>5.5</label>
<title>Online MI-BCI experiment</title>
<p>To evaluate the feasibility of deploying DSSICNN in real-world applications, and inspired by <xref ref-type="bibr" rid="B16">Forenzo et al. (2024)</xref> and <xref ref-type="bibr" rid="B15">Forenzo et al. (2025)</xref>, a simplified MI-BCI-based robotic arm control system was developed in this study. The system comprised three principal components: a signal acquisition module, a laptop computer, and a robotic arm. MI-EEG signals were recorded using the NeuSen W Wireless EEG Acquisition System [Neuracle Technology (Changzhou) Co., Ltd., Changzhou, China] as the signal acquisition module. The laptop was responsible for cue presentation, for reception and decoding of the MI-EEG signals transmitted from the acquisition module, and for generation of decoding outputs to control the robotic arm. The graphical user interface (GUI) and the MI-BCI experimental paradigm are illustrated in <xref ref-type="fig" rid="F4">Figure 4</xref>. The cues presented on the GUI included both textual and graphical instructions: the textual cues specified the actions to be performed by the participants, and the graphical cues indicated the type of MI to be executed. Within the paradigm, the text displayed along the time axis (e.g., &#x0201C;Ready&#x0201D;) corresponded to the textual instructions, whereas the images aligned with the axis represented the graphical instructions. Five participants were recruited for the online experiments, and each completed 60 MI trials. The study protocol was approved by the Ethics Committee of Sichuan Provincial Rehabilitation Hospital, China (Approval No. CKLL 2018008), and written informed consent was obtained from all participants. The MI tasks consisted of two classes, left-hand and right-hand MI, which were mapped to the control of the left and right robotic arms, respectively. A 1 s time window was used to segment the participants&#x00027; MI-EEG signals into control inputs; given that the acquisition module operates at a sampling rate of 250 Hz, we set <italic>w</italic> &#x0003D; 50. The command delivery cadence was quantified as the average inter-command interval. For each participant, a small amount of subject-specific data was first used to calibrate the MI-BCI; online experiments were then conducted, and the decoding accuracy, F1-score, and command delivery cadence were evaluated.</p>
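<p>As a concrete illustration of this segmentation, the following minimal Python sketch slices a continuous multichannel buffer into overlapping 1 s control inputs. The interpretation of <italic>w</italic> as the hop (in samples) between consecutive windows is an assumption made here for demonstration, and the buffer itself is synthetic.</p>
<code language="python">import numpy as np

FS = 250           # sampling rate (Hz) of the acquisition module
WIN = FS * 1       # 1 s decoding window = 250 samples
W = 50             # w = 50 from the text, assumed here to be the
                   # hop between consecutive windows, in samples

def segment(stream):
    """Yield (channels, WIN) slices from a (channels, samples) buffer."""
    n_samples = stream.shape[1]
    for start in range(0, n_samples - WIN + 1, W):
        yield stream[:, start:start + WIN]

# Example: 10 s of synthetic 64-channel EEG yields overlapping windows.
eeg = np.random.randn(64, FS * 10)
windows = list(segment(eeg))
print(len(windows), windows[0].shape)   # 46 (64, 250)
</code>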
<fig position="float" id="F4">
<label>Figure 4</label>
<caption><p>Paradigm and graphical user interface (GUI) of the online MI-BCI experiment. <bold>(a)</bold> Paradigm. <bold>(b)</bold> GUI.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnhum-20-1755549-g0004.tif">
<alt-text content-type="machine-generated">Panel &#x0201C;a&#x0201D; shows a schematic timeline with four phases&#x02014;Ready, Set, Execute, and Rest&#x02014;accompanied by an image of a clenched fist and directional arrows. Panel &#x0201C;b&#x0201D; displays a computer screen with the word &#x0201C;Ready&#x0201D; above a large cross symbol, serving as a visual cue for task initiation.</alt-text>
</graphic>
</fig>
<p>The online experimental results are presented in <xref ref-type="table" rid="T8">Table 8</xref>. DSSICNN achieved satisfactory decoding performance and command delivery cadence in the online experiments, thereby validating its potential for deployment in MI-BCI.</p>
<table-wrap position="float" id="T8">
<label>Table 8</label>
<caption><p>Decoding performance in % and command delivery cadence in online experiments.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Subject</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>F1-score</bold></th>
<th valign="top" align="center"><bold>Command delivery cadence (s)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="center">75</td>
<td valign="top" align="center">74.94</td>
<td valign="top" align="center">6.35</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="center">80</td>
<td valign="top" align="center">79.91</td>
<td valign="top" align="center">6.73</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="center">70</td>
<td valign="top" align="center">69.87</td>
<td valign="top" align="center">5.07</td>
</tr>
<tr>
<td valign="top" align="left">4</td>
<td valign="top" align="center">87.76</td>
<td valign="top" align="center">87.72</td>
<td valign="top" align="center">6.98</td>
</tr>
<tr>
<td valign="top" align="left">5</td>
<td valign="top" align="center">75</td>
<td valign="top" align="center">74.99</td>
<td valign="top" align="center">7.95</td>
</tr></tbody>
</table>
</table-wrap>
</sec>
<sec>
<label>5.6</label>
<title>Visualization</title>
<sec>
<label>5.6.1</label>
<title>Spatial distribution of convolutional kernel</title>
<p><xref ref-type="fig" rid="F5">Figure 5</xref> illustrates the spatial distribution of the convolutional kernel weights in DSIGSTFE for Subject 3 of Dataset A. Pronounced variations are observed in the spatial patterns across different spectral bands, indicating that distinct brain regions are selectively engaged for specific spectral components. Additionally, a marked asymmetry is evident in the distribution of kernel weights between the left and right hemispheres, reflecting differential regional activation during various MI tasks and highlighting the hierarchical spatial and functional organization of the brain as a complex network. Notably, the central and parietal regions consistently display the highest kernel weights for Subject 3, corresponding primarily to the motor cortex, which plays a critical role in MI.</p>
<fig position="float" id="F5">
<label>Figure 5</label>
<caption><p>Brain topographies of convolutional kernel weights.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnhum-20-1755549-g0005.tif">
<alt-text content-type="machine-generated">Grid of 64 circular heatmaps representing topographical data overlaid on head diagrams, colored from blue to red according to a scale bar ranging from 0.0 to 1.0 at the right.</alt-text>
</graphic>
</fig>
</sec>
<sec>
<label>5.6.2</label>
<title>Connectivity between brain regions</title>
<p>The connectivity strength between brain regions for Subject 3 from Dataset A is visualized using heatmaps, with values normalized to the range 0&#x02013;1, as shown in <xref ref-type="fig" rid="F6">Figure 6</xref>. Notable variations in connectivity strength are evident across different brain regions during the execution of various MI tasks. These findings suggest that integrating a mechanism to model inter-regional brain connectivity within the MI-EEG decoding framework can enhance the spatial semantic richness of extracted features, thereby improving the framework&#x00027;s capacity for spatial representation.</p>
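<p>A minimal sketch of this rendering step is given below: a random symmetric matrix stands in for the learned inter-regional connectivity, is min-max normalized to the range 0&#x02013;1 as described above, and is drawn as a heatmap.</p>
<code language="python">import numpy as np
import matplotlib.pyplot as plt

C = 22                                  # number of channels (illustrative)
rng = np.random.default_rng(0)
A = rng.random((C, C))
A = (A + A.T) / 2                       # enforce symmetry
np.fill_diagonal(A, 1.0)

A_norm = (A - A.min()) / (A.max() - A.min())   # min-max to [0, 1]

plt.imshow(A_norm, cmap="viridis", vmin=0.0, vmax=1.0)
plt.colorbar(label="connectivity strength")
plt.xlabel("channel")
plt.ylabel("channel")
plt.show()
</code>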
<fig position="float" id="F6">
<label>Figure 6</label>
<caption><p>Heatmap of connectivity strength between brain regions. <bold>(a)</bold> LH, left hand; <bold>(b)</bold> RH, right hand; <bold>(c)</bold> F, foot; <bold>(d)</bold> T, tongue.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnhum-20-1755549-g0006.tif">
<alt-text content-type="machine-generated">Four vertically stacked square heatmaps labeled a, b, c, and d with color gradients from purple to yellow, each showing a symmetric matrix of values between zero and one for channels labeled on both axes, with a color bar scale on the right of each chart and minor variances in the observed intensity patterns across the four panels.</alt-text>
</graphic>
</fig>
</sec>
<sec>
<label>5.6.3</label>
<title>Importance of various brain regions</title>
<p>In addition to evaluating inter-regional connectivity, we assess the relative importance of individual brain regions to gain deeper insight into the neurophysiological mechanisms underlying MI task execution. Given the hierarchical spatial and functional organization of the brain at the levels of neurons, local circuits, and functional areas (<xref ref-type="bibr" rid="B33">Power et al., 2011</xref>), MI tasks facilitate information exchange within specific brain regions. In this context, certain regions act as information hubs during task execution. By conceptualizing the brain as a complex network and representing EEG scalp electrodes over different regions as network nodes, the importance of each brain region can be quantified using node centrality metrics. Node centrality measures the relative significance of nodes within a network. Due to the intrinsic complexity of brain networks, considering only first-order neighborhoods is insufficient. Therefore, eigenvector centrality (<xref ref-type="bibr" rid="B36">Ruhnau, 2000</xref>) is employed, as it incorporates the centrality of neighboring nodes, effectively accounting for multi-order neighborhood information. The computation of eigenvector centrality is defined as follows:</p>
<disp-formula id="EQ18"><mml:math id="M41"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mi>A</mml:mi><mml:mi>&#x003B1;</mml:mi><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi>&#x003BB;</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mi>&#x003B1;</mml:mi></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(18)</label></disp-formula>
<p>where <italic>A</italic>&#x02208;<italic>R</italic><sup><italic>C</italic>&#x000D7;<italic>C</italic></sup> is the adjacency matrix, &#x003BB;<sub><italic>max</italic></sub> is the eigenvalue of <italic>A</italic> with the largest magnitude, and &#x003B1;&#x02208;<italic>R</italic><sup><italic>C</italic></sup> is the corresponding eigenvector, whose entries give the eigenvector centrality of each node.</p>
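<p>Equation 18 can be solved directly with a dense eigendecomposition. The sketch below selects the eigenvector associated with the largest-magnitude eigenvalue and rescales it to a non-negative unit-norm vector; the adjacency matrix here is a random symmetric placeholder, not the learned connectivity.</p>
<code language="python">import numpy as np

def eigenvector_centrality(A):
    """Solve A @ alpha = lambda_max * alpha (Equation 18) and return
    alpha, scaled to a non-negative unit-norm vector."""
    eigvals, eigvecs = np.linalg.eig(A)
    k = np.argmax(np.abs(eigvals))     # eigenvalue of largest magnitude
    alpha = np.real(eigvecs[:, k])
    alpha = np.abs(alpha)              # sign of an eigenvector is arbitrary
    return alpha / np.linalg.norm(alpha)

# Random symmetric adjacency matrix over C electrodes (placeholder).
C = 22
rng = np.random.default_rng(0)
A = rng.random((C, C))
A = (A + A.T) / 2
print(eigenvector_centrality(A).round(3))
</code>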
<p>The importance of individual brain regions was quantified using eigenvector centrality, and the corresponding brain topographies for Subject 3 from Dataset A are presented in <xref ref-type="fig" rid="F7">Figure 7</xref>. The results indicate that different brain regions display varying levels of engagement, contingent on the specific MI task being performed.</p>
<fig position="float" id="F7">
<label>Figure 7</label>
<caption><p>Brain topographies of eigenvector centrality. <bold>(a)</bold> LH, left hand; <bold>(b)</bold> RH, right hand; <bold>(c)</bold> F, foot; <bold>(d)</bold> T, tongue.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnhum-20-1755549-g0007.tif">
<alt-text content-type="machine-generated">Four labeled EEG scalp heatmaps display different brain activity patterns; each map shows varying red and blue gradients representing data values from zero to one with corresponding colorbars for a, b, c, and d.</alt-text>
</graphic>
</fig>
<p>Specifically, during left-hand and tongue MI tasks, a considerable amount of information exchange is localized within the central region, whereas right-hand MI tasks elicit prominent information flow across both the central and parietal lobes. In contrast, foot MI tasks are associated with increased information exchange within the frontal lobe. These findings underscore both the effectiveness and the importance of modeling inter-regional information transfer with a dynamic GNN.</p></sec>
<sec>
<label>5.6.4</label>
<title>Discriminability of high-level semantic feature</title>
<p>The high-level semantic features extracted by DSSICNN and the baseline models, i.e., the representations obtained immediately before the linear classification layer, were visualized using t-SNE, as shown in <xref ref-type="fig" rid="F8">Figure 8</xref>. The visualizations indicate that the features produced by DSSICNN exhibit markedly greater intra-class compactness and inter-class separability than those generated by the baseline models. These findings suggest that DSSICNN effectively captures highly discriminative high-level semantic representations, providing a clear rationale for its superior performance in MI-EEG decoding.</p>
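<p>The projection in <xref ref-type="fig" rid="F8">Figure 8</xref> follows the standard t-SNE recipe. The sketch below applies scikit-learn&#x00027;s TSNE to synthetic pre-classifier features; the feature dimensionality, trial counts, and perplexity are illustrative assumptions.</p>
<code language="python">import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# Synthetic pre-classifier features: 288 trials x 128 dims, 4 classes.
rng = np.random.default_rng(0)
offsets = np.repeat(np.eye(4, 128) * 5.0, 72, axis=0)   # class-wise shifts
feats = rng.normal(size=(288, 128)) + offsets
labels = np.repeat(np.arange(4), 72)

emb = TSNE(n_components=2, perplexity=30, init="pca",
           random_state=0).fit_transform(feats)

for c, name in enumerate(["LH", "RH", "F", "T"]):
    mask = labels == c
    plt.scatter(emb[mask, 0], emb[mask, 1], s=8, label=name)
plt.legend()
plt.title("t-SNE of high-level semantic features")
plt.show()
</code>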
<fig position="float" id="F8">
<label>Figure 8</label>
<caption><p>t-SNE of high-level semantic feature distribution (LH: left hand, RH: right hand, F: foot, T: tongue). <bold>(a)</bold> DSSICNN. <bold>(b)</bold> EEGNet. <bold>(c)</bold> FBCNet. <bold>(d)</bold> ADFCNN.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnhum-20-1755549-g0008.tif">
<alt-text content-type="machine-generated">Four scatter plots labeled a through d each display colored point clusters representing LH, RH, F, and T categories. Plot a shows distinctly separated clusters, plot b shows clusters overlapping more, plot c presents mixed points with less defined boundaries, and plot d displays four well-separated clusters with minimal overlap.</alt-text>
</graphic>
</fig>
</sec>
<sec>
<label>5.6.5</label>
<title>Importance across temporal segments</title>
<p>The attention weights assigned to the time windows in ASDTA for Subject 3 from Dataset A are shown in <xref ref-type="fig" rid="F9">Figure 9</xref>. The heatmap reveals pronounced variations in the distribution of attention weights across spectral bands along the temporal dimension. Notably, time windows toward the later stages of the temporal axis generally exhibit higher attention weights, which may reflect the slight temporal overlap between the MI phase and the preceding cue phase in the Dataset A experimental paradigm: because the earliest windows partly capture cue-related rather than MI-related activity, the model learns to weight later windows more heavily. Additionally, in several spectral bands, the earliest time window shows markedly elevated attention weights, indicating that these bands are particularly sensitive to spectral information associated with MI preparation.</p>
<fig position="float" id="F9">
<label>Figure 9</label>
<caption><p>Heatmap of attention weights.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fnhum-20-1755549-g0009.tif">
<alt-text content-type="machine-generated">Heatmap visualization showing spectral band data across sixty-four bands and ten temporal segments, with color intensity ranging from dark purple for lower values to bright yellow for higher values, accompanied by a horizontal color scale bar below.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec>
<label>5.7</label>
<title>Limitations and future research</title>
<p>Although the proposed DSSICNN demonstrates excellent decoding performance, several limitations remain. First, DSSICNN can be trained effectively on small-scale datasets without data augmentation; nevertheless, because MI-EEG data are inherently graph-structured, future work will investigate graph-based data augmentation techniques to increase the effective size of MI-EEG datasets. Second, although the brain is modeled as a complex network, the current approach does not explicitly account for its partitioned organizational structure. As a direction for future research, we aim to decompose the whole-brain graph into multiple subgraphs based on established functional brain partitions.</p></sec></sec>
<sec sec-type="conclusions" id="s6">
<label>6</label>
<title>Conclusion</title>
<p>By incorporating principles from network neuroscience into the MI-EEG decoding framework, we introduce DSSICNN, a model designed to systematically extract spatial features from MI-EEG signals. The architecture employs a dual-branch parallel design that simultaneously captures spatial information in both Euclidean and non-Euclidean domains. An attention-based mechanism is further integrated to dynamically recalibrate spectro-wise features, thereby enabling effective modeling of cross-spectral interactions. Additionally, the model tracks the evolution of MI-related temporal fluctuation patterns by aggregating feature representations along the temporal dimension. Comparative evaluations on two publicly available datasets demonstrate that DSSICNN significantly outperforms state-of-the-art methods in both session-independent and session-dependent settings. Overall, DSSICNN establishes a novel GNN-based paradigm for MI-EEG decoding and underscores the importance of leveraging the neurophysiological mechanisms underlying MI to enhance decoding performance.</p></sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: <ext-link ext-link-type="uri" xlink:href="https://www.bbci.de/competition/iv/&#x00023;dataset2a">https://www.bbci.de/competition/iv/&#x00023;dataset2a</ext-link>; <ext-link ext-link-type="uri" xlink:href="https://gigadb.org/dataset/100542">https://gigadb.org/dataset/100542</ext-link>.</p>
</sec>
<sec sec-type="ethics-statement" id="s8">
<title>Ethics statement</title>
<p>The studies involving humans were approved by the Ethics Committee of Sichuan Provincial Rehabilitation Hospital, China (Approval No. CKLL 2018008). The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study.</p>
</sec>
<sec sec-type="author-contributions" id="s9">
<title>Author contributions</title>
<p>ZS: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Software, Visualization, Writing &#x02013; original draft. ZG: Funding acquisition, Project administration, Resources, Validation, Methodology, Writing &#x02013; review &#x00026; editing. LC: Funding acquisition, Project administration, Resources, Supervision, Validation, Writing &#x02013; review &#x00026; editing. ZY: Funding acquisition, Project administration, Resources, Supervision, Validation, Writing &#x02013; review &#x00026; editing. YL: Funding acquisition, Project administration, Resources, Supervision, Validation, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s11">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec sec-type="disclaimer" id="s12">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>AL-Quraishi</surname> <given-names>M. S.</given-names></name> <name><surname>Elamvazuthi</surname> <given-names>I.</given-names></name> <name><surname>Daud</surname> <given-names>S. A.</given-names></name> <name><surname>Parasuraman</surname> <given-names>S.</given-names></name> <name><surname>Borboni</surname> <given-names>A.</given-names></name></person-group> (<year>2018</year>). <article-title>EEG-based control for upper and lower limb exoskeletons and prostheses: a systematic review</article-title>. <source>Sensors</source> <volume>18</volume>:<fpage>3342</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s18103342</pub-id><pub-id pub-id-type="pmid">30301238</pub-id></mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Altaheri</surname> <given-names>H.</given-names></name> <name><surname>Muhammad</surname> <given-names>G.</given-names></name> <name><surname>Alsulaiman</surname> <given-names>M.</given-names></name> <name><surname>Amin</surname> <given-names>S. U.</given-names></name> <name><surname>Altuwaijri</surname> <given-names>G. A.</given-names></name> <name><surname>Abdul</surname> <given-names>W.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Deep learning techniques for classification of electroencephalogram (EEG) motor imagery (MI) signals: a review</article-title>. <source>Neural Comput. Applic</source>. <volume>35</volume>, <fpage>14681</fpage>&#x02013;<lpage>14722</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00521-021-06352-5</pub-id></mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Amin</surname> <given-names>S. U.</given-names></name> <name><surname>Alsulaiman</surname> <given-names>M.</given-names></name> <name><surname>Muhammad</surname> <given-names>G.</given-names></name> <name><surname>Mekhtiche</surname> <given-names>M. A.</given-names></name> <name><surname>Shamim Hossain</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>Deep Learning for EEG motor imagery classification based on multi-layer CNNs feature fusion</article-title>. <source>Fut. Gener. Comput. Syst</source>. <volume>101</volume>, <fpage>542</fpage>&#x02013;<lpage>554</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.future.2019.06.027</pub-id></mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ang</surname> <given-names>K. K.</given-names></name> <name><surname>Chin</surname> <given-names>Z. Y.</given-names></name> <name><surname>Zhang</surname> <given-names>H.</given-names></name> <name><surname>Guan</surname> <given-names>C.</given-names></name></person-group> (<year>2008</year>). <article-title>&#x0201C;Filter bank common spatial pattern (FBCSP) in brain-computer interface,&#x0201D;</article-title> in <source>2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence)</source>, 2390&#x02013;2397. doi: <pub-id pub-id-type="doi">10.1109/IJCNN.2008.4634130</pub-id></mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bassett</surname> <given-names>D. S.</given-names></name> <name><surname>Sporns</surname> <given-names>O.</given-names></name></person-group> (<year>2017</year>). <article-title>Network neuroscience</article-title>. <source>Nat. Neurosci</source>. <volume>20</volume>, <fpage>353</fpage>&#x02013;<lpage>364</lpage>. doi: <pub-id pub-id-type="doi">10.1038/nn.4502</pub-id></mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bhambare</surname> <given-names>R.</given-names></name> <name><surname>Jain</surname> <given-names>M.</given-names></name></person-group> (<year>2024</year>). <article-title>Multi-class motor imagery detection using optimum channels</article-title>. <source>Int. J. Intell. Syst. Applic. Eng</source>. <volume>12</volume>, <fpage>67</fpage>&#x02013;<lpage>78</lpage>.</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Brody</surname> <given-names>S.</given-names></name> <name><surname>Alon</surname> <given-names>U.</given-names></name> <name><surname>Yahav</surname> <given-names>E.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;How attentive are graph attention networks?,&#x0201D;</article-title> in <source>International Conference on Learning Representations</source>.</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>Y.</given-names></name> <name><surname>Kalantidis</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>J.</given-names></name> <name><surname>Yan</surname> <given-names>S.</given-names></name> <name><surname>Feng</surname> <given-names>J.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;A2^-nets: double attention networks,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source> (<publisher-loc>Curran Associates, Inc.</publisher-loc>).</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chollet</surname> <given-names>F.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;Xception: deep learning with depthwise separable convolutions,&#x0201D;</article-title> in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</source>. doi: <pub-id pub-id-type="doi">10.1109/CVPR.2017.195</pub-id></mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Clevert</surname> <given-names>D.-A.</given-names></name> <name><surname>Unterthiner</surname> <given-names>T.</given-names></name> <name><surname>Hochreiter</surname> <given-names>S.</given-names></name></person-group> (<year>2015</year>). <article-title>Fast and accurate deep network learning by exponential linear units (ELUs)</article-title>. <source>arXiv preprint arXiv:1511.07289</source>.</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Du</surname> <given-names>G.</given-names></name> <name><surname>Su</surname> <given-names>J.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name> <name><surname>Su</surname> <given-names>K.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Teng</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>A multi-dimensional graph convolution network for EEG emotion recognition</article-title>. <source>IEEE Trans. Instrum. Meas</source>. <volume>71</volume>, <fpage>1</fpage>&#x02013;<lpage>11</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIM.2022.3204314</pub-id></mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Edelman</surname> <given-names>B. J.</given-names></name> <name><surname>Zhang</surname> <given-names>S.</given-names></name> <name><surname>Schalk</surname> <given-names>G.</given-names></name> <name><surname>Brunner</surname> <given-names>P.</given-names></name> <name><surname>M&#x000FC;ller-Putz</surname> <given-names>G.</given-names></name> <name><surname>Guan</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Non-invasive brain-computer interfaces: state of the art and trends</article-title>. <source>IEEE Rev. Biomed. Eng</source>. <volume>18</volume>, <fpage>26</fpage>&#x02013;<lpage>49</lpage>. doi: <pub-id pub-id-type="doi">10.1109/RBME.2024.3449790</pub-id><pub-id pub-id-type="pmid">39186407</pub-id></mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Feng</surname> <given-names>L.</given-names></name> <name><surname>Cheng</surname> <given-names>C.</given-names></name> <name><surname>Zhao</surname> <given-names>M.</given-names></name> <name><surname>Deng</surname> <given-names>H.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name></person-group> (<year>2022</year>). <article-title>EEG-based emotion recognition using spatial-temporal graph convolutional LSTM with attention mechanism</article-title>. <source>IEEE J. Biomed. Health Inf</source>. <volume>26</volume>, <fpage>5406</fpage>&#x02013;<lpage>5417</lpage>. doi: <pub-id pub-id-type="doi">10.1109/JBHI.2022.3198688</pub-id><pub-id pub-id-type="pmid">35969553</pub-id></mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Feng</surname> <given-names>N.</given-names></name> <name><surname>Hu</surname> <given-names>F.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name> <name><surname>Gouda</surname> <given-names>M. A.</given-names></name></person-group> (<year>2020</year>). <article-title>Decoding of voluntary and involuntary upper-limb motor imagery based on graph fourier transform and cross-frequency coupling coefficients</article-title>. <source>J. Neural Eng</source>. <volume>17</volume>:<fpage>056043</fpage>. doi: <pub-id pub-id-type="doi">10.1088/1741-2552/abc024</pub-id><pub-id pub-id-type="pmid">33045685</pub-id></mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Forenzo</surname> <given-names>D.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Wittenberg</surname> <given-names>G. F.</given-names></name> <name><surname>He</surname> <given-names>B.</given-names></name></person-group> (<year>2025</year>). <article-title>Continuous reaching and grasping with a BCI controlled robotic arm in healthy and stroke-affected individuals</article-title>. <source>IEEE Trans. Neural Syst. Rehabil. Eng</source>. <volume>33</volume>, <fpage>3888</fpage>&#x02013;<lpage>3899</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNSRE.2025.3611821</pub-id><pub-id pub-id-type="pmid">40966144</pub-id></mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Forenzo</surname> <given-names>D.</given-names></name> <name><surname>Zhu</surname> <given-names>H.</given-names></name> <name><surname>Shanahan</surname> <given-names>J.</given-names></name> <name><surname>Lim</surname> <given-names>J.</given-names></name> <name><surname>He</surname> <given-names>B.</given-names></name></person-group> (<year>2024</year>). <article-title>Continuous tracking using deep learning-based decoding for noninvasive brain-computer interface</article-title>. <source>PNAS Nexu</source> <volume>3</volume>:<fpage>pgae145</fpage>. doi: <pub-id pub-id-type="doi">10.1093/pnasnexus/pgae145</pub-id></mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gabor</surname> <given-names>D.</given-names></name></person-group> (<year>1946</year>). <article-title>Theory of communication. Part 1: the analysis of information</article-title>. <source>J. Instit. Electr. Eng. Part III</source> <volume>93</volume>, <fpage>429</fpage>&#x02013;<lpage>441</lpage>. doi: <pub-id pub-id-type="doi">10.1049/ji-3-2.1946.0074</pub-id></mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gu</surname> <given-names>H.</given-names></name> <name><surname>Chen</surname> <given-names>T.</given-names></name> <name><surname>Ma</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>M.</given-names></name> <name><surname>Sun</surname> <given-names>Y.</given-names></name> <name><surname>Zhao</surname> <given-names>J.</given-names></name></person-group> (<year>2025</year>). <article-title>CLTNet: a hybrid deep learning model for motor imagery classification</article-title>. <source>Brain Sci</source>. <volume>15</volume>:<fpage>124</fpage>. doi: <pub-id pub-id-type="doi">10.3390/brainsci15020124</pub-id><pub-id pub-id-type="pmid">40002457</pub-id></mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hsu</surname> <given-names>W.-Y.</given-names></name> <name><surname>Sun</surname> <given-names>Y.-N.</given-names></name></person-group> (<year>2009</year>). <article-title>EEG-based motor imagery analysis using weighted wavelet transform features</article-title>. <source>J. Neurosci. Methods</source> <volume>176</volume>, <fpage>310</fpage>&#x02013;<lpage>318</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jneumeth.2008.09.014</pub-id><pub-id pub-id-type="pmid">18848844</pub-id></mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>J.</given-names></name> <name><surname>Shen</surname> <given-names>L.</given-names></name> <name><surname>Sun</surname> <given-names>G.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;Squeeze-and-excitation networks,&#x0201D;</article-title> in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</source>. doi: <pub-id pub-id-type="doi">10.1109/CVPR.2018.00745</pub-id><pub-id pub-id-type="pmid">31034408</pub-id></mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ioffe</surname> <given-names>S.</given-names></name> <name><surname>Szegedy</surname> <given-names>C.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;Batch normalization: accelerating deep network training by reducing internal covariate shift,&#x0201D;</article-title> in <source>Proceedings of the 32nd International Conference on Machine Learning</source>, eds. F. Bach, and D. Blei (Lille, France: PMLR), <fpage>448</fpage>&#x02013;<lpage>456</lpage>.</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Izhikevich</surname> <given-names>E. M.</given-names></name></person-group> (<year>2007</year>). <source>Dynamical Systems in Neuroscience</source>. New York: MIT Press. doi: <pub-id pub-id-type="doi">10.7551/mitpress/2526.001.0001</pub-id></mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jacquet</surname> <given-names>T.</given-names></name> <name><surname>Lepers</surname> <given-names>R.</given-names></name> <name><surname>Poulin-Charronnat</surname> <given-names>B.</given-names></name> <name><surname>Bard</surname> <given-names>P.</given-names></name> <name><surname>Pfister</surname> <given-names>P.</given-names></name> <name><surname>Pageaux</surname> <given-names>B.</given-names></name></person-group> (<year>2021</year>). <article-title>Mental fatigue induced by prolonged motor imagery increases perception of effort and the activity of motor areas</article-title>. <source>Neuropsychologia</source> <volume>150</volume>:<fpage>107701</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neuropsychologia.2020.107701</pub-id><pub-id pub-id-type="pmid">33276035</pub-id></mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kingma</surname> <given-names>D. P.</given-names></name> <name><surname>Ba</surname> <given-names>J.</given-names></name></person-group> (<year>2014</year>). <article-title>Adam: a method for stochastic optimization</article-title>. <source>arXiv preprint arXiv:1412.6980</source>.</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Klepl</surname> <given-names>D.</given-names></name> <name><surname>He</surname> <given-names>F.</given-names></name> <name><surname>Wu</surname> <given-names>M.</given-names></name> <name><surname>Blackburn</surname> <given-names>D. J.</given-names></name> <name><surname>Sarrigiannis</surname> <given-names>P.</given-names></name></person-group> (<year>2022</year>). <article-title>EEG-based graph neural network classification of Alzheimer&#x00027;s disease: an empirical evaluation of functional connectivity methods</article-title>. <source>IEEE Trans. Neural Syst. Rehabilit. Eng</source>. <volume>30</volume>, <fpage>2651</fpage>&#x02013;<lpage>2660</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNSRE.2022.3204913</pub-id><pub-id pub-id-type="pmid">36067099</pub-id></mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lachaux</surname> <given-names>J.-P.</given-names></name> <name><surname>Rodriguez</surname> <given-names>E.</given-names></name> <name><surname>Martinerie</surname> <given-names>J.</given-names></name> <name><surname>Varela</surname> <given-names>F. J.</given-names></name></person-group> (<year>1999</year>). <article-title>Measuring phase synchrony in brain signals</article-title>. <source>Hum. Brain Mapp</source>. <volume>8</volume>, <fpage>194</fpage>&#x02013;<lpage>208</lpage>. doi: <pub-id pub-id-type="doi">10.1002/(SICI)1097-0193(1999)8:4&#x0003C;194::AID-HBM4&#x0003E;3.0.CO;2-C</pub-id><pub-id pub-id-type="pmid">10619414</pub-id></mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lawhern</surname> <given-names>V. J.</given-names></name> <name><surname>Solon</surname> <given-names>A. J.</given-names></name> <name><surname>Waytowich</surname> <given-names>N. R.</given-names></name> <name><surname>Gordon</surname> <given-names>S. M.</given-names></name> <name><surname>Hung</surname> <given-names>C. P.</given-names></name> <name><surname>Lance</surname> <given-names>B. J.</given-names></name></person-group> (<year>2018</year>). <article-title>EEGNet: a compact convolutional neural network for EEG-based brain-computer interfaces</article-title>. <source>J. Neural Eng</source>. <volume>15</volume>:<fpage>056013</fpage>. doi: <pub-id pub-id-type="doi">10.1088/1741-2552/aace8c</pub-id><pub-id pub-id-type="pmid">29932424</pub-id></mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lee</surname> <given-names>M.-H.</given-names></name> <name><surname>Kwon</surname> <given-names>O.-Y.</given-names></name> <name><surname>Kim</surname> <given-names>Y.-J.</given-names></name> <name><surname>Kim</surname> <given-names>H.-K.</given-names></name> <name><surname>Lee</surname> <given-names>Y.-E.</given-names></name> <name><surname>Williamson</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>EEG dataset and OpenBMI toolbox for three BCI paradigms: an investigation into BCI illiteracy</article-title>. <source>GigaScience</source> <volume>8</volume>:<fpage>giz002</fpage>. doi: <pub-id pub-id-type="doi">10.1093/gigascience/giz002</pub-id><pub-id pub-id-type="pmid">30698704</pub-id></mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>C.</given-names></name> <name><surname>Jin</surname> <given-names>J.</given-names></name> <name><surname>Daly</surname> <given-names>I.</given-names></name> <name><surname>Li</surname> <given-names>S.</given-names></name> <name><surname>Sun</surname> <given-names>H.</given-names></name> <name><surname>Huang</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>SincNet-based hybrid neural network for motor imagery EEG decoding</article-title>. <source>IEEE Trans. Neural Syst. Rehabilit. Eng</source>. <volume>30</volume>, <fpage>540</fpage>&#x02013;<lpage>549</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNSRE.2022.3156076</pub-id><pub-id pub-id-type="pmid">35235515</pub-id></mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>K.</given-names></name> <name><surname>Xing</surname> <given-names>X.</given-names></name> <name><surname>Yang</surname> <given-names>T.</given-names></name> <name><surname>Yu</surname> <given-names>Z.</given-names></name> <name><surname>Xiao</surname> <given-names>B.</given-names></name> <name><surname>Wang</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>DMSACNN: deep multiscale attentional convolutional neural network for EEG-based motor decoding</article-title>. <source>IEEE J. Biomed. Health Inf</source>. <volume>29</volume>, <fpage>4884</fpage>&#x02013;<lpage>4896</lpage>. doi: <pub-id pub-id-type="doi">10.1109/JBHI.2025.3546288</pub-id><pub-id pub-id-type="pmid">40031548</pub-id></mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>K.</given-names></name> <name><surname>Yang</surname> <given-names>M.</given-names></name> <name><surname>Yu</surname> <given-names>Z.</given-names></name> <name><surname>Wang</surname> <given-names>G.</given-names></name> <name><surname>Wu</surname> <given-names>W.</given-names></name></person-group> (<year>2023</year>). <article-title>FBMSNet: a filter-bank multi-scale convolutional neural network for EEG-based motor imagery decoding</article-title>. <source>IEEE Trans. Biomed. Eng</source>. <volume>70</volume>, <fpage>436</fpage>&#x02013;<lpage>445</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TBME.2022.3193277</pub-id><pub-id pub-id-type="pmid">35867371</pub-id></mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mane</surname> <given-names>R.</given-names></name> <name><surname>Chew</surname> <given-names>E.</given-names></name> <name><surname>Chua</surname> <given-names>K.</given-names></name> <name><surname>Ang</surname> <given-names>K. K.</given-names></name> <name><surname>Robinson</surname> <given-names>N.</given-names></name> <name><surname>Vinod</surname> <given-names>A. P.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>FBCNet: a multi-view convolutional neural network for brain-computer interface</article-title>. <source>arXiv preprint arXiv:2104.01233</source>.</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Power</surname> <given-names>J.</given-names></name> <name><surname>Cohen</surname> <given-names>A.</given-names></name> <name><surname>Nelson</surname> <given-names>S.</given-names></name> <name><surname>Wig</surname> <given-names>G.</given-names></name> <name><surname>Barnes</surname> <given-names>K.</given-names></name> <name><surname>Church</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>Functional network organization of the human brain</article-title>. <source>Neuron</source> <volume>72</volume>, <fpage>665</fpage>&#x02013;<lpage>678</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.neuron.2011.09.006</pub-id><pub-id pub-id-type="pmid">22099467</pub-id></mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Qin</surname> <given-names>Y.</given-names></name> <name><surname>Yang</surname> <given-names>B.</given-names></name> <name><surname>Ke</surname> <given-names>S.</given-names></name> <name><surname>Liu</surname> <given-names>P.</given-names></name> <name><surname>Rong</surname> <given-names>F.</given-names></name> <name><surname>Xia</surname> <given-names>X.</given-names></name></person-group> (<year>2024</year>). <article-title>M-FANet: multi-feature attention convolutional neural network for motor imagery decoding</article-title>. <source>IEEE Trans. Neural Syst. Rehabilit. Eng</source>. <volume>32</volume>, <fpage>401</fpage>&#x02013;<lpage>411</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNSRE.2024.3351863</pub-id><pub-id pub-id-type="pmid">38194394</pub-id></mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Qin</surname> <given-names>Z.</given-names></name> <name><surname>Zhang</surname> <given-names>P.</given-names></name> <name><surname>Wu</surname> <given-names>F.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;FcaNet: frequency channel attention networks,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)</source>, 783&#x02013;792. doi: <pub-id pub-id-type="doi">10.1109/ICCV48922.2021.00082</pub-id></mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ruhnau</surname> <given-names>B.</given-names></name></person-group> (<year>2000</year>). <article-title>Eigenvector-centrality&#x02014;a node-centrality?</article-title> <source>Soc. Netw</source>. <volume>22</volume>, <fpage>357</fpage>&#x02013;<lpage>365</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S0378-8733(00)00031-9</pub-id></mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schirrmeister</surname> <given-names>R. T.</given-names></name> <name><surname>Springenberg</surname> <given-names>J. T.</given-names></name> <name><surname>Fiederer</surname> <given-names>L. D. J.</given-names></name> <name><surname>Glasstetter</surname> <given-names>M.</given-names></name> <name><surname>Eggensperger</surname> <given-names>K.</given-names></name> <name><surname>Tangermann</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Deep learning with convolutional neural networks for EEG decoding and visualization</article-title>. <source>Hum. Brain Mapp</source>. <volume>38</volume>, <fpage>5391</fpage>&#x02013;<lpage>5420</lpage>. doi: <pub-id pub-id-type="doi">10.1002/hbm.23730</pub-id><pub-id pub-id-type="pmid">28782865</pub-id></mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Song</surname> <given-names>T.</given-names></name> <name><surname>Zheng</surname> <given-names>W.</given-names></name> <name><surname>Song</surname> <given-names>P.</given-names></name> <name><surname>Cui</surname> <given-names>Z.</given-names></name></person-group> (<year>2020</year>). <article-title>EEG emotion recognition using dynamical graph convolutional neural networks</article-title>. <source>IEEE Trans. Affect. Comput</source>. <volume>11</volume>, <fpage>532</fpage>&#x02013;<lpage>541</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TAFFC.2018.2817622</pub-id></mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Song</surname> <given-names>Y.</given-names></name> <name><surname>Zheng</surname> <given-names>Q.</given-names></name> <name><surname>Liu</surname> <given-names>B.</given-names></name> <name><surname>Gao</surname> <given-names>X.</given-names></name></person-group> (<year>2023</year>). <article-title>EEG conformer: convolutional transformer for EEG decoding and visualization</article-title>. <source>IEEE Trans. Neural Syst. Rehabilit. Eng</source>. <volume>31</volume>, <fpage>710</fpage>&#x02013;<lpage>719</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNSRE.2022.3230250</pub-id><pub-id pub-id-type="pmid">37015413</pub-id></mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tangermann</surname> <given-names>M.</given-names></name> <name><surname>M&#x000FC;ller</surname> <given-names>K.-R.</given-names></name> <name><surname>Aertsen</surname> <given-names>A.</given-names></name> <name><surname>Birbaumer</surname> <given-names>N.</given-names></name> <name><surname>Braun</surname> <given-names>C.</given-names></name> <name><surname>Brunner</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2012</year>). <article-title>Review of the BCI competition IV</article-title>. <source>Front. Neurosci</source>. <volume>6</volume>:<fpage>55</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fnins.2012.00055</pub-id><pub-id pub-id-type="pmid">22811657</pub-id></mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tao</surname> <given-names>W.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Wong</surname> <given-names>C. M.</given-names></name> <name><surname>Jia</surname> <given-names>Z.</given-names></name> <name><surname>Li</surname> <given-names>C.</given-names></name> <name><surname>Chen</surname> <given-names>X.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>ADFCNN: attention-based dual-scale fusion convolutional neural network for motor imagery brain-computer interface</article-title>. <source>IEEE Trans. Neural Syst. Rehab. Eng</source>. <volume>32</volume>, <fpage>154</fpage>&#x02013;<lpage>165</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNSRE.2023.3342331</pub-id><pub-id pub-id-type="pmid">38090841</pub-id></mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Vaswani</surname> <given-names>A.</given-names></name> <name><surname>Shazeer</surname> <given-names>N.</given-names></name> <name><surname>Parmar</surname> <given-names>N.</given-names></name> <name><surname>Uszkoreit</surname> <given-names>J.</given-names></name> <name><surname>Jones</surname> <given-names>L.</given-names></name> <name><surname>Gomez</surname> <given-names>A. N.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>&#x0201C;Attention is all you need,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source> (<publisher-loc>Curran Associates, Inc.</publisher-loc>).</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Velickovi&#x00107;</surname> <given-names>P.</given-names></name> <name><surname>Cucurull</surname> <given-names>G.</given-names></name> <name><surname>Casanova</surname> <given-names>A.</given-names></name> <name><surname>Romero</surname> <given-names>A.</given-names></name> <name><surname>Li&#x000F2;</surname> <given-names>P.</given-names></name> <name><surname>Bengio</surname> <given-names>Y.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;Graph attention networks,&#x0201D;</article-title> in <source>International Conference on Learning Representations</source>.</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>J.</given-names></name> <name><surname>Yao</surname> <given-names>L.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name></person-group> (<year>2023</year>). <article-title>IFNet: an interactive frequency convolutional neural network for enhancing motor imagery decoding from EEG</article-title>. <source>IEEE Trans. Neural Syst. Rehabilit. Eng</source>. <volume>31</volume>, <fpage>1900</fpage>&#x02013;<lpage>1911</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNSRE.2023.3257319</pub-id><pub-id pub-id-type="pmid">37027253</pub-id></mixed-citation>
</ref>
<ref id="B45">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Q.</given-names></name> <name><surname>Wu</surname> <given-names>B.</given-names></name> <name><surname>Zhu</surname> <given-names>P.</given-names></name> <name><surname>Li</surname> <given-names>P.</given-names></name> <name><surname>Zuo</surname> <given-names>W.</given-names></name> <name><surname>Hu</surname> <given-names>Q.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;ECA-Net: efficient channel attention for deep convolutional neural networks,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</source>. doi: <pub-id pub-id-type="doi">10.1109/CVPR42600.2020.01155</pub-id></mixed-citation>
</ref>
<ref id="B46">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Dai</surname> <given-names>X.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name> <name><surname>Chen</surname> <given-names>X.</given-names></name> <name><surname>Hu</surname> <given-names>Q.</given-names></name> <name><surname>Hu</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Motor imagery electroencephalogram classification algorithm based on joint features in the spatial and frequency domains and instance transfer</article-title>. <source>Front. Hum. Neurosci</source>. <volume>17</volume>:<fpage>1175399</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fnhum.2023.1175399</pub-id><pub-id pub-id-type="pmid">37213929</pub-id></mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Hersche</surname> <given-names>M.</given-names></name> <name><surname>Magno</surname> <given-names>M.</given-names></name> <name><surname>Benini</surname> <given-names>L.</given-names></name></person-group> (<year>2024</year>). <article-title>MI-BMInet: an efficient convolutional neural network for motor imagery brain-machine interfaces with EEG channel selection</article-title>. <source>IEEE Sens. J</source>. <volume>24</volume>, <fpage>8835</fpage>&#x02013;<lpage>8847</lpage>. doi: <pub-id pub-id-type="doi">10.1109/JSEN.2024.3353146</pub-id></mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Woo</surname> <given-names>S.</given-names></name> <name><surname>Park</surname> <given-names>J.</given-names></name> <name><surname>Lee</surname> <given-names>J.-Y.</given-names></name> <name><surname>Kweon</surname> <given-names>I. S.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;CBAM: convolutional block attention module,&#x0201D;</article-title> in <source>Proceedings of the European Conference on Computer Vision (ECCV)</source>. doi: <pub-id pub-id-type="doi">10.1007/978-3-030-01234-2_1</pub-id></mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>Z.</given-names></name> <name><surname>Pan</surname> <given-names>S.</given-names></name> <name><surname>Chen</surname> <given-names>F.</given-names></name> <name><surname>Long</surname> <given-names>G.</given-names></name> <name><surname>Zhang</surname> <given-names>C.</given-names></name> <name><surname>Yu</surname> <given-names>P. S.</given-names></name></person-group> (<year>2021</year>). <article-title>A comprehensive survey on graph neural networks</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst</source>. <volume>32</volume>, <fpage>4</fpage>&#x02013;<lpage>24</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNNLS.2020.2978386</pub-id><pub-id pub-id-type="pmid">32217482</pub-id></mixed-citation>
</ref>
<ref id="B50">
<mixed-citation publication-type="confproc"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>K.</given-names></name> <name><surname>Hu</surname> <given-names>W.</given-names></name> <name><surname>Leskovec</surname> <given-names>J.</given-names></name> <name><surname>Jegelka</surname> <given-names>S.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;How powerful are graph neural networks?,&#x0201D;</article-title> in <source>International Conference on Learning Representations</source>.</mixed-citation>
</ref>
<ref id="B51">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>D.</given-names></name> <name><surname>Yao</surname> <given-names>L.</given-names></name> <name><surname>Chen</surname> <given-names>K.</given-names></name> <name><surname>Monaghan</surname> <given-names>J.</given-names></name></person-group> (<year>2019</year>). <article-title>A convolutional recurrent attention model for subject-independent EEG signal analysis</article-title>. <source>IEEE Signal Process. Lett</source>. <volume>26</volume>, <fpage>715</fpage>&#x02013;<lpage>719</lpage>. doi: <pub-id pub-id-type="doi">10.1109/LSP.2019.2906824</pub-id></mixed-citation>
</ref>
<ref id="B52">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>H.</given-names></name> <name><surname>Zhu</surname> <given-names>G.</given-names></name> <name><surname>You</surname> <given-names>F.</given-names></name> <name><surname>Kuang</surname> <given-names>S.</given-names></name> <name><surname>Sun</surname> <given-names>L.</given-names></name></person-group> (<year>2019</year>). <article-title>A multi-branch 3D convolutional neural network for EEG-based motor imagery classification</article-title>. <source>IEEE Trans. Neural Syst. Rehabilit. Eng</source>. <volume>27</volume>, <fpage>2164</fpage>&#x02013;<lpage>2177</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNSRE.2019.2938295</pub-id><pub-id pub-id-type="pmid">31478864</pub-id></mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/626478/overview">Bin He</ext-link>, Carnegie Mellon University, United States</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1325047/overview">Kecheng Shi</ext-link>, University of Electronic Science and Technology of China, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2067741/overview">Lucian Andrei Gheorghe</ext-link>, Nissan, United States</p>
</fn>
</fn-group>
</back>
</article>