<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" dtd-version="1.3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Bioinform.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Bioinformatics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Bioinform.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2673-7647</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1738132</article-id>
<article-id pub-id-type="doi">10.3389/fbinf.2026.1738132</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>SimpleKANSleepNet: a Kolmogorov&#x2013;Arnold network based sleep stage classification method</article-title>
<alt-title alt-title-type="left-running-head">Ji et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fbinf.2026.1738132">10.3389/fbinf.2026.1738132</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Ji</surname>
<given-names>Xiaopeng</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3264291"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Lei</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<uri xlink:href="https://loop.frontiersin.org/people/2033045"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhou</surname>
<given-names>Yong</given-names>
</name>
<xref ref-type="aff" rid="aff1"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
</contrib-group>
<aff id="aff1">
<institution>School of Computer Science and Technology/School of Artificial Intelligence, China University of Mining and Technology</institution>, <city>Xuzhou</city>, <country country="CN">China</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Xiaopeng Ji, <email xlink:href="mailto:jixiaopeng@cumt.edu.cn">jixiaopeng@cumt.edu.cn</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-18">
<day>18</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>6</volume>
<elocation-id>1738132</elocation-id>
<history>
<date date-type="received">
<day>03</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>22</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>28</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Ji, Wang and Zhou.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Ji, Wang and Zhou</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-18">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>A novel Kolmogorov&#x2013;Arnold Network (KAN) based machine learning model is proposed for the automatic sleep stage classification task. The redefined architecture of the Multilayer Perceptron (MLP) aims to build a more flexible model by using learnable activation functions. In this study, an effective KAN model named SimpleKANSleepNet is evaluated on two different datasets with temporal features and frequency features extracted from electroencephalography (EEG), electromyogram (EMG), electrooculogram (EOG), and electrocardiogram (ECG) signals through a dual-stream convolutional neural network (CNN). Compared with existing CNN-based methods and graph convolutional networks (GCNs), the proposed model achieves an overall classification accuracy, F1-score, and Cohen&#x2019;s kappa on the ISRUC-S1 and the Sleep-EDF-153 datasets of 0.812, 0.793, 0.757, 0.928, 0.929, and 0.910, respectively, which demonstrates its competitive classification performance and generality. Moreover, several data balancing methods are tested on Sleep-EDF-153 to further evaluate the potential for achieving the best results. Finally, the factors that may affect the classification ability are tested on the ISRUC-S1 dataset.</p>
</abstract>
<kwd-group>
<kwd>artificial intelligence (AI)</kwd>
<kwd>deep learning</kwd>
<kwd>electroencephalography (EEG)</kwd>
<kwd>Kolmogorov&#x2013;Arnold network</kwd>
<kwd>sleep stage classification</kwd>
</kwd-group>
<funding-group>
<award-group id="gs1">
<funding-source id="sp1">
<institution-wrap>
<institution>China Postdoctoral Science Foundation</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100002858</institution-id>
</institution-wrap>
</funding-source>
<award-id rid="sp1">2025M781677</award-id>
</award-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work is supported by the China Postdoctoral Science Foundation under Grant Number 2025M781677.</funding-statement>
</funding-group>
<counts>
<fig-count count="6"/>
<table-count count="7"/>
<equation-count count="12"/>
<ref-count count="35"/>
<page-count count="00"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational BioImaging</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Sleep is a complex, cyclic physiological process paramount to human cognitive function, memory consolidation, and overall physical health (<xref ref-type="bibr" rid="B27">Siegel, 2005</xref>). Common sleep problems are widespread and pose serious health risks, including heart disease, weight problems, and reduced daily productivity (<xref ref-type="bibr" rid="B33">Zhang et al., 2024</xref>). Polysomnography (PSG) analysis is the most effective way to evaluate sleep quality and identify sleep disorders, which involves the simultaneous recording of multiple physiological signals. Traditionally, PSGs used for sleep analysis include electroencephalography (EEG), electrooculography (EOG), electromyography (EMG), and electrocardiography (ECG). Collected PSGs are segmented into 30-s epochs and categorized into five stages: Wake, N1, N2, N3, and REM (rapid eye movement), according to the American Academy of Sleep Medicine (AASM) guidelines (<xref ref-type="bibr" rid="B5">Berry et al., 2012</xref>). Initially, the sleep stage classification task is processed manually by trained sleep technologists. However, this scoring process is time-consuming, subjective, and susceptible to inter-scorer variability, which creates a critical bottleneck in sleep clinics and motivates the long-standing pursuit of reliable automated systems.</p>
<p>Machine learning approaches have been applied to automatic sleep stage classification tasks for several years, and many models have been proposed. Shallow classifiers or traditional machine learning methods, including Support Vector Machines (SVMs) (<xref ref-type="bibr" rid="B11">Hussein et al., 2023</xref>), Random Forests (<xref ref-type="bibr" rid="B23">Memar and Faradji, 2018</xref>), and decision trees (<xref ref-type="bibr" rid="B2">Alshammari, 2024</xref>), etc., have demonstrated initial success. However, feature engineering is an inevitable step before inputting data into these models. Extensive features are extracted from time, frequency, and time-frequency domains by domain experts manually, and their performance is heavily affected by the quality and comprehensiveness of the engineered features. The success of deep learning in classification tasks has propelled the evolution of automated sleep staging from rule-based heuristics and traditional machine learning to contemporary deep learning paradigms. These paradigms enable end-to-end learning from raw or minimally processed signals. Specifically, Convolutional Neural Networks (CNNs) are widely adopted to extract temporal features. Furthermore, bidirectional Long Short-Term Memory (Bi-LSTM) modules are often incorporated to model the temporal context and transition rules between successive epochs. More recently, Graph Convolutional Networks (GCNs) have been employed to explicitly learn the non-Euclidean, functional relationships between different brain regions, achieving state-of-the-art performance. Despite their impressive accuracy, these deep learning models are not without limitations. An inevitable drawback of these deep learning models is their &#x201c;black-box&#x201d; nature, which offers little insight into the decision-making process. This raises concerns about trustworthiness and clinical adoption, as understanding the reason behind a diagnosis is as crucial as the diagnosis itself.</p>
<p>Inspired by the Kolmogorov&#x2013;Arnold representation theorem, a novel architecture called Kolmogorov&#x2013;Arnold Network (KAN) was proposed recently, which represents a significant departure from traditional neural network architectures (<xref ref-type="bibr" rid="B22">Liu et al., 2025</xref>). Given its superior approximation capabilities compared to conventional multilayer perceptrons, KANs are advantageous in learning bio-signals across temporal dimensions.</p>
<p>In this work, we adapt and extend the KAN algorithm into a model named SimpleKANSleepNet for sleep stage classification. Our work is primarily motivated by two persistent challenges in current deep learning-based sleep staging: (1) the &#x201c;black-box&#x201d; nature of models like CNNs and GCNs, which hampers clinical trust and adoption due to a lack of interpretability; and (2) the potential limitations of fixed-activation MLPs in modeling the intricate, non-linear, and structured relationships inherent in multi-modal bio-signals. We hypothesize that KANs, with their superior function approximation efficiency through learnable activation functions on edges and their inherent potential for interpretability, are particularly well-suited to address these challenges. The main contributions of this study are:<list list-type="simple">
<list-item>
<p>&#x2022;A novel hybrid architecture named SimpleKANSleepNet is proposed for the sleep stage classification task. It innovatively integrates a KAN-based classifier with a dual-stream CNN feature extractor and a Gate Recurrent Unit (GRU)-based temporal context module. This represents the first exploration of KANs in the domain of multi-modal sleep stage classification.</p>
</list-item>
<list-item>
<p>&#x2022;We explore the potential of the KAN architecture to serve as an alternative to MLPs for modeling complex bio-signal relationships. This exploration aims to lay the groundwork for future research into more transparent sleep staging models, moving beyond a mere application study. This also allows a preliminary assessment of KAN&#x2019;s potential for providing insights into model decisions, despite the interpretability constraints of our hybrid architecture.</p>
</list-item>
<list-item>
<p>&#x2022;Five-state sleep classification experiments are conducted on the ISRUC-S1 (<ext-link ext-link-type="uri" xlink:href="https://sleeptight.isr.uc.pt/">https://sleeptight.isr.uc.pt/</ext-link>) and the SleepEDF-153 (<ext-link ext-link-type="uri" xlink:href="http://www.physionet.org/content/sleep-edfx/1.0.0/">www.physionet.org/content/sleep-edfx/1.0.0/</ext-link>) (<xref ref-type="bibr" rid="B37">Kemp et al., 2000</xref>; <xref ref-type="bibr" rid="B38">Goldberger et al., 2000</xref>) datasets to evaluate classification performance and generality. The proposed model achieves an overall accuracy, F1-score, and Cohen&#x2019;s kappa of 0.812, 0.793, 0.757, 0.928, 0.929, and 0.910, respectively. Several additional experiments are conducted on the ISRUC datasets to identify factors affecting the classification performance, including the number of signal channels, channel types, input feature domains, and the size of the dataset.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Related work</title>
<sec id="s2-1">
<label>2.1</label>
<title>Sleep stage classification</title>
<p>Sleep stage classification is a crucial step in sleep analysis, where many automated methods have been reported with high performance. Machine learning approaches, especially deep learning models, have undergone a rapid evolution, progressively moving from single-modality data to multi-modal methods.</p>
<p>CNNs are the first deep learning models applied to the sleep stage classification task, which demonstrated an unparalleled ability to automatically learn discriminative features from raw bio-signals and eliminate the need for manual feature engineering. A significant milestone in the use of CNN models in sleep scoring is DeepSleepNet, which introduced a dual-path architecture: one path with large filters to capture broad frequency information and another with small filters to extract temporal details (<xref ref-type="bibr" rid="B30">Supratak et al., 2017</xref>). Furthermore, it incorporated a bidirectional Long Short-Term Memory (Bi-LSTM) layer following the CNN, to explicitly model the temporal transition rules between adjacent epochs. 2D-CNNs have also been applied to classify sleep stages, normally using frequency domain features. A hybrid model named feature fusion temporal convolutional network (FFTCN) is proposed by combining a 1D-CNN branch with a 2D-CNN branch (<xref ref-type="bibr" rid="B4">Bao et al., 2024</xref>). Raw signals are transformed from the time domain to the frequency domain through continuous wavelet transform (CWT), converting a 1D temporal signal into a 2D image for input. The temporal features are extracted from a 1D-CNN branch and finally concatenated as fusion features for classification. To capture the complex structure of multi-channel sleep data without relying on hand-crafted 2D representations, 3D-CNNs like the 3DSleepNet model are also proposed (<xref ref-type="bibr" rid="B15">Ji et al., 2023b</xref>). The 3D convolutional layers in 3DSleepNet can aggregate information from neighboring time points, different frequency bands, and across various channels simultaneously, offering a more integrated and holistic feature learning approach.</p>
<p>Although CNNs excel at processing Euclidean data, they often fail to capture the functional and spatial relationships among different channels. This gap motivated the adoption of Graph Convolutional Networks (GCNs), which are specifically designed to operate on non-Euclidean graph structures. GraphSleepNet (<xref ref-type="bibr" rid="B17">Jia et al., 2020b</xref>), Jumping Knowledge based Spatial-Temporal GCN (JK-STGCN) (<xref ref-type="bibr" rid="B13">Ji et al., 2022</xref>), and Multi-View Spatial-Temporal Graph Convolutional Network (MSTGCN) (<xref ref-type="bibr" rid="B18">Jia et al., 2021a</xref>) are three GCN models that use dynamic sleep graph to extract spatial features and learn relationships among channels. Their experimental results show that explicitly modeling inter-channel relationships can boost classification performance, and more and more GCN-related models are being proposed in the bio-signal processing field (<xref ref-type="bibr" rid="B10">Huang et al., 2025</xref>).</p>
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>Kolmogorov&#x2013;Arnold network</title>
<p>Kolmogorov&#x2013;Arnold Networks (KANs) are a novel architecture designed to replace Multi-Layer Perceptrons (MLPs). They are inspired by the Kolmogorov&#x2013;Arnold representation theorem, which states that any multivariate continuous function can be expressed as a finite composition of continuous functions of a single variable (<xref ref-type="bibr" rid="B22">Liu et al., 2025</xref>). As <xref ref-type="fig" rid="F1">Figure 1</xref> shows, KANs place learnable univariate functions on the connections between nodes, rather than fixed non-linear activation functions on the nodes. This architecture allows learnable activation functions on the edges to adapt to complex data patterns more flexibly than fixed functions. KANs have been applied in many fields, such as traffic flow optimization (<xref ref-type="bibr" rid="B34">Zhang et al., 2025</xref>), medical image segmentation and generation (<xref ref-type="bibr" rid="B21">Li et al., 2025</xref>), and ECG analysis (<xref ref-type="bibr" rid="B1">Aghaomidi and Wang, 2024</xref>).</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>The comparison between a MLP and a KAN. <bold>(A)</bold> The MLP with learnable weights on nodes and fixed activation function. <bold>(B)</bold> A KAN with learnable activation function.</p>
</caption>
<graphic xlink:href="fbinf-06-1738132-g001.tif">
<alt-text content-type="machine-generated">Diagram comparing neural network architectures. Panel A shows a traditional multilayer perceptron (MLP) with two input nodes, one hidden layer of six neurons, and one output node, all interconnected by straight lines. Panel B illustrates a KAN architecture with two input nodes, custom-shaped activation functions in yellow boxes, intermediate node layers, and one output, highlighting diverse function shapes.</alt-text>
</graphic>
</fig>
<p>The physiological patterns of sleep stages are expressed through complex, non-linear dynamics in multi-channel biosignals such as EEG, EOG, EMG, and ECG. These signals exhibit intricate internal and mutual relationships. Traditional CNNs, GCNs, and MLPs, which rely on fixed nonlinearities like ReLU, may not represent these relationships optimally. In contrast, KANs are grounded in the Kolmogorov&#x2013;Arnold Representation Theorem. This theorem states that any multivariate continuous function can be decomposed into a finite composition of univariate continuous functions. As a result, KANs can dynamically adapt their nonlinear transformations to fit specific patterns in the data. This adaptability may promote more efficient parameter use and provide a superior approximation of the complex mapping from multimodal features to sleep stages. Therefore, the inherent properties of KANs align well with the complexities of sleep signal analysis. Using a KAN as the core classifier could enable more flexible and powerful learning of the rules underlying sleep stage transitions from multimodal features.</p>
</sec>
</sec>
<sec sec-type="materials|methods" id="s3">
<label>3</label>
<title>Materials and methods</title>
<p>
<xref ref-type="fig" rid="F2">Figure 2</xref> illustrates the architecture of the proposed SimpleKANSleepNet, which consists of three key stages: preprocessing, feature extraction, and classification.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>The architecture of the proposed model.</p>
</caption>
<graphic xlink:href="fbinf-06-1738132-g002.tif">
<alt-text content-type="machine-generated">Flowchart diagram of a neural network architecture for sleep stage classification using raw PSG signals including EEG, EOG, EMG, and ECG. Preprocessing is followed by frequency feature extraction using sequential convolutional, batch normalization, ReLU, pooling, dropout, and flatten layers repeated three times, then two linear layers output features for classification. The classification module uses batch normalization, dense layers, and softmax to predict sleep stages: W, N1, N2, N3, and REM. Colored bars represent channel encoding for each signal type.</alt-text>
</graphic>
</fig>
<sec id="s3-1">
<label>3.1</label>
<title>Dataset and processing</title>
<p>1. ISRUC: The ISRUC dataset consists of three subsets, namely, ISRUC-S1, ISRUC-S2, and ISRUC-S3 (<xref ref-type="bibr" rid="B20">Khalighi et al., 2016</xref>). The S1 subset contains 100 adult subjects who showed evidence of sleep disorders, and each subject underwent a complete PSG recording. The S2 subset contains eight adult subjects with evidence of breathing sleep disorders, including two different recording sessions for each subject. All ten subjects from S3 are healthy individuals with no known sleep pathologies, and each subject has a complete PSG recording. All three subsets are labelled by two sleep experts according to the AASM standards. The detailed information about the ISRUC database is listed in <xref ref-type="table" rid="T1">Table 1</xref>. The PSGs are preprocessed by the data provider, where all EEG channels and EOG channels are filtered by a Butterworth filter with a frequency range of 0.3 Hz&#x2013;35 Hz, while EMGs are filtered by a Butterworth filter with a range from 10 Hz to 70 Hz. The 50 Hz electrical noise is eliminated by a notch filter for all PSGs. Considering the dataset size and the subjects&#x2019; health conditions, only 50 random subjects are selected from ISRUC-S1 to conduct a 25-fold cross-validation for classification performance comparison and explore the factors that may affect the performance of the proposed model. To fully utilize the signal information, we select as many channels as possible. Consequently, all six EEG channels, two EOGs, one ECG, the Chin EMG, and the Leg-1 EMG are selected. All data used in our experiments are normalized epoch-by-epoch through z-score normalization:<disp-formula id="e1">
<mml:math id="m1">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>where <inline-formula id="inf1">
<mml:math id="m2">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the normalized signal of the <inline-formula id="inf2">
<mml:math id="m3">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th sleep epoch, <inline-formula id="inf3">
<mml:math id="m4">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the original signal of the <inline-formula id="inf4">
<mml:math id="m5">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th sleep epoch, <inline-formula id="inf5">
<mml:math id="m6">
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf6">
<mml:math id="m7">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> are the mean value and standard deviation of this epoch.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Details about the ISRUC database.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Subset</th>
<th align="center">Subjects</th>
<th align="center">Age (years)</th>
<th align="center">Sleep disorder</th>
<th align="center">EEGs</th>
<th align="center">EOGs</th>
<th align="center">ECG</th>
<th align="center">EMGs</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Subgroup I</td>
<td align="center">55 male, 45 female</td>
<td align="center">51 &#xb1; 16</td>
<td align="center">Yes</td>
<td rowspan="3" align="center">F3-A2, C3-A2<break/>O1-A2, F4-A1<break/>C4-A1, O2-A1</td>
<td rowspan="3" align="center">LOC-A2<break/>ROC-A1</td>
<td rowspan="3" align="center">X2</td>
<td rowspan="3" align="center">Chin EMG (X1)<break/>Leg-1 EMG (X3)<break/>Leg-2 EMG (X4)</td>
</tr>
<tr>
<td align="center">Subgroup II</td>
<td align="center">6 male, 2 female</td>
<td align="center">46.8 &#xb1; 18.8</td>
<td align="center">Yes</td>
</tr>
<tr>
<td align="center">Subgroup III</td>
<td align="center">9 male, 1 female</td>
<td align="center">40 &#xb1; 10</td>
<td align="center">No</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>2. SleepEDF-153: The SleepEDF-153 dataset is part of the Sleep-EDF database and includes 78 subjects aged 25 to 101. Each subject had two sleep records, except for subjects 13, 36, and 52, from whom one night of data was lost due to a failing cassette, resulting in 153 available recordings. For each recording in the dataset, two EEG channels (Fpz-Cz and Pz-Oz) and one horizontal EOG channel, sampled at 100 Hz, are selected. The 50 Hz electrical noise was eliminated by a notch filter for all selected PSGs, followed by a wavelet transformation to remove noise. Finally, z-score normalization using <xref ref-type="disp-formula" rid="e1">Equation 1</xref> was applied to normalize the preprocessed data for the feature extraction step.</p>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Feature extraction</title>
<p>Signals of <inline-formula id="inf7">
<mml:math id="m8">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> channels are defined as <inline-formula id="inf8">
<mml:math id="m9">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf9">
<mml:math id="m10">
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes number of sleep epochs and <inline-formula id="inf10">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes number of data points in each epoch. For the <inline-formula id="inf11">
<mml:math id="m12">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th epoch <inline-formula id="inf12">
<mml:math id="m13">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, the raw data of the <inline-formula id="inf13">
<mml:math id="m14">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th channel can be defined as <inline-formula id="inf14">
<mml:math id="m15">
<mml:mrow>
<mml:msup>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
</mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>l</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. In the feature extraction stage, a dual-stream CNN is used as the feature extractor. For each CNN branch, 1D convolutional layers are used along the temporal dimension to aggregate temporal information for each input channel. The dual-stream architecture is employed because the branch with larger filters can capture broader frequency information, while the branch with smaller filters can better extract fine-grained temporal information. The output of the feature extractor is defined in <xref ref-type="disp-formula" rid="e2">Equation 2</xref>:<disp-formula id="e2">
<mml:math id="m16">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mi>i</mml:mi>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:mo>&#x2016;</mml:mo>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where <inline-formula id="inf15">
<mml:math id="m17">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf16">
<mml:math id="m18">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mi>L</mml:mi>
<mml:mi>i</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> are the outputs of the branch with smaller filters and larger filters, respectively, and <inline-formula id="inf17">
<mml:math id="m19">
<mml:mrow>
<mml:mo>&#x2016;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the concatenation operation.</p>
<p>The advantages of using a separate CNN feature extractor are:<list list-type="order">
<list-item>
<p>The architecture of KANs is very similar to that of MLPs, and a large input size requires substantial computing resources. Therefore, feature extraction is an essential step in the proposed model to reduce the input dimensionality.</p>
</list-item>
<list-item>
<p>Moreover, KANs currently work only on the CPU. An integrated model that combines feature extraction and classification cannot utilize GPU acceleration. Hence, a separate feature extraction step is needed to accelerate the computation.</p>
</list-item>
<list-item>
<p>The CNN feature extractor ensures that both high-level temporal features and frequency features are captured without any prior knowledge or feature engineering, which are crucial for sleep stage classification tasks.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s3-3">
<label>3.3</label>
<title>KANs</title>
<p>The Kolmogorov&#x2013;Arnold Network (KAN) is a novel neural architecture fundamentally different from traditional Multi-Layer Perceptrons (MLPs). The idea of KANs originates from the Kolmogorov&#x2013;Arnold Representation Theorem, which states that any multivariate continuous function <inline-formula id="inf18">
<mml:math id="m20">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> can be represented as a finite composition of continuous univariate functions like <xref ref-type="disp-formula" rid="e3">Equation 3</xref> defined below:<disp-formula id="e3">
<mml:math id="m21">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>n</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>p</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>where <inline-formula id="inf19">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf20">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> are univariate functions. This theorem suggests that complex multivariate transformations can be broken down into a sum of simpler, univariate transformations.</p>
<p>Let <inline-formula id="inf21">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> be the input vector where <inline-formula id="inf22">
<mml:math id="m25">
<mml:mrow>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the number of nodes in the <inline-formula id="inf23">
<mml:math id="m26">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th layer of the computational graph. A general KAN network is defined as <xref ref-type="disp-formula" rid="e4">Equation 4</xref>:<disp-formula id="e4">
<mml:math id="m27">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2218;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2218;</mml:mo>
<mml:mo>&#x22ef;</mml:mo>
<mml:mo>&#x2218;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#xa0;</mml:mo>
<mml:mo>&#x2218;</mml:mo>
<mml:mo>&#xa0;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>where <inline-formula id="inf24">
<mml:math id="m28">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="&#x7c;">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>L</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the function matrix corresponding to the <inline-formula id="inf25">
<mml:math id="m29">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> -th KAN layer defined by <xref ref-type="disp-formula" rid="e5">Equation 5</xref>:<disp-formula id="e5">
<mml:math id="m30">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:munder>
<mml:munder>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="&#x7c;">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mo>&#x22ef;</mml:mo>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mo>&#x22ef;</mml:mo>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
<mml:mtd>
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
<mml:mtd/>
<mml:mtd>
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mo>&#x22ef;</mml:mo>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>n</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo stretchy="true">&#x23df;</mml:mo>
</mml:munder>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:munder>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>where <inline-formula id="inf26">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the activation function that connects <inline-formula id="inf27">
<mml:math id="m32">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf28">
<mml:math id="m33">
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>, which are the <inline-formula id="inf29">
<mml:math id="m34">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> -th neuron in the <inline-formula id="inf30">
<mml:math id="m35">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th layer and the <inline-formula id="inf31">
<mml:math id="m36">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th neuron in the (<inline-formula id="inf32">
<mml:math id="m37">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> &#x2b;1)-th layer, respectively.</p>
<p>Activation functions <inline-formula id="inf33">
<mml:math id="m38">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> in the KAN layer are residual activation functions defined by <xref ref-type="disp-formula" rid="e6">Equation 6</xref>:<disp-formula id="e6">
<mml:math id="m39">
<mml:mrow>
<mml:mi>&#x3d5;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>b</mml:mi>
</mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
<mml:mtext>spline</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>where <inline-formula id="inf34">
<mml:math id="m40">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>b</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf35">
<mml:math id="m41">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>s</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are trainable scale factors that are in principle redundant but are retained to better control the overall magnitude of the activation function. <inline-formula id="inf36">
<mml:math id="m42">
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the SiLU function defined by <xref ref-type="disp-formula" rid="e7">Equation 7</xref>:<disp-formula id="e7">
<mml:math id="m43">
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
<inline-formula id="inf37">
<mml:math id="m44">
<mml:mrow>
<mml:mtext>spline</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is parametrized as a linear combination of B-splines defined in <xref ref-type="disp-formula" rid="e8">Equation 8</xref>:<disp-formula id="e8">
<mml:math id="m45">
<mml:mrow>
<mml:mtext>spline</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>B</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>where <inline-formula id="inf38">
<mml:math id="m46">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the learnable coefficients for the B-spline basis functions <inline-formula id="inf39">
<mml:math id="m47">
<mml:mrow>
<mml:msub>
<mml:mi>B</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</sec>
<sec id="s3-4">
<label>3.4</label>
<title>Gated Recurrent Unit (GRU)</title>
<p>Sleep is a dynamic process where the temporal context and transition rules between consecutive epochs are crucial for accurate staging. To capture these temporal dependencies, a Gated Recurrent Unit (GRU) is incorporated after the KAN-based feature fusion. GRU is a type of Recurrent Neural Network (RNN) that uses gating mechanisms to control the flow of information, effectively mitigating the vanishing gradient problem common in simple RNNs (<xref ref-type="bibr" rid="B36">Cho et al., 2014</xref>).</p>
<p>As <xref ref-type="fig" rid="F3">Figure 3</xref> shows, the GRU cell, at each time step <inline-formula id="inf40">
<mml:math id="m48">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, takes the current input <inline-formula id="inf41">
<mml:math id="m49">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> (the fused feature vector from our KAN module) and the previous hidden state <inline-formula id="inf42">
<mml:math id="m50">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and produces a new hidden state <inline-formula id="inf43">
<mml:math id="m51">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. This hidden state serves as a compressed memory of the sequence up to time <inline-formula id="inf44">
<mml:math id="m52">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The internal computations of a GRU cell are governed by two gates: the reset gate <inline-formula id="inf45">
<mml:math id="m53">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the update gate <inline-formula id="inf46">
<mml:math id="m54">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>A GRU cell.</p>
</caption>
<graphic xlink:href="fbinf-06-1738132-g003.tif">
<alt-text content-type="machine-generated">Diagram illustrating the architecture of a Gated Recurrent Unit (GRU) including reset and update gates, sigmoid and tanh activations, and the flow of input, hidden state, and outputs through mathematical operations.</alt-text>
</graphic>
</fig>
<p>The formulas for the reset gate, the update gate, the candidate hidden state, and the final hidden state in a GRU cell are defined in <xref ref-type="disp-formula" rid="e9">Equations 9</xref>&#x2013;<xref ref-type="disp-formula" rid="e12">12</xref>, respectively.<list list-type="order">
<list-item>
<p>Reset Gate: This gate determines how much of the past hidden state <inline-formula id="inf47">
<mml:math id="m55">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> should be &#x201c;forgotten&#x201d; or reset when computing the candidate activation.<disp-formula id="e9">
<mml:math id="m56">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
</p>
</list-item>
<list-item>
<p>Update Gate: This gate controls how much of the new hidden state <inline-formula id="inf48">
<mml:math id="m57">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> will be composed of the candidate activation versus a copy of the old hidden state <inline-formula id="inf49">
<mml:math id="m58">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.<disp-formula id="e10">
<mml:math id="m59">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>z</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>
</p>
</list-item>
<list-item>
<p>Candidate Hidden State: A candidate for the new hidden state is computed using the current input and the reset past hidden state.<disp-formula id="e11">
<mml:math id="m60">
<mml:mrow>
<mml:mover accent="true">
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>tanh</mml:mi>
<mml:mo>&#x2061;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>
</p>
<p>Here, <inline-formula id="inf50">
<mml:math id="m61">
<mml:mrow>
<mml:mo>&#x2299;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> denotes the element-wise product. The reset gate <inline-formula id="inf51">
<mml:math id="m62">
<mml:mrow>
<mml:msub>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> allows the unit to drop information that is irrelevant in the future.</p>
</list-item>
<list-item>
<p>Final Hidden State: The final hidden state is a linear interpolation between the previous hidden state and the candidate hidden state, controlled by the update gate <inline-formula id="inf52">
<mml:math id="m63">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
</list>
<disp-formula id="e12">
<mml:math id="m64">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2299;</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:mover accent="true">
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>
</p>
<p>If the update gate <inline-formula id="inf53">
<mml:math id="m65">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is close to 1, the unit copies the candidate <inline-formula id="inf54">
<mml:math id="m66">
<mml:mrow>
<mml:mover accent="true">
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula>, effectively forgetting the past. If <inline-formula id="inf55">
<mml:math id="m67">
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is close to 0, the unit retains most of the previous hidden state <inline-formula id="inf56">
<mml:math id="m68">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Experiments</title>
<sec id="s4-1">
<label>4.1</label>
<title>Experiment setting</title>
<p>The classification performance evaluation experiments are conducted on the ISRUC-S1 and SleepEDF-153. For each experiment, we employed a subject-independent evaluation method to assess the model&#x2019;s generalization capability. For a fair comparison between different models on the ISRUC-S1 subset, the experimental setting of the dataset remains consistent with the existing study. Hence, we randomly selected 50 subjects from the ISRUC-S1 dataset for a 25-fold cross-validation. In each fold, two subjects are randomly chosen as the validation set, and another two completely unseen subjects are held out as the test set. The remaining cases form the training set. To compare with other models fairly, we follow the same dataset dividing rule as the existing method in their experiments. A 10-fold cross-validation was performed on the SleepEDF-153 dataset. It is important to note that the SleepEDF-153 dataset consists of 153 sleep sessions (nights) from 78 subjects, as data from three nights are missing. Consequently, we treated each of these 153 sessions as an independent and distinct sample. This means that the two recording sessions from a single subject could potentially be allocated to different splits (e.g., one in the training set and the other in the validation or test set). However, sleep patterns and physiological signals from the same individual can vary significantly from one night to another due to factors such as changing sleep quality, mental state, and environmental conditions. Therefore, the data from the two nights are considered non-identical and statistically independent samples. For each fold, 13 or 14 random sessions were chosen as the validation set, and 15 or 16 unseen sessions were chosen as the test set.</p>
<p>Detailed hyperparameters of the CNN feature extractor and the proposed model are listed in <xref ref-type="table" rid="T2">Table 2</xref>. Moreover, the code will be uploaded to GitHub (<ext-link ext-link-type="uri" xlink:href="https://github.com/ji-xiaopeng/SimpleKANSleepNet">https://github.com/ji-xiaopeng/SimpleKANSleepNet</ext-link>) once the paper is published.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Hyperparameters of the feature extractor module and the KAN module.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Module</th>
<th colspan="2" align="left">Hyperparameter</th>
<th align="left">Value</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="12" align="center">Feature<break/>Extractor</td>
<td rowspan="6" align="left">Temporal branch</td>
<td align="left">Conv 1D channel size</td>
<td align="left">32&#x2013;64&#x2013;64&#x2013;64</td>
</tr>
<tr>
<td align="left">Conv 1D kernel size</td>
<td align="left">50-8-8-8</td>
</tr>
<tr>
<td align="left">Conv 1D stride size</td>
<td align="left">6-1-1-1</td>
</tr>
<tr>
<td align="left">Max pooling kernel size</td>
<td align="left">16&#x2013;8</td>
</tr>
<tr>
<td align="left">Max pooling stride size</td>
<td align="left">16&#x2013;8</td>
</tr>
<tr>
<td align="left">Dropout rate</td>
<td align="left">0.5</td>
</tr>
<tr>
<td rowspan="6" align="left">Frequency branch</td>
<td align="left">Conv 1D channel size</td>
<td align="left">64&#x2013;64&#x2013;64&#x2013;64</td>
</tr>
<tr>
<td align="left">Conv 1D kernel size</td>
<td align="left">400-6-6-6</td>
</tr>
<tr>
<td align="left">Conv 1D stride size</td>
<td align="left">50-1-1-1</td>
</tr>
<tr>
<td align="left">Max pooling kernel size</td>
<td align="left">8&#x2013;4</td>
</tr>
<tr>
<td align="left">Max pooling stride size</td>
<td align="left">8&#x2013;4</td>
</tr>
<tr>
<td align="left">Dropout rate</td>
<td align="left">0.5</td>
</tr>
<tr>
<td rowspan="2" align="center">KAN</td>
<td colspan="2" align="left">KAN layer</td>
<td align="left">Channels &#x2a; 256&#x2013;128&#x2013;64</td>
</tr>
<tr>
<td colspan="2" align="left">GRU</td>
<td align="left">64&#x2013;32</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Overall evaluation metrics, including accuracy (ACC), F1-score, and Cohen&#x2019;s kappa (&#x3ba;), are used to evaluate the overall performance, while per-class metrics, including precision (PR), recall (RE), and F1-score, are also tested.</p>
<p>All experiments are carried out on a workstation with an Intel I7-10700 CPU, 96 GB memory, and an Nvidia GeForce RTX 2080 Ti GPU.</p>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Comparison with the state-of-the-art methods</title>
<p>According to the comprehensive experimental results presented in <xref ref-type="table" rid="T3">Table 3</xref>, a comparative analysis reveals the distinct performance landscape of various deep learning models for sleep stage classification on the ISRUC-S1 subset. The proposed SimpleKANSleepNet demonstrates highly competitive performance, achieving an overall accuracy of 0.812 and an F1-score of 0.793, positioning it among the top-tier models. The detailed evaluation metrics on this dataset for each class are listed in <xref ref-type="table" rid="T4">Table 4</xref>.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Comparison between the proposed model and other methods on the 50 random subjects from the ISRUC-S1 subset.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center">Method</th>
<th colspan="3" align="center">Overall metrics</th>
<th colspan="5" align="center">Per-class F1-score (F1)</th>
</tr>
<tr>
<th align="center">ACC</th>
<th align="center">F1</th>
<th align="center">
<inline-formula id="inf57">
<mml:math id="m69">
<mml:mrow>
<mml:mi>&#x3ba;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">W</th>
<th align="center">N1</th>
<th align="center">N2</th>
<th align="center">N3</th>
<th align="center">REM</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">DeepSleepNet</td>
<td align="center">0.730</td>
<td align="center">0.691</td>
<td align="center">0.654</td>
<td align="center">0.850</td>
<td align="center">0.385</td>
<td align="center">0.739</td>
<td align="center">0.830</td>
<td align="center">0.648</td>
</tr>
<tr>
<td align="left">TinySleepNet</td>
<td align="center">0.764</td>
<td align="center">0.745</td>
<td align="center">0.695</td>
<td align="center">0.846</td>
<td align="center">0.548</td>
<td align="center">0.729</td>
<td align="center">0.830</td>
<td align="center">0.794</td>
</tr>
<tr>
<td align="left">MMCNN</td>
<td align="center">0.769</td>
<td align="center">0.736</td>
<td align="center">-</td>
<td align="center">0.849</td>
<td align="center">0.437</td>
<td align="center">0.770</td>
<td align="center">0.843</td>
<td align="center">0.781</td>
</tr>
<tr>
<td align="left">SeqSleepNet</td>
<td align="center">0.770</td>
<td align="center">0.683</td>
<td align="center">-</td>
<td align="center">0.844</td>
<td align="center">0.124</td>
<td align="center">0.769</td>
<td align="center">0.853</td>
<td align="center">0.794</td>
</tr>
<tr>
<td align="left">GraphSleepNet</td>
<td align="center">0.780</td>
<td align="center">0.751</td>
<td align="center">0.715</td>
<td align="center">0.889</td>
<td align="center">0.463</td>
<td align="center">0.763</td>
<td align="center">0.825</td>
<td align="center">0.813</td>
</tr>
<tr>
<td align="left">MSTGCN</td>
<td align="center">0.808</td>
<td align="center">0.787</td>
<td align="center">
<underline>0.752</underline>
</td>
<td align="center">0.885</td>
<td align="center">0.539</td>
<td align="center">
<bold>0.799</bold>
</td>
<td align="center">
<bold>0.876</bold>
</td>
<td align="center">0.838</td>
</tr>
<tr>
<td align="left">StAGN</td>
<td align="center">
<underline>0.811</underline>
</td>
<td align="center">
<underline>0.790</underline>
</td>
<td align="center">-</td>
<td align="center">
<underline>0.895</underline>
</td>
<td align="center">
<underline>0.547</underline>
</td>
<td align="center">
<underline>0.797</underline>
</td>
<td align="center">
<bold>0.876</bold>
</td>
<td align="center">0.836</td>
</tr>
<tr>
<td align="left">Metasleeplearner</td>
<td align="center">0.710</td>
<td align="center">0.678</td>
<td align="center">-</td>
<td align="center">0.772</td>
<td align="center">0.442</td>
<td align="center">0.680</td>
<td align="center">0.802</td>
<td align="center">0.697</td>
</tr>
<tr>
<td align="left">Cosleep</td>
<td align="center">0.579</td>
<td align="center">0.501</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
<td align="center">-</td>
</tr>
<tr>
<td align="left">FFTCN</td>
<td align="center">0.774</td>
<td align="center">0.745</td>
<td align="center">0.71</td>
<td align="center">0.865</td>
<td align="center">0.450</td>
<td align="center">0.767</td>
<td align="center">0.863</td>
<td align="center">0.779</td>
</tr>
<tr>
<td align="left">SimpleKANSleepNet</td>
<td align="center">
<bold>0.812</bold>
</td>
<td align="center">
<bold>0.793</bold>
</td>
<td align="center">
<bold>0.757</bold>
</td>
<td align="center">
<bold>0.898</bold>
</td>
<td align="center">
<bold>0.554</bold>
</td>
<td align="center">0.796</td>
<td align="center">
<underline>0.863</underline>
</td>
<td align="center">
<bold>0.857</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="Tfn1">
<label>
<sup>a</sup>
</label>
<p>W &#x3d; awake. N1, N2 and N3 are sleep stages 1, 2, and 3, respectively, and are non-rapid eye movement. REM, rapid eye movement.</p>
</fn>
<fn>
<p>Bold numbers represent the best metric values, and underlined numbers represent the second best.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Confusion matrix of the proposed model on the ISRUC-S1 dataset.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center">True labels</th>
<th colspan="5" align="center">Predicted</th>
<th colspan="2" align="center">Per-class metrics</th>
</tr>
<tr>
<th align="center">W</th>
<th align="center">N1</th>
<th align="center">N2</th>
<th align="center">N3</th>
<th align="center">REM</th>
<th align="center">PR</th>
<th align="center">RE</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">W</td>
<td align="center">9261</td>
<td align="center">582</td>
<td align="center">175</td>
<td align="center">16</td>
<td align="center">63</td>
<td align="center">0.878</td>
<td align="center">0.917</td>
</tr>
<tr>
<td align="center">N1</td>
<td align="center">866</td>
<td align="center">2826</td>
<td align="center">1480</td>
<td align="center">29</td>
<td align="center">354</td>
<td align="center">0.606</td>
<td align="center">0.509</td>
</tr>
<tr>
<td align="center">N2</td>
<td align="center">250</td>
<td align="center">871</td>
<td align="center">11059</td>
<td align="center">839</td>
<td align="center">231</td>
<td align="center">0.760</td>
<td align="center">0.835</td>
</tr>
<tr>
<td align="center">N3</td>
<td align="center">16</td>
<td align="center">12</td>
<td align="center">1396</td>
<td align="center">7249</td>
<td align="center">2</td>
<td align="center">0.891</td>
<td align="center">0.836</td>
</tr>
<tr>
<td align="center">REM</td>
<td align="center">146</td>
<td align="center">374</td>
<td align="center">437</td>
<td align="center">3</td>
<td align="center">4819</td>
<td align="center">0.881</td>
<td align="center">0.834</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="Tfn2">
<label>
<sup>a</sup>
</label>
<p>W &#x3d; awake. N1, N2 and N3 are sleep stages 1, 2, and 3, respectively, and are non-rapid eye movement. REM, rapid eye movement; PR &#x3d; precision; RE &#x3d; recall.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Early end-to-end CNN models like DeepSleepNet (<xref ref-type="bibr" rid="B30">Supratak et al., 2017</xref>) and TinySleepNet (<xref ref-type="bibr" rid="B29">Supratak and Guo, 2020</xref>) established a strong baseline for sleep staging through deep learning methods. However, their overall and per-class performance is much lower than that of other models. This phenomenon may be due to the limited number of channels they use. While subsequent models like MMCNN (<xref ref-type="bibr" rid="B6">Chambon et al., 2018</xref>) and SeqSleepNet (<xref ref-type="bibr" rid="B26">Phan et al., 2019b</xref>) improved overall accuracy, SeqSleepNet&#x2019;s exceptionally low N1 score (F1 &#x3d; 0.124) highlights the difficulty of capturing its features with certain sequence modeling approaches.</p>
<p>Metasleeplearner (<xref ref-type="bibr" rid="B3">Banluesombatkul et al., 2021</xref>) and Cosleep (<xref ref-type="bibr" rid="B31">Ye et al., 2022</xref>) show the lowest performance among all models, whose frameworks are very special compared to other models. Metasleeplearner applies a simpler version of the DeepSleepNet architecture based on Model Agnostic Meta-Learning (MAML), while the Cosleep model is a co-training scheme, which exploits complementary information from multiple views. Given that the performance of the DeepSleepNet model is lower than expected, it is difficult for a simpler version to achieve an acceptable result. In terms of the Cosleep model, even though many effective modules are added, the self-supervised learning scheme still cannot achieve performance as high as that of supervised methods.</p>
<p>The introduction of graph-based models marked a significant advancement. GraphSleepNet (<xref ref-type="bibr" rid="B16">Jia et al., 2020a</xref>) demonstrated the benefit of modeling inter-channel relationships, notably improving the F1-score for the Wake stage. Its successors, MSTGCN (<xref ref-type="bibr" rid="B18">Jia et al., 2021a</xref>) and StAGN (<xref ref-type="bibr" rid="B7">Chen et al., 2023</xref>), further refined this paradigm by incorporating multi-view spatial-temporal convolutions and attention mechanisms, respectively, pushing the overall accuracy above 0.808.</p>
<p>Our proposed SimpleKANSleepNet achieves a balanced and robust performance. Its most notable strength lies in its best-in-class performance on the most challenging stages. It achieves the highest F1-score for the N1 stage (0.554) and the REM stage (0.857), which suggests that the Kolmogorov-Arnold Network&#x2019;s capacity for efficient and highly non-linear function approximation is particularly effective at capturing the subtle and complex patterns that characterize these stages. The strong performance on Awake, N1, N2, and REM, coupled with high overall metrics, validates the KAN architecture as a powerful and promising alternative for biosignal processing.</p>
<p>The performance landscape on the Sleep-EDF-153 dataset reveals different model characteristics. Early CNN-based methods or temporal convolutional networks (TCNs), like the DeepSleepNet (<xref ref-type="bibr" rid="B30">Supratak et al., 2017</xref>), SleepPrintNet (<xref ref-type="bibr" rid="B16">Jia et al., 2020a</xref>), MultitaskCNN (<xref ref-type="bibr" rid="B25">Phan et al., 2019a</xref>), DeepResNet (<xref ref-type="bibr" rid="B28">Sun et al., 2018</xref>), SWTCNN (<xref ref-type="bibr" rid="B12">Jadhav and Mukhopadhyay, 2022</xref>), and FFTCN (<xref ref-type="bibr" rid="B4">Bao et al., 2024</xref>), consider signal types, channel sizes, or electrode locations only to construct models without any attention mechanism, which presents low but acceptable performance. Motivated by the performance enhancement from the attention mechanism, many sleep stage classification algorithms start to incorporate attention layers to help models capture important information from data. Models with attention layers, especially those using multiple PSG channels, can pay more attention to the most valuable information in sleep stage classification. As a result, models, like the SleepEEGNet (<xref ref-type="bibr" rid="B24">Mousavi et al., 2019</xref>), AttnSleepNet (<xref ref-type="bibr" rid="B9">Eldele et al., 2021</xref>), MultiChannelSleepNet (<xref ref-type="bibr" rid="B8">Dai et al., 2023</xref>), MaskSleepNet (<xref ref-type="bibr" rid="B35">Zhu et al., 2023</xref>), SalientSleepNet (<xref ref-type="bibr" rid="B19">Jia et al., 2021b</xref>), and MMASleepNet (<xref ref-type="bibr" rid="B32">Yubo et al., 2022</xref>) show improved results. On the other hand, MixSleepNet (<xref ref-type="bibr" rid="B14">Ji et al., 2023a</xref>) processes temporal features, frequency features, and time-frequency features by combining 3D-CNNs, GCNs, and 2D-CNNs, which enables this model to analyze PSGs from multiple domains, leading to a higher performance than other existing methods.</p>
<p>On this dataset, SimpleKANSleepNet establishes a new state-of-the-art by achieving the highest overall accuracy (0.907) and Cohen&#x2019;s Kappa (0.807). This underscores its superior generalization capability and robust pattern recognition. Compared with the results on the ISRUC-S1 subset, a notable phenomenon of the results of the SimpleKANSleepNet on the Sleep-EDF-153 dataset is that the F1-score for the N1, N3, and REM stages decreases by 0.087, 0.164, and 0.057, respectively. One possible explanation is that the N1 stage is a transitional stage between the Awake stage and N2 stage, which has the characteristics of N1 and N2 at the same time, leading to the difficulty in classification. In automated sleep stage classification methods, N3 stages are sometimes misclassified into N2 stages. The boundaries between N2 and N3 sleep are blurred because the progression of slow-wave activity is a continuum, making the application of the arbitrary 20% threshold for classification inherently subjective. Moreover, the Awake stage ratio in the Sleep-EDF-153 dataset is much higher than that in the ISRUC-S1 subset, leading to a label-imbalanced problem. The lower overall F1-score and the higher overall Cohen&#x2019;s kappa also indicate this issue. Due to the fact that most models compared in <xref ref-type="table" rid="T5">Table 5</xref> use data augmentation techniques to enhance the performance, three data balance strategies are further tested and listed in <xref ref-type="table" rid="T5">Table 5</xref> for a fair comparison between the proposed method and other methods. Meanwhile, the detailed classification metrics of a confusion matrix for each class are listed in <xref ref-type="table" rid="T6">Table 6</xref>.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>Comparison between the proposed model and other methods on the SleepEDF-153 dataset.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left">Method</th>
<th colspan="3" align="center">Overall metrics</th>
<th colspan="5" align="center">Per-class F1-score (F1)</th>
</tr>
<tr>
<th align="center">ACC</th>
<th align="center">F1</th>
<th align="center">
<inline-formula id="inf58">
<mml:math id="m70">
<mml:mrow>
<mml:mi>&#x3ba;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="center">W</th>
<th align="center">N1</th>
<th align="center">N2</th>
<th align="center">N3</th>
<th align="center">REM</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">DeepSleepNet</td>
<td align="left">0.820</td>
<td align="left">0.769</td>
<td align="left">0.760</td>
<td align="left">0.847</td>
<td align="left">0.466</td>
<td align="left">
<underline>0.859</underline>
</td>
<td align="left">0.848</td>
<td align="left">0.824</td>
</tr>
<tr>
<td align="left">SleepPrintNet</td>
<td align="left">0.816</td>
<td align="left">0.765</td>
<td align="left">0.747</td>
<td align="left">0.927</td>
<td align="left">0.474</td>
<td align="left">0.836</td>
<td align="left">0.800</td>
<td align="left">0.788</td>
</tr>
<tr>
<td align="left">MultitaskCNN</td>
<td align="left">0.796</td>
<td align="left">0.728</td>
<td align="left">0.72</td>
<td align="left">0.909</td>
<td align="left">0.397</td>
<td align="left">0.832</td>
<td align="left">0.766</td>
<td align="left">0.735</td>
</tr>
<tr>
<td align="left">DeepResNet</td>
<td align="left">0.820</td>
<td align="left">-</td>
<td align="left">0.769</td>
<td align="left">0.847</td>
<td align="left">0.466</td>
<td align="left">0.859</td>
<td align="left">0.848</td>
<td align="left">0.824</td>
</tr>
<tr>
<td align="left">SWTCNN</td>
<td align="left">0.825</td>
<td align="left">0.751</td>
<td align="left">0.750</td>
<td align="left">0.928</td>
<td align="left">0.395</td>
<td align="left">0.855</td>
<td align="left">0.832</td>
<td align="left">0.744</td>
</tr>
<tr>
<td align="left">FFTCN</td>
<td align="left">0.826</td>
<td align="left">0.771</td>
<td align="left">0.760</td>
<td align="left">0.922</td>
<td align="left">0.473</td>
<td align="left">0.848</td>
<td align="left">0.800</td>
<td align="left">0.810</td>
</tr>
<tr>
<td align="left">SleepEEGNet</td>
<td align="left">0.800</td>
<td align="left">0.736</td>
<td align="left">0.730</td>
<td align="left">0.917</td>
<td align="left">0.441</td>
<td align="left">0.825</td>
<td align="left">0.735</td>
<td align="left">0.761</td>
</tr>
<tr>
<td align="left">AttnSleepNet</td>
<td align="left">0.717</td>
<td align="left">0.685</td>
<td align="left">0.637</td>
<td align="left">0.842</td>
<td align="left">0.428</td>
<td align="left">0.715</td>
<td align="left">0.819</td>
<td align="left">0.611</td>
</tr>
<tr>
<td align="left">MultiChannelSleepNet</td>
<td align="left">0.850</td>
<td align="left">0.796</td>
<td align="left">0.790</td>
<td align="left">0.940</td>
<td align="left">0.530</td>
<td align="left">0.869</td>
<td align="left">0.818</td>
<td align="left">0.826</td>
</tr>
<tr>
<td align="left">MaskSleepNet</td>
<td align="left">0.838</td>
<td align="left">0.742</td>
<td align="left">0.830</td>
<td align="left">0.879</td>
<td align="left">0.506</td>
<td align="left">0.897</td>
<td align="left">0.860</td>
<td align="left">0.709</td>
</tr>
<tr>
<td align="left">SalientSleepNet</td>
<td align="left">0.826</td>
<td align="left">0.765</td>
<td align="left">0.759</td>
<td align="left">0.923</td>
<td align="left">0.505</td>
<td align="left">0.844</td>
<td align="left">0.712</td>
<td align="left">0.842</td>
</tr>
<tr>
<td align="left">MMASleepNet</td>
<td align="left">0.827</td>
<td align="left">0.776</td>
<td align="left">0.761</td>
<td align="left">0.929</td>
<td align="left">0.491</td>
<td align="left">0.849</td>
<td align="left">0.813</td>
<td align="left">0.798</td>
</tr>
<tr>
<td align="left">MixSleepNet</td>
<td align="left">0.891</td>
<td align="left">0.685</td>
<td align="left">0.770</td>
<td align="left">0.970</td>
<td align="left">0.227</td>
<td align="left">0.815</td>
<td align="left">0.760</td>
<td align="left">0.652</td>
</tr>
<tr>
<td align="left">SimpleKANSleepNet (All data)</td>
<td align="left">
<underline>0.907</underline>
</td>
<td align="left">0.752</td>
<td align="left">0.807</td>
<td align="left">
<underline>0.978</underline>
</td>
<td align="left">0.467</td>
<td align="left">0.818</td>
<td align="left">0.699</td>
<td align="left">0.800</td>
</tr>
<tr>
<td align="left">SimpleKANSleepNet (Sleep state focus)</td>
<td align="left">0.833</td>
<td align="left">0.771</td>
<td align="left">0.769</td>
<td align="left">0.932</td>
<td align="left">0.525</td>
<td align="left">0.849</td>
<td align="left">0.699</td>
<td align="left">0.851</td>
</tr>
<tr>
<td align="left">SimpleKANSleepNet (Oversampling balance)</td>
<td align="left">0.847</td>
<td align="left">
<underline>0.846</underline>
</td>
<td align="left">
<underline>0.808</underline>
</td>
<td align="left">0.912</td>
<td align="left">
<underline>0.787</underline>
</td>
<td align="left">0.744</td>
<td align="left">
<underline>0.859</underline>
</td>
<td align="left">
<underline>0.929</underline>
</td>
</tr>
<tr>
<td align="left">SimpleKANSleepNet (Undersampling balance)</td>
<td align="left">
<bold>0.928</bold>
</td>
<td align="left">
<bold>0.929</bold>
</td>
<td align="left">
<bold>0.910</bold>
</td>
<td align="left">
<bold>0.985</bold>
</td>
<td align="left">
<bold>0.904</bold>
</td>
<td align="left">
<bold>0.863</bold>
</td>
<td align="left">
<bold>0.928</bold>
</td>
<td align="left">
<bold>0.964</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="Tfn3">
<label>
<sup>a</sup>
</label>
<p>W &#x3d; awake. N1, N2 and N3 are sleep stages 1, 2, and 3, respectively, and are non-rapid eye movement. REM, rapid eye movement.</p>
</fn>
<fn>
<p>Bold numbers represent the best metric values, and underlined numbers represent the second best.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>Confusion matrix of the proposed model on the SleepEDF-153 dataset after sleep state focus balance.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left">True labels</th>
<th colspan="5" align="center">Predicted</th>
<th colspan="2" align="center">Per-class metrics</th>
</tr>
<tr>
<th align="center">W</th>
<th align="center">N1</th>
<th align="center">N2</th>
<th align="center">N3</th>
<th align="center">REM</th>
<th align="center">PR</th>
<th align="center">RE</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">W</td>
<td align="center">58875</td>
<td align="center">2941</td>
<td align="center">379</td>
<td align="center">37</td>
<td align="center">324</td>
<td align="center">0.922</td>
<td align="center">0.941</td>
</tr>
<tr>
<td align="center">N1</td>
<td align="center">3873</td>
<td align="center">10589</td>
<td align="center">5032</td>
<td align="center">74</td>
<td align="center">1693</td>
<td align="center">0.555</td>
<td align="center">0.498</td>
</tr>
<tr>
<td align="center">N2</td>
<td align="center">588</td>
<td align="center">4442</td>
<td align="center">59170</td>
<td align="center">1661</td>
<td align="center">2184</td>
<td align="center">0.829</td>
<td align="center">0.870</td>
</tr>
<tr>
<td align="center">N3</td>
<td align="center">26</td>
<td align="center">37</td>
<td align="center">4927</td>
<td align="center">7891</td>
<td align="center">18</td>
<td align="center">0.816</td>
<td align="center">0.612</td>
</tr>
<tr>
<td align="center">REM</td>
<td align="center">476</td>
<td align="center">1059</td>
<td align="center">1907</td>
<td align="center">10</td>
<td align="center">21830</td>
<td align="center">0.838</td>
<td align="center">0.864</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="Tfn4">
<label>
<sup>a</sup>
</label>
<p>W &#x3d; awake. N1, N2 and N3 are sleep stages 1, 2, and 3, respectively, and are non-rapid eye movement. REM, rapid eye movement; PR &#x3d; precision; RE &#x3d; recall.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<sec id="s4-2-1">
<label>4.2.1</label>
<title>Sleep state focus</title>
<p>The simplest data balance strategy is the sleep state focus, which retains only 30 min of wake periods before and after the sleep periods. This method helps the model focus on the sleep state rather than the Awake state. This Awake stage balancing process enhances N1, N2, and REM classification performance, while the N3 stage performance remains at a similar level due to its low proportion.</p>
</sec>
<sec id="s4-2-2">
<label>4.2.2</label>
<title>Oversampling</title>
<p>The oversampling strategy generates new data based on existing data points, solving the data imbalance problem by increasing the number of samples in the minority class to match the number in the majority class. In our experiment, the oversampling method is applied after the sleep state focus strategy, which means that all sleep stages will be oversampled to the Awake stage of 1-h periods, patient by patient. The experimental results demonstrate that the minority classes, especially the N1 and N3 stages, increase significantly, with a slight decrease in the Awake and N2 stages.</p>
</sec>
<sec id="s4-2-3">
<label>4.2.3</label>
<title>Undersampling</title>
<p>Unlike the oversampling strategy, the undersampling strategy will decrease the number of samples in the majority class to the number of samples in the minority class and reduce the total dataset size to balance the classes. However, the resampled dataset size will be severely reduced if the distribution of classes is extremely imbalanced. In our experiment, the undersampling strategy is applied directly to the preprocessed SleepEDF-153 dataset. The experimental results show that all performance metrics achieve the highest compared with other models. A possible explanation for this phenomenon is that the undersampling strategy will reduce data diversity, leading both the training set and testing set to a similar distribution compared with the oversampling strategy.</p>
<p>To further evaluate the effectiveness and stability of three data augmentation techniques, a box plot is presented in <xref ref-type="fig" rid="F4">Figure 4</xref>. For the situation of all data, the overall accuracy achieves the second-highest performance, while the lowest recall indicates that the model misclassifies data of the minority class into other labels. Obviously, its high classification accuracy is due to the imbalanced classes. Similarly, the sleep state focus balance strategy has little improvement on the recall or Cohen&#x2019;s kappa metrics. The oversampling enhances the F1, precision, and recall metrics by balancing the class labels. The newly generated data points have a similar distribution to the existing data, which ensures the evaluation metrics are at the same level. As a result, it has the most stable classification performance on each fold. Undersampling consistently achieves the highest scores across all evaluation metrics, with performance values predominantly above 0.90. However, a few points are far from the mean value, which indicates lower stability than the oversampling strategy.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Box plot of four experimental settings on the Sleep-EDF 153 dataset.</p>
</caption>
<graphic xlink:href="fbinf-06-1738132-g004.tif">
<alt-text content-type="machine-generated">Boxplot graphic comparing four data processing methods&#x2014;All Data (grey), Sleep State Focus (red), Oversampling (blue), and Undersampling (green)&#x2014;across five metrics: Accuracy, Cohen's kappa, F1, Precision, and Recall. Each metric shows the undersampling method yields consistently higher values and less spread than other methods, especially notable for F1, Precision, and Recall.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s4-3">
<label>4.3</label>
<title>Model analysis</title>
<p>Several additional experiments are conducted on the ISRUC-S1 subset to further test the factors that may affect the classification performance and the evaluation results are listed in <xref ref-type="table" rid="T7">Table 7</xref>.</p>
<table-wrap id="T7" position="float">
<label>TABLE 7</label>
<caption>
<p>Additional comparison experiments on the ISRUC-S1 Subset.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left">&#x23;</th>
<th rowspan="2" align="left">Channels</th>
<th rowspan="2" align="left">Subjects</th>
<th rowspan="2" align="left">Features</th>
<th colspan="3" align="left">Overall metrics</th>
<th colspan="5" align="left">Per-class F1-score (F1)</th>
</tr>
<tr>
<th align="left">ACC</th>
<th align="left">F1</th>
<th align="left">
<inline-formula id="inf59">
<mml:math id="m71">
<mml:mrow>
<mml:mi>&#x3ba;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th align="left">W</th>
<th align="left">N1</th>
<th align="left">N2</th>
<th align="left">N3</th>
<th align="left">REM</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">i</td>
<td align="left">1 EEG, 1 ECG</td>
<td align="left">50</td>
<td align="left">CNN</td>
<td align="left">0.721</td>
<td align="left">0.684</td>
<td align="left">0.640</td>
<td align="left">0.824</td>
<td align="left">0.410</td>
<td align="left">0.728</td>
<td align="left">0.824</td>
<td align="left">0.635</td>
</tr>
<tr>
<td align="left">ii</td>
<td align="left">1 EEG, 1 EMG</td>
<td align="left">50</td>
<td align="left">CNN</td>
<td align="left">0.761</td>
<td align="left">0.735</td>
<td align="left">0.693</td>
<td align="left">0.861</td>
<td align="left">0.458</td>
<td align="left">0.747</td>
<td align="left">0.836</td>
<td align="left">0.770</td>
</tr>
<tr>
<td align="left">iii</td>
<td align="left">1 EEG, 1 EOG</td>
<td align="left">50</td>
<td align="left">CNN</td>
<td align="left">0.770</td>
<td align="left">0.749</td>
<td align="left">0.704</td>
<td align="left">0.882</td>
<td align="left">0.515</td>
<td align="left">0.750</td>
<td align="left">0.831</td>
<td align="left">0.765</td>
</tr>
<tr>
<td align="left">iv</td>
<td align="left">1 EEG, 1 EOG, 1 EMG, 1 ECG</td>
<td align="left">50</td>
<td align="left">CNN</td>
<td align="left">0.774</td>
<td align="left">0.752</td>
<td align="left">0.709</td>
<td align="left">0.868</td>
<td align="left">0.485</td>
<td align="left">0.761</td>
<td align="left">0.839</td>
<td align="left">0.806</td>
</tr>
<tr>
<td align="left">v</td>
<td align="left">2 EEG</td>
<td align="left">50</td>
<td align="left">CNN</td>
<td align="left">0.759</td>
<td align="left">0.732</td>
<td align="left">0.690</td>
<td align="left">0.868</td>
<td align="left">0.489</td>
<td align="left">0.749</td>
<td align="left">0.846</td>
<td align="left">0.709</td>
</tr>
<tr>
<td align="left">vi</td>
<td align="left">2 EOG</td>
<td align="left">50</td>
<td align="left">CNN</td>
<td align="left">0.725</td>
<td align="left">0.699</td>
<td align="left">0.645</td>
<td align="left">0.824</td>
<td align="left">0.424</td>
<td align="left">0.710</td>
<td align="left">0.820</td>
<td align="left">0.719</td>
</tr>
<tr>
<td align="left">vii</td>
<td align="left">6 EEG</td>
<td align="left">50</td>
<td align="left">CNN</td>
<td align="left">0.775</td>
<td align="left">0.751</td>
<td align="left">0.710</td>
<td align="left">0.871</td>
<td align="left">0.500</td>
<td align="left">0.770</td>
<td align="left">0.844</td>
<td align="left">0.771</td>
</tr>
<tr>
<td align="left">viii</td>
<td align="left">6 EEG, 2 EOG</td>
<td align="left">50</td>
<td align="left">CNN</td>
<td align="left">0.782</td>
<td align="left">0.759</td>
<td align="left">0.719</td>
<td align="left">0.875</td>
<td align="left">0.510</td>
<td align="left">0.774</td>
<td align="left">0.845</td>
<td align="left">0.790</td>
</tr>
<tr>
<td align="left">ix</td>
<td align="left">All channels</td>
<td align="left">50</td>
<td align="left">CNN</td>
<td align="left">0.812</td>
<td align="left">0.793</td>
<td align="left">0.757</td>
<td align="left">0.898</td>
<td align="left">0.554</td>
<td align="left">0.796</td>
<td align="left">0.863</td>
<td align="left">0.857</td>
</tr>
<tr>
<td align="left">x</td>
<td align="left">All channels</td>
<td align="left">50</td>
<td align="left">DE</td>
<td align="left">0.753</td>
<td align="left">0.710</td>
<td align="left">0.679</td>
<td align="left">0.831</td>
<td align="left">0.358</td>
<td align="left">0.749</td>
<td align="left">0.810</td>
<td align="left">0.802</td>
</tr>
<tr>
<td align="left">xi</td>
<td align="left">All channels</td>
<td align="left">50</td>
<td align="left">STFT</td>
<td align="left">0.754</td>
<td align="left">0.729</td>
<td align="left">0.682</td>
<td align="left">0.862</td>
<td align="left">0.452</td>
<td align="left">0.737</td>
<td align="left">0.783</td>
<td align="left">0.811</td>
</tr>
<tr>
<td align="left">xii</td>
<td align="left">All channels</td>
<td align="left">50</td>
<td align="left">Statistic</td>
<td align="left">0.720</td>
<td align="left">0.644</td>
<td align="left">0.635</td>
<td align="left">0.808</td>
<td align="left">0.133</td>
<td align="left">0.719</td>
<td align="left">0.797</td>
<td align="left">0.760</td>
</tr>
<tr>
<td align="left">xiii</td>
<td align="left">All channels</td>
<td align="left">20</td>
<td align="left">CNN</td>
<td align="left">0.739</td>
<td align="left">0.703</td>
<td align="left">0.662</td>
<td align="left">0.860</td>
<td align="left">0.442</td>
<td align="left">0.710</td>
<td align="left">0.741</td>
<td align="left">0.764</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="Tfn5">
<label>
<sup>a</sup>
</label>
<p>W &#x3d; awake. N1, N2, and N3 are sleep stages 1, 2, and 3, respectively, and are non-rapid eye movement stages. REM, rapid eye movement.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>A clear conclusion can be drawn from experiments i, ii, iii, iv, and v: ECG, EMG, and EOG contribute differently to the classification performance, among which EOG contributes the most, especially for the REM stage. This phenomenon can be explained by the fact that sleep stages are categorized into REM and non-REM, with the EOG signal being a significant characteristic for identification. Consequently, EOG helps to improve the overall performance and REM stage classification. Moreover, experiments v and vi further demonstrate the importance of EEG signals for sleep stage identification, while a high performance can also be achieved by increasing the number of EOG channels. Experiments v and vii demonstrate that increasing the number of EEG channels can enhance performance, but a greater improvement is achieved through multimodal data, as shown by experiments vii, viii, and ix.</p>
<p>The comparison between experiments ix, x, xi, and xii shows the necessity of the CNN feature extractor, where differential entropy (DE), Short-Time Fourier Transform (STFT), and statistical features represent frequency domain, time-frequency domain, and temporal domain features, respectively. A simple but important conclusion can be drawn from the comparison: the CNN feature extractor can extract high-level temporal and frequency features without any prior knowledge. Moreover, pure frequency features or pure temporal features fail to capture features from the other domain, which are important for identifying a sleep epoch. Although the STFT method can extract time-frequency domain features, time resolution and frequency resolution are inversely related. Therefore, the results obtained from STFT are lower than those from CNN features.</p>
<p>The comparison between experiments ix and xiii shows the importance of sample size for classification performance, and <xref ref-type="fig" rid="F5">Figure 5</xref> shows the trend of performance improvement with increasing sample size. A fundamental observation is that the data size has a crucial impact on performance, and the proposed model can classify sleep stages correctly if sufficient data are available for training.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Trend of performance improvement with increasing sample size.</p>
</caption>
<graphic xlink:href="fbinf-06-1738132-g005.tif">
<alt-text content-type="machine-generated">Line graph displaying ACC, F1-Score, and Kappa metrics on the y-axis against sample size on the x-axis. All three metrics steadily increase with sample size, with ACC consistently highest and Kappa lowest throughout.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4-4">
<label>4.4</label>
<title>Ablation study</title>
<p>To further evaluate the effectiveness of the KAN module and the GRU module, four variant models are designed to conduct ablation experiments on the ISRUC-S1 subset. <xref ref-type="fig" rid="F6">Figure 6</xref> illustrates the comparison among four variant models. The details of these models are:</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Comparison of the designed variant models.</p>
</caption>
<graphic xlink:href="fbinf-06-1738132-g006.tif">
<alt-text content-type="machine-generated">Bar chart comparing four variant models labeled A through D on three metrics: accuracy, F1 score, and Cohen&#x2019;s kappa. Model D achieves the highest values across all metrics, followed by C, B, and A.</alt-text>
</graphic>
</fig>
<p>Variant A: The simplest variant model, consisting of a CNN feature extractor and a GRU cell.</p>
<p>Variant B: The CNN is used to extract features, which are fed into a KAN module without a GRU cell.</p>
<p>Variant C: An MLP is used to replace the KAN module with a GRU cell after the CNN feature extractor.</p>
<p>Variant D: The proposed SimpleKANSleepNet model.</p>
<p>Variant A achieves the lowest performance among all models. A reasonable explanation is that the CNN module can extract features effectively, and the GRU cell can learn transitional rules efficiently as well. However, the built-in classification part consisting of two MLP layers is too shallow to learn features. Variant B has a slight performance improvement, but it is still unacceptable. Variant C further improves the experimental results, but they are still lower than the proposed model.</p>
<p>The comparison between variant A and variant C shows the necessity of an independent classifier. A key point here is that model C cannot be seen as simply adding MLP layers to model A, since the training process of model A with deeper MLP layers is different from that of model C. For model A with deeper MLP layers, the training process updates all weights in a training step and thus considers the whole model. In contrast, the extractor part is frozen during the training of the independent MLP classifier.</p>
<p>The comparison between variant C and variant D shows the necessity of a KAN module. Variant C uses a fixed activation. The KAN module, with its learnable activation functions, is better suited to modeling the complex interdependencies in PSG data than the fixed-activation MLP used in Variant C. Hence, the proposed model achieves a better performance.</p>
<p>The comparison between variant B and variant D shows the necessity of a GRU cell to learn the transition rules among neighbouring sleep epochs.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<label>5</label>
<title>Conclusion</title>
<p>In this study, a novel Kolmogorov&#x2013;Arnold Network-based model, named SimpleKANSleepNet, is proposed to classify sleep stages. Temporal features and frequency features are extracted by a dual-branch CNN using large and small filters, respectively. These features are then fed into a three-layer KAN for classification. The experimental results on the ISRUC-S1 subset show that the overall accuracy, the F1-score, and Cohen&#x2019;s kappa reach 0.812, 0.793, and 0.757, respectively, achieving comparable performance. Experimental results on SleepEDF-153 show that the proposed model achieves high performance on majority classes when the data are imbalanced. However, the classification performance for minority classes can be improved through data balancing strategies, especially with the undersampling strategy. This also indicates a drawback: the KAN model is sensitive to the data distribution. Another drawback of the proposed model is the computational speed. Currently, the KAN-based model can only be trained on CPUs, while other deep learning architectures are GPU-based, which limits its architecture construction and execution efficiency. However, this characteristic also facilitates deployment on artificial intelligence devices without GPU support. Moreover, the proposed two-step model architecture splits the feature extraction and classification process, which also prevents the KAN from providing interpretability due to the non-interpretable features generated by CNN. In the future, one potential research direction is to develop a GPU-based KAN model, which would address both interpretability and computational speed issues. Furthermore, it is worthwhile to explore new KAN-based architectures for sleep stage classification tasks.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>XJ: Funding acquisition, Methodology, Validation, Writing &#x2013; original draft, Writing &#x2013; review and editing. LW: Methodology, Writing &#x2013; review and editing. YZ: Resources, Supervision, Writing &#x2013; review and editing.</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Aghaomidi</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Ge</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>ECG-SleepNet: deep learning-based comprehensive sleep stage classification using ECG signals</article-title>. <source>arXiv:2412.01929</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2412.01929</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Alshammari</surname>
<given-names>T. S.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Applying machine learning algorithms for the classification of sleep disorders</article-title>. <source>IEEE Access</source> <volume>12</volume>, <fpage>36110</fpage>&#x2013;<lpage>36121</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2024.3374408</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Banluesombatkul</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Ouppaphan</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Leelaarporn</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Lakhan</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Chaitusaney</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Jaimchariya</surname>
<given-names>N.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>MetaSleepLearner: a pilot study on fast adaptation of bio-signals-based sleep stage classifier to new individual subject using meta-learning</article-title>. <source>IEEE J. Biomed. Health Inf.</source> <volume>25</volume> (<issue>6</issue>), <fpage>1949</fpage>&#x2013;<lpage>1963</lpage>. <pub-id pub-id-type="doi">10.1109/JBHI.2020.3037693</pub-id>
<pub-id pub-id-type="pmid">33180737</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>W. H.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>A feature fusion model based on temporal convolutional network for automatic sleep staging using single-channel EEG</article-title>. <source>IEEE J. Biomed. Health Inf.</source> <volume>28</volume> (<issue>11</issue>), <fpage>6641</fpage>&#x2013;<lpage>6652</lpage>. <pub-id pub-id-type="doi">10.1109/JBHI.2024.3457969</pub-id>
<pub-id pub-id-type="pmid">39504300</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Berry</surname>
<given-names>R. B.</given-names>
</name>
<name>
<surname>Brooks</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Gamaldo</surname>
<given-names>C. E.</given-names>
</name>
<name>
<surname>Harding</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Marcus</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Vaughn</surname>
<given-names>B. V.</given-names>
</name>
</person-group> (<year>2012</year>). &#x201c;<article-title>The AASM manual for the scoring of sleep and associated events</article-title>,&#x201d; in <source>Rules, terminology and technical specifications</source> (<publisher-loc>Darien, Illinois</publisher-loc>: <publisher-name>American Academy of Sleep Medicine</publisher-name>), <volume>176</volume>, <fpage>2012</fpage>.</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chambon</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Galtier</surname>
<given-names>M. N.</given-names>
</name>
<name>
<surname>Arnal</surname>
<given-names>P. J.</given-names>
</name>
<name>
<surname>Wainrib</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Gramfort</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>A deep learning architecture for temporal sleep stage classification using multivariate and multimodal time series</article-title>. <source>IEEE Trans. Neural Syst. Rehabilitation Eng.</source> <volume>26</volume> (<issue>4</issue>), <fpage>758</fpage>&#x2013;<lpage>769</lpage>. <pub-id pub-id-type="doi">10.1109/TNSRE.2018.2813138</pub-id>
<pub-id pub-id-type="pmid">29641380</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Dai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2023</year>). &#x201c;<article-title>StAGN: spatial-temporal adaptive graph network via contrastive learning for sleep stage classification</article-title>,&#x201d; in <conf-name>Proceedings of the 2023 SIAM International Conference on Data Mining (SDM). Proceedings</conf-name> (<publisher-loc>Philadelphia</publisher-loc>: <publisher-name>Society for Industrial and Applied Mathematics</publisher-name>). <pub-id pub-id-type="doi">10.1137/1.9781611977653.ch23</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cho</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Merrienboer</surname>
<given-names>B. V.</given-names>
</name>
<name>
<surname>Gulcehre</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Bahdanau</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Bougares</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Schwenk</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Learning phrase representations using RNN encoder-decoder for statistical machine translation</article-title>. <source>arXiv preprint arXiv:1406.1078</source>.
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Duan</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>MultiChannelSleepNet: a transformer-based model for automatic sleep stage classification with PSG</article-title>. <source>IEEE J. Biomed. Health Inf.</source> <volume>27</volume> (<issue>9</issue>), <fpage>4204</fpage>&#x2013;<lpage>4215</lpage>. <pub-id pub-id-type="doi">10.1109/JBHI.2023.3284160</pub-id>
<pub-id pub-id-type="pmid">37289607</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Eldele</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Kwoh</surname>
<given-names>C. K.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>An attention-based deep learning approach for sleep stage classification with single-channel EEG</article-title>. <source>IEEE Trans. Neural Syst. Rehabilitation Eng.</source> <volume>29</volume>, <fpage>809</fpage>&#x2013;<lpage>818</lpage>. <pub-id pub-id-type="doi">10.1109/TNSRE.2021.3076234</pub-id>
<pub-id pub-id-type="pmid">33909566</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Goldberger</surname>
<given-names>A. L.</given-names>
</name>
<name>
<surname>Amaral</surname>
<given-names>L. A.</given-names>
</name>
<name>
<surname>Glass</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hausdorff</surname>
<given-names>J. M.</given-names>
</name>
<name>
<surname>Ivanov</surname>
<given-names>P. C.</given-names>
</name>
<name>
<surname>Mark</surname>
<given-names>R. G.</given-names>
</name>
<etal/>
</person-group> (<year>2000</year>). <article-title>PhysioBank, PhysioToolkit, and PhysioNet: components of a new research resource for complex physiologic signals</article-title>. <source>Circulation</source> <volume>101</volume> (<issue>23</issue>), <fpage>E215</fpage>&#x2013;<lpage>E220</lpage>. <pub-id pub-id-type="doi">10.1161/01.cir.101.23.e215</pub-id>
<pub-id pub-id-type="pmid">10851218</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Qiu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Yi</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Advancing sleep stages classification through a dual-graphormer approach</article-title>. <source>Expert Syst. Appl.</source> <volume>288</volume> (<issue>September</issue>), <fpage>128220</fpage>. <pub-id pub-id-type="doi">10.1016/j.eswa.2025.128220</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hussein</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>George Raed</surname>
<given-names>L. E.</given-names>
</name>
<name>
<surname>Firas</surname>
<given-names>S. M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Accurate method for sleep stages classification using discriminated features and single EEG channel</article-title>. <source>Biomed. Signal Process. Control</source> <volume>84</volume> (<issue>July</issue>), <fpage>104688</fpage>. <pub-id pub-id-type="doi">10.1016/j.bspc.2023.104688</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jadhav</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Mukhopadhyay</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Automated sleep stage scoring using time-frequency spectra convolution neural network</article-title>. <source>IEEE Trans. Instrum. Meas.</source> <volume>71</volume>, <fpage>1</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1109/TIM.2022.3177747</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ji</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>Li</given-names>
</name>
<name>
<surname>Wen</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Jumping knowledge based spatial-temporal graph convolutional networks for automatic sleep stage classification</article-title>. <source>IEEE Trans. Neural Syst. Rehabilitation Eng.</source> <volume>30</volume>, <fpage>1464</fpage>&#x2013;<lpage>1472</lpage>. <pub-id pub-id-type="doi">10.1109/TNSRE.2022.3176004</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ji</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>Li</given-names>
</name>
<name>
<surname>Wen</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2023a</year>). <article-title>3DSleepNet: a multi-channel bio-signal based sleep stages classification method using deep learning</article-title>. <source>IEEE Trans. Neural Syst. Rehabilitation Eng.</source> <volume>31</volume> (<issue>August</issue>), <fpage>3513</fpage>&#x2013;<lpage>3523</lpage>. <pub-id pub-id-type="doi">10.1109/TNSRE.2023.3309542</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ji</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>Li</given-names>
</name>
<name>
<surname>Wen</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Barua</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Acharya</surname>
<given-names>U. R.</given-names>
</name>
</person-group> (<year>2023b</year>). <article-title>MixSleepNet: a multi-type convolution combined sleep stage classification model</article-title>. <source>Comput. Methods Programs Biomed.</source>, <fpage>107992</fpage>. <pub-id pub-id-type="doi">10.1016/j.cmpb.2023.107992</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jia</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020a</year>). <article-title>SleepPrintNet: a multivariate multimodal neural network based on physiological time-series for automatic sleep staging</article-title>. <source>IEEE Trans. Artif. Intell.</source> <volume>1</volume> (<issue>3</issue>), <fpage>248</fpage>&#x2013;<lpage>257</lpage>. <pub-id pub-id-type="doi">10.1109/TAI.2021.3060350</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Jia</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2020b</year>). &#x201c;<article-title>Graphsleepnet: adaptive spatial-temporal graph convolutional networks for sleep stage classification</article-title>,&#x201d; in <conf-name>Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence, IJCAI, Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence</conf-name> (<publisher-loc>Yokohama</publisher-loc>: <publisher-name>IJCAI</publisher-name>), <fpage>1324</fpage>&#x2013;<lpage>1330</lpage>.</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jia</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ning</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>R.</given-names>
</name>
<etal/>
</person-group> (<year>2021a</year>). <article-title>Multi-view spatial-temporal graph convolutional networks with domain generalization for sleep stage classification</article-title>. <source>IEEE Trans. Neural Syst. Rehabilitation Eng.</source> <volume>29</volume>, <fpage>1977</fpage>&#x2013;<lpage>1986</lpage>. <pub-id pub-id-type="doi">10.1109/TNSRE.2021.3110665</pub-id>
<pub-id pub-id-type="pmid">34487495</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jia</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2021b</year>). <article-title>SalientSleepNet: multimodal salient wave detection network for sleep staging</article-title>. <source>arXiv:2105.13864</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2105.13864</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kemp</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zwinderman</surname>
<given-names>A. H.</given-names>
</name>
<name>
<surname>Tuk</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Kamphuisen</surname>
<given-names>H. A. C.</given-names>
</name>
<name>
<surname>Oberye</surname>
<given-names>J. J. L.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Analysis of a sleep-dependent neuronal feedback loop: the slow-wave microcontinuity of the EEG</article-title>. <source>IEEE Trans. Biomed. Eng.</source> <volume>47</volume> (<issue>9</issue>), <fpage>1185</fpage>&#x2013;<lpage>1194</lpage>. <pub-id pub-id-type="doi">10.1109/10.867928</pub-id>
<pub-id pub-id-type="pmid">11008419</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khalighi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sousa</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Moutinho Santos</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Nunes</surname>
<given-names>U.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>ISRUC-Sleep: a comprehensive public dataset for sleep researchers</article-title>. <source>Comput. Methods Programs Biomed.</source> <volume>124</volume> (<issue>February</issue>), <fpage>180</fpage>&#x2013;<lpage>192</lpage>. <pub-id pub-id-type="doi">10.1016/j.cmpb.2015.10.013</pub-id>
<pub-id pub-id-type="pmid">26589468</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>U-KAN makes strong backbone for medical image segmentation and generation</article-title>. <source>Proc. AAAI Conf. Artif. Intell.</source> <volume>39</volume> (<issue>5</issue>), <fpage>4652</fpage>&#x2013;<lpage>4660</lpage>. <pub-id pub-id-type="doi">10.1609/aaai.v39i5.32491</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Vaidya</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>KAN: Kolmogorov-arnold networks</article-title>. <source>arXiv:2404.19756</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2404.19756</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Memar</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Faradji</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>A novel multi-class EEG-based sleep stage classification system</article-title>. <source>IEEE Trans. Neural Syst. Rehabilitation Eng.</source> <volume>26</volume> (<issue>1</issue>), <fpage>84</fpage>&#x2013;<lpage>95</lpage>. <pub-id pub-id-type="doi">10.1109/TNSRE.2017.2776149</pub-id>
<pub-id pub-id-type="pmid">29324406</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mousavi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Afghah</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Rajendra Acharya</surname>
<given-names>U.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>SleepEEGNet: automated sleep stage scoring with sequence to sequence deep learning approach</article-title>. <source>PLOS ONE</source> <volume>14</volume> (<issue>5</issue>), <fpage>e0216456</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0216456</pub-id>
<pub-id pub-id-type="pmid">31063501</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Phan</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Andreotti</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Cooray</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Ch&#xe9;n</surname>
<given-names>O. Y.</given-names>
</name>
<name>
<surname>De Vos</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2019a</year>). <article-title>Joint classification and prediction CNN framework for automatic sleep stage classification</article-title>. <source>IEEE Trans. Biomed. Eng.</source> <volume>66</volume> (<issue>5</issue>), <fpage>1285</fpage>&#x2013;<lpage>1296</lpage>. <pub-id pub-id-type="doi">10.1109/TBME.2018.2872652</pub-id>
<pub-id pub-id-type="pmid">30346277</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Phan</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Andreotti</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Cooray</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Ch&#xe9;n</surname>
<given-names>O. Y.</given-names>
</name>
<name>
<surname>De Vos</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2019b</year>). <article-title>SeqSleepNet: end-to-end hierarchical recurrent neural network for sequence-to-sequence automatic sleep staging</article-title>. <source>IEEE Trans. Neural Syst. Rehabilitation Eng.</source> <volume>27</volume> (<issue>3</issue>), <fpage>400</fpage>&#x2013;<lpage>410</lpage>. <pub-id pub-id-type="doi">10.1109/TNSRE.2019.2896659</pub-id>
<pub-id pub-id-type="pmid">30716040</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Siegel</surname>
<given-names>J. M.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Clues to the functions of mammalian sleep</article-title>. <source>Nature</source> <volume>437</volume> (<issue>7063</issue>), <fpage>1264</fpage>&#x2013;<lpage>1271</lpage>. <pub-id pub-id-type="doi">10.1038/nature04285</pub-id>
<pub-id pub-id-type="pmid">16251951</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Sun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Deep convolutional network method for automatic sleep stage classification based on neurophysiological signals</article-title>,&#x201d; in <conf-name>2018 11th International Congress on Image and Signal Processing, BioMedical Engineering and Informatics (CISP-BMEI)</conf-name>, <fpage>1</fpage>&#x2013;<lpage>5</lpage>. <pub-id pub-id-type="doi">10.1109/CISP-BMEI.2018.8633058</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Supratak</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>TinySleepNet: an efficient deep learning model for sleep stage scoring based on raw single-channel EEG</article-title>,&#x201d; in <conf-name>2020 42nd Annual International Conference of the IEEE Engineering in Medicine &#x26; Biology Society (EMBC), July</conf-name>, <fpage>641</fpage>&#x2013;<lpage>644</lpage>. <pub-id pub-id-type="doi">10.1109/EMBC44109.2020.9176741</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Supratak</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Dong</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>DeepSleepNet: a model for automatic sleep stage scoring based on raw single-channel EEG</article-title>. <source>IEEE Trans. Neural Syst. Rehabilitation Eng.</source> <volume>25</volume> (<issue>11</issue>), <fpage>1998</fpage>&#x2013;<lpage>2008</lpage>. <pub-id pub-id-type="doi">10.1109/TNSRE.2017.2721116</pub-id>
<pub-id pub-id-type="pmid">28678710</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ye</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>CoSleep: a multi-view representation learning framework for self-supervised learning of sleep stage classification</article-title>. <source>IEEE Signal Process. Lett.</source> <volume>29</volume>, <fpage>189</fpage>&#x2013;<lpage>193</lpage>. <pub-id pub-id-type="doi">10.1109/LSP.2021.3130826</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yubo</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zou</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lei</surname>
<given-names>Li</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>MMASleepNet: a multimodal attention network based on electrophysiological signals for automatic sleep staging</article-title>. <source>Front. Neurosci.</source> <volume>16</volume>, <fpage>973761</fpage>. <pub-id pub-id-type="doi">10.3389/fnins.2022.973761</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Lv</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>A review of automated sleep stage based on EEG signals</article-title>. <source>Biocybern. Biomed. Eng.</source> <volume>44</volume> (<issue>3</issue>), <fpage>651</fpage>&#x2013;<lpage>673</lpage>. <pub-id pub-id-type="doi">10.1016/j.bbe.2024.06.004</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>TrafficKAN-GCN: graph convolutional-based kolmogorov-arnold network for traffic flow optimization</article-title>. <source>arXiv:2503.03276</source>. <pub-id pub-id-type="doi">10.48550/arXiv.2503.03276</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Shu</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>MaskSleepNet: a cross-modality adaptation neural network for heterogeneous signals processing in sleep staging</article-title>. <source>IEEE J. Biomed. Health Inf.</source> <volume>27</volume> (<issue>5</issue>), <fpage>2353</fpage>&#x2013;<lpage>2364</lpage>. <pub-id pub-id-type="doi">10.1109/JBHI.2023.3253728</pub-id>
<pub-id pub-id-type="pmid">37028323</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/191181/overview">Vincenzo Bonnici</ext-link>, University of Parma, Italy</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3283609/overview">Sifi Nouria</ext-link>, Universite Abou Bekr Belkaid Tlemcen Faculte de Technologie, Algeria</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3298932/overview">Mahdi Samaeee</ext-link>, Shiraz University, Iran</p>
</fn>
</fn-group>
</back>
</article>