<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Neurosci.</journal-id>
<journal-title>Frontiers in Neuroscience</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Neurosci.</abbrev-journal-title>
<issn pub-type="epub">1662-453X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fnins.2024.1508747</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Neuroscience</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>ID3RSNet: cross-subject driver drowsiness detection from raw single-channel EEG with an interpretable residual shrinkage network</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Feng</surname> <given-names>Xiao</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Guo</surname> <given-names>Zhongyuan</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2864296/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Kwong</surname> <given-names>Sam</given-names></name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>School of Communications and Information Engineering, Chongqing University of Posts and Telecommunications</institution>, <addr-line>Chongqing</addr-line>, <country>China</country></aff>
<aff id="aff2"><sup>2</sup><institution>Henan High-speed Railway Operation and Maintenance Engineering Research Center</institution>, <addr-line>Zhengzhou, Henan</addr-line>, <country>China</country></aff>
<aff id="aff3"><sup>3</sup><institution>College of Electronic and Information Engineering, Southwest University</institution>, <addr-line>Chongqing</addr-line>, <country>China</country></aff>
<aff id="aff4"><sup>4</sup><institution>Department of Computer Science, City University of Hong Kong</institution>, <addr-line>Hong Kong SAR</addr-line>, <country>China</country></aff>
<aff id="aff5"><sup>5</sup><institution>School of Data Science, Lingnan University</institution>, <addr-line>Hong Kong SAR</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by" id="fn0001">
<p>Edited by: Szczepan Iwanski, Institute of Psychiatry and Neurology (IPiN), Poland</p>
</fn>
<fn fn-type="edited-by" id="fn0002">
<p>Reviewed by: Vincenzo Ronca, Sapienza University of Rome, Italy</p>
<p>JiQian Zhang, Anhui Normal University, China</p>
</fn>
<corresp id="c001">&#x002A;Correspondence: Zhongyuan Guo, <email>guozhongyuan@swu.edu.cn</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>08</day>
<month>01</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>18</volume>
<elocation-id>1508747</elocation-id>
<history>
<date date-type="received">
<day>09</day>
<month>10</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>24</day>
<month>12</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2025 Feng, Guo and Kwong.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Feng, Guo and Kwong</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Accurate monitoring of drowsy driving through electroencephalography (EEG) can effectively reduce traffic accidents. Developing a calibration-free drowsiness detection system with single-channel EEG alone is very challenging due to the non-stationarity of EEG signals, the heterogeneity among different individuals, and the relatively limited information compared to multi-channel EEG. Although deep learning-based approaches can effectively decode EEG signals, most deep learning models lack interpretability due to their black-box nature. To address these issues, we propose a novel interpretable residual shrinkage network, namely, ID3RSNet, for cross-subject driver drowsiness detection using single-channel EEG signals. First, a base feature extractor is employed to extract the essential features of EEG frequencies; to enhance the discriminative feature learning ability, the residual shrinkage building unit with attention mechanism is adopted to perform adaptive feature recalibration, and soft threshold denoising inside the residual network is further applied to achieve automatic feature extraction. In addition, a fully connected layer with weight freezing is utilized to effectively suppress the negative influence of neurons on the model classification. With the global average pooling (GAP) layer incorporated in the residual shrinkage network structure, we introduce an EEG-based Class Activation Map (ECAM) interpretable method to enable visualization analysis of sample-wise learned patterns to effectively explain the model decision. Extensive experimental results demonstrate that the proposed method achieves superior classification performance and has found neurophysiologically reliable evidence of classification.</p>
</abstract>
<kwd-group>
<kwd>single-channel EEG</kwd>
<kwd>drowsiness detection</kwd>
<kwd>residual shrinkage network</kwd>
<kwd>attention</kwd>
<kwd>interpretability</kwd>
</kwd-group>
<counts>
<fig-count count="8"/>
<table-count count="5"/>
<equation-count count="11"/>
<ref-count count="48"/>
<page-count count="14"/>
<word-count count="9597"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Perception Science</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<label>1</label>
<title>Introduction</title>
<p>Driver drowsiness is a significant factor leading to traffic accidents as it can cause a serious decline in vigilance, attention, and cognitive ability (<xref ref-type="bibr" rid="ref2">Balam and Chinara, 2021</xref>). Statistics indicate that fatigue driving may cause as much as 20% of all vehicle collisions (<xref ref-type="bibr" rid="ref46">Zhang Z. et al., 2022</xref>). Effectively predicting and warning about drowsiness in driving can help drivers stay alert before they become drowsy or fall asleep (<xref ref-type="bibr" rid="ref36">Perkins et al., 2023</xref>). Therefore, developing a reliable and effective drowsiness monitoring system has emerged as a critical priority in preventing traffic accidents and saving lives (<xref ref-type="bibr" rid="ref45">Zhang Y. et al., 2022</xref>).</p>
<p>Currently, there are a number of reported approaches for detecting driver fatigue or drowsiness, including behavioral (<xref ref-type="bibr" rid="ref46">Zhang Z. et al., 2022</xref>; <xref ref-type="bibr" rid="ref36">Perkins et al., 2023</xref>; <xref ref-type="bibr" rid="ref44">You et al., 2020</xref>), vehicle-based (<xref ref-type="bibr" rid="ref36">Perkins et al., 2023</xref>; <xref ref-type="bibr" rid="ref29">Lan et al., 2024</xref>), and physiological (<xref ref-type="bibr" rid="ref46">Zhang Z. et al., 2022</xref>; <xref ref-type="bibr" rid="ref29">Lan et al., 2024</xref>) approaches. For example, behavior-based approaches allow to analyze behavioral characteristics of a driver&#x2019;s face, eyes, or mouth using machine vision technique. These approaches assess alertness level by detecting facial expressions, calculating eye closure time, estimating head posture, and yawning frequency. However, they may be disturbed by lighting conditions and require accurate evaluation of head posture. Vehicle-based approaches focus on detecting drowsiness through vehicle motion and driver handling behavior data (e.g., steering wheel angle, driving acceleration, and vehicle speed) (<xref ref-type="bibr" rid="ref44">You et al., 2020</xref>). However, they rely on multiple vehicle sensors to monitor driving parameters and may face limitations such as sensitivity and adaptability to environmental factors. In addition, physiological signal-based methods monitor signs of driver drowsiness by analyzing the driver&#x2019;s physiological signals such as electrocardiogram (ECG), electromyogram (EMG), electrooculogram (EOG), and electroencephalogram (EEG) (<xref ref-type="bibr" rid="ref36">Perkins et al., 2023</xref>). Drowsiness correlates closely with brain activity, and EEG is the most adaptable and widely used for studying brain functions compared to other physiological signals. 
Therefore, EEG is often considered the gold standard for detecting driver drowsiness (<xref ref-type="bibr" rid="ref29">Lan et al., 2024</xref>).</p>
<p>With the rapid development of EEG acquisition devices, an increasing number of researchers have studied EEG-based drowsiness detection (<xref ref-type="bibr" rid="ref8">Cui et al., 2022a</xref>,<xref ref-type="bibr" rid="ref11">c</xref>; <xref ref-type="bibr" rid="ref42">Wan et al., 2023</xref>). Conventional EEG-based drowsiness detection methods laid a solid foundation for ongoing research in this field. To capture the EEG features of interest, they often rely on expert knowledge or priori knowledge (<xref ref-type="bibr" rid="ref11">Cui et al., 2022c</xref>). For instance, <xref ref-type="bibr" rid="ref33">Ogino and Mitsukura (2018)</xref> proposed a feature selection method using stepwise linear discriminant analysis and power spectral density features from a single-channel EEG signal. <xref ref-type="bibr" rid="ref24">Hu and Min (2018)</xref> utilized sample entropy, approximate entropy, spectral entropy, and fuzzy entropy of EEG signals as features for recognizing driving fatigue. In contrast, deep learning (DL) enables end-to-end learning from raw, high-dimensional EEG data without prior feature crafting, achieving remarkable performance (<xref ref-type="bibr" rid="ref30">Lawhern et al., 2018</xref>; <xref ref-type="bibr" rid="ref40">Schirrmeister et al., 2017</xref>; <xref ref-type="bibr" rid="ref21">Gao et al., 2019</xref>). For example, a novel EEG-based spatiotemporal convolutional neural network was developed to detect driver fatigue from multi-channel EEG signals (<xref ref-type="bibr" rid="ref21">Gao et al., 2019</xref>). <xref ref-type="bibr" rid="ref35">Paulo et al. (2021)</xref> proposed a deep convolutional neural network for cross-subject calibration-free drowsiness detection based on EEG signals&#x2019; spatiotemporal image encoding representations. <xref ref-type="bibr" rid="ref13">Di Flumeri et al. (2022)</xref> developed a synthetic EEG-based index to detect drowsy events in automotive applications. 
The proposed MDrow index is proved to be reliable and effective. To further improve classification performance, <xref ref-type="bibr" rid="ref31">Li et al. (2023)</xref> also attempted to propose an enhanced ensemble deep random vector functional link network for cross-subject fatigue detection performance.</p>
<p>Most of these methods are based on multi-channel EEG, achieving excellent performance. However, multi-channel EEG recording methods are complex to operate, difficult to carry, and have high device costs, all of which hinder their practical application. In addition, the many EEG electrodes required to perform an EEG acquisition impose significant restrictions on the user&#x2019;s mobility. Compared to multi-channel EEG, a single-channel scheme can offer more practical advantages such as reduced relevant costs, easy signal acquisition, and improved user comfort (<xref ref-type="bibr" rid="ref23">Gong et al., 2024</xref>). However, due to the individual variability among subjects and non-stationarity in EEG signals and the relatively limited information compared to multi-channel EEG (<xref ref-type="bibr" rid="ref23">Gong et al., 2024</xref>; <xref ref-type="bibr" rid="ref32">Liu et al., 2024</xref>), designing a zero-calibration drowsiness detection system using only single-channel EEG remains a very challenging task. Additionally, the high sensitivity to artifacts and the low signal-to-noise ratio of EEG signals also exacerbate the difficulty of this task (<xref ref-type="bibr" rid="ref11">Cui et al., 2022c</xref>).</p>
<p>To address these limitations, some researchers recently focus on single-channel EEG-based drowsiness detection using DL methods (<xref ref-type="bibr" rid="ref32">Liu et al., 2024</xref>; <xref ref-type="bibr" rid="ref17">Fahimi et al., 2019</xref>; <xref ref-type="bibr" rid="ref14">Ding et al., 2019</xref>; <xref ref-type="bibr" rid="ref39">Reddy et al., 2024</xref>). For example, <xref ref-type="bibr" rid="ref32">Liu et al. (2024)</xref> explored a single-channel EEG-based self-training semi-supervised method to transform the unlabeled data into pseudo-labeled data and combine the fuzzy entropy feature for fatigue driving detection. <xref ref-type="bibr" rid="ref17">Fahimi et al. (2019)</xref> proposed an end-to-end deep convolutional neural network (CNN) to detect attentive mental states using a single-channel EEG. To design a portable wearable EEG device for recognizing driver drowsiness, <xref ref-type="bibr" rid="ref14">Ding et al. (2019)</xref> designed a DL model with a cascaded CNN and an attention mechanism. <xref ref-type="bibr" rid="ref39">Reddy et al. (2024)</xref> proposed an effective hybrid DL model for single-channel EEG-based subject-independent drowsiness detection, which combined discrete wavelet long short-term memory and convolutional neural networks. To extract task-relevant discriminative features, <xref ref-type="bibr" rid="ref15">Divvala and Mishra (2024)</xref> proposed a DL-based attention mechanism to recognize drowsiness state. However, these DL models in previous works are often regarded as &#x201C;black-box&#x201D; classifiers due to their lack of interpretability while maintaining accuracy (<xref ref-type="bibr" rid="ref20">Gao et al., 2023b</xref>; <xref ref-type="bibr" rid="ref10">Cui et al., 2021</xref>). Hence, it is crucial to develop an inherently interpretable DL model to address this limitation.</p>
<p>Some efforts had been made to explore interpretable models to understand the decision-making process based on the learned characteristics of input EEG. For instance, <xref ref-type="bibr" rid="ref10">Cui et al. (2021)</xref> proposed a CNN with long short-term memory (LSTM) to visualize the common EEG features learned from single-channel EEG signals for driver drowsiness classification. Furthermore, <xref ref-type="bibr" rid="ref9">Cui et al. (2022b)</xref> proposed an interpretable DL model with compact CNN structure to explain what features the model had learned from single-channel EEG signals. However, there is usually a trade-off between interpretability and performance (<xref ref-type="bibr" rid="ref43">Wang et al., 2020</xref>). Both inherently interpretable methods were designed at the cost of performance degradation as they did not adequately mine and extract the salient features implicit in raw single-channel EEG signals.</p>
<p>To address the above issues, we propose a novel interpretable residual shrinkage network (ID3RSNet) for driver drowsiness detection from single-channel EEG signals. The framework consists of a base feature extractor (BaseFE), residual shrinkage building unit (RSBU) with soft thresholding (ST), a global average pooling (GAP) layer, and a fully connected layer with weight freezing (FC-WF). First, the base feature extractor (BaseFE) extracts the essential features of EEG frequencies. Second, a residual shrinkage building unit (RSBU) with channel-wise thresholds is adopted to improve the feature learning ability. Automatic feature extraction is achieved by applying soft threshold denoising and attention mechanism within the residual shrinkage neural network. Then, a following GAP layer is added to avoid overfitting and improve generalization ability. In addition, a regularization method of weight freezing is applied in the FC layer to effectively suppress the negative influence of some input neurons on the model classification. Based on the designed residual shrinkage network structure, we can use an EEG-based class activation map (ECAM) interpretation method to visualize neurophysiologically common patterns learned from single-channel EEG signals for classification decision.</p>
<p>The main contributions in this work are as follows:</p>
<list list-type="order">
<list-item>
<p>To the best of our knowledge, we propose the first end-to-end inherently interpretable deep residual shrinkage network framework to achieve automatic feature extraction and enhance the feature learning ability for driver drowsiness detection. With only single-channel EEG used, the framework has greater potential practical value.</p>
</list-item>
<list-item>
<p>With the inherently interpretable model framework designed, we propose a class activation map interpretation method for raw single-channel EEG signals to reveal neurophysiologically common patterns in terms of the driver&#x2019;s mental state.</p>
</list-item>
<list-item>
<p>Extensive experiments with leave-one-subject-out cross-validation (LOSO-CV) demonstrate the effectiveness of the proposed method with reliable classification evidence discovered. This work also provides insight into the development of portable single-channel EEG devices with interpretable neural network for driver drowsiness detection.</p>
</list-item>
</list>
<p>The study is organized as follows. Section 2 is the materials and methods. Section 3 is the experimental results. Section 4 is the discussion and future works. The last section is conclusion.</p>
</sec>
<sec sec-type="methods" id="sec2">
<label>2</label>
<title>Methods</title>
<sec id="sec3">
<label>2.1</label>
<title>Overview</title>
<p>To accurately characterize drowsiness-related patterns and enhance feature representation from non-stationary EEG signals with high randomness and low signal-to-noise ratio, we propose a novel interpretable residual shrinkage network (ID3RSNet) framework, shown in <xref ref-type="fig" rid="fig1">Figure 1</xref>, which mainly consists of three modules: base feature extractor (BaseFE), residual shrinkage building unit (RSBU), and classification. First, the BaseFE extracts the essential features from a 3-s EEG signal. Following BaseFE, we propose a residual shrinkage building unit with channel-wise thresholds (RSBU-CW) to enhance the quality of the extracted features and achieve automatic extraction of important features, which helps to improve classification performance. Then, a global average pooling layer is added as a key component in the inherently interpretable model structure, which helps to avoid overfitting and improve the generalization ability. In addition, a regularization method of weight freezing in the FC layer is applied to effectively suppress the negative influence of some input neurons on the model classification. To provide a trustworthy interpretation of classification for the proposed ID3RSNet, we introduce an EEG-based class activation map (ECAM) interpretation method to reveal neurophysiologically task-related patterns.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>Overall architecture of the proposed interpretable network framework.</p>
</caption>
<graphic xlink:href="fnins-18-1508747-g001.tif"/>
</fig>
</sec>
<sec id="sec4">
<label>2.2</label>
<title>Network of the proposed model</title>
<sec id="sec5">
<label>2.2.1</label>
<title>Base feature extractor</title>
<p>Considering that convolutional neural networks (CNNs) are widely utilized to capture the EEG features of time series data (<xref ref-type="bibr" rid="ref26">Hu et al., 2023</xref>), we designed a base feature extractor (BaseFE) to extract the key EEG features. The BaseFE network is designed in a shallow network structure. As shown in <xref ref-type="fig" rid="fig1">Figure 1</xref>, the S-CNN module contains four layers: a 1-D convolution layer, a batch normalization layer, a Softshrink activation layer, and a dropout layer.</p>
<p>The Conv1D (32, 64, 2) in <xref ref-type="fig" rid="fig1">Figure 1</xref> refers to 1-D convolution layer with 32 filters, a kernel size of 64, and a stride of 2. Setting the kernel size to 64, half of the 128&#x202F;Hz data sampling rate, enables the model to extract the EEG frequency features in the range above 2&#x202F;Hz. Then, a batch normalization layer and a Softshrink activation layer are followed. The batch normalization is utilized to normalize small batches across each feature dimension, effectively mitigating internal covariate shifts (<xref ref-type="bibr" rid="ref27">Ioffe and Szegedy, 2015</xref>). Softshrink function is a non-linear activation function that is mainly used for sparse representation of EEG data and noise suppression. To mitigate model overfitting, a dropout layer is introduced in the following layer.</p>
</sec>
<sec id="sec6">
<label>2.2.2</label>
<title>Residual shrinkage building unit</title>
<p>Inspired by the fact that deep residual shrinkage networks achieved high fault diagnosis performance in vibration signals (<xref ref-type="bibr" rid="ref47">Zhao et al., 2020</xref>), we introduce the residual shrinkage network to improve the feature learning ability from single-channel EEG signals. Both the attention mechanism and automatic soft thresholding are integrated into the residual network to adaptively eliminate redundant information and select the most discriminative useful features during feature learning. This residual structure helps prevent vanishing gradients and exploding gradients in deep networks.</p>
<p>Based on the fact that the importance between each channel of the features learned from the EEG signal is different, we use a residual shrinkage building unit with channel-wise thresholds (RSBU-CW) in this work. In particular, the squeeze and excitation (SE) network is adopted to obtain a set of thresholds related to the individual channels by modeling the inter-dependencies between the features (<xref ref-type="bibr" rid="ref25">Hu et al., 2020</xref>). Soft thresholding incorporated in the RSBU structure can adaptively eliminate redundant information and select highly discriminative features.</p>
<p>As shown in <xref ref-type="fig" rid="fig1">Figure 1</xref>, two convolution layers Conv1D (32, 1, 1) with a kernel size of 1 and a stride of 1 are implemented in this block. Assuming that the BaseFE module generates a feature map <italic>I</italic> &#x2208; &#x211D;<italic><sup>L&#x202F;&#x00D7;&#x202F;T</sup></italic>, we apply two convolution operations (<italic>Conv</italic>1 and <italic>Conv</italic>2) to <italic>I</italic> to obtain <italic>U</italic> (<italic>U</italic>&#x2208; <italic>&#x211D;<sup>N&#x202F;&#x00D7;&#x202F;T</sup></italic>) in <xref ref-type="disp-formula" rid="EQ1">Equation 1</xref>:</p>
<disp-formula id="EQ1">
<label>(1)</label>
<mml:math id="M1">
<mml:mi>U</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi mathvariant="italic">Conv</mml:mi>
<mml:mn>2</mml:mn>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="italic">Conv</mml:mi>
<mml:mn>1</mml:mn>
<mml:mfenced open="(" close=")">
<mml:mi>I</mml:mi>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>
<p>where <italic>T</italic> represents the length of <italic>U</italic>, and <italic>N</italic> represents the total number of features.</p>
<p>Then, the feature map <italic>U</italic> is squeezed to a 1-D vector using an absolute operation and an adaptive average pooling layer. The excitation operation is used to capture the correlation between the individual channels, which can retain the channels with the most useful feature information and suppress the channels with less feature information (<xref ref-type="bibr" rid="ref16">Eldele et al., 2021</xref>). In this study, two fully connected (FC) layers are added for information aggregation. The first FC layer, followed by ReLU, aims to reduce dimensionality, while the subsequent layer, followed by a sigmoid function, aims to raise dimensionality. The output of the FC layers is scaled to the range (0, 1) with the scaling parameters. The scaling parameter is described in <xref ref-type="disp-formula" rid="EQ2">Equation 2</xref>:</p>
<disp-formula id="EQ2">
<label>(2)</label>
<mml:math id="M2">
<mml:msub>
<mml:mi>&#x03C3;</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:math>
</disp-formula>
<p>where <italic>z<sub>r</sub></italic> represents the feature at the <italic>r</italic>th neuron, and <italic>&#x03C3;<sub>r</sub></italic> represents the <italic>r</italic>th scaling parameter. To make the soft threshold positive and not too large, the scaling parameter <italic>&#x03C3;<sub>r</sub></italic> is multiplied with the average absolute value of <italic>U<sub>r</sub></italic> to obtain the threshold. The threshold used in RSBU-CW is expressed in <xref ref-type="disp-formula" rid="EQ3">Equation 3</xref>:</p>
<disp-formula id="EQ3">
<label>(3)</label>
<mml:math id="M3">
<mml:msub>
<mml:mi>&#x03C4;</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi>&#x03C3;</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mo>&#x22C5;</mml:mo>
<mml:munder>
<mml:mi mathvariant="normal">average</mml:mi>
<mml:mi>i</mml:mi>
</mml:munder>
<mml:mspace width="0.25em"/>
<mml:mo stretchy="true">|</mml:mo>
<mml:msub>
<mml:mi>U</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="true">|</mml:mo>
</mml:math>
</disp-formula>
<p>where <italic>&#x03C4;<sub>r</sub></italic> represents the threshold for the <italic>r</italic>th channel of the feature map, and <italic>i, r</italic> represent the indexes of length and channel of the feature map <italic>U</italic>, respectively. Rather than the artificial design of filters by experts, the threshold is automatically determined through the SE attention mechanism. The mechanism is that the CNN automatically conducts filter learning and transforms the original data to a new space for soft thresholding. The soft thresholding function formula can be expressed in <xref ref-type="disp-formula" rid="EQ4">Equation 4</xref>:</p>
<disp-formula id="EQ4">
<label>(4)</label>
<mml:math id="M4">
<mml:mi mathvariant="normal">y</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo stretchy="true">{</mml:mo>
<mml:mtable>
<mml:mtr>
<mml:mtd>
<mml:mi>x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x03C4;</mml:mi>
<mml:mo>,</mml:mo>
</mml:mtd>
<mml:mtd>
<mml:mi>x</mml:mi>
<mml:mo>&#x003E;</mml:mo>
<mml:mi>&#x03C4;</mml:mi>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
</mml:mtd>
<mml:mtd>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x03C4;</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>&#x03C4;</mml:mi>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mi>x</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>&#x03C4;</mml:mi>
<mml:mo>,</mml:mo>
</mml:mtd>
<mml:mtd>
<mml:mi>x</mml:mi>
<mml:mo>&#x003C;</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x03C4;</mml:mi>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula>
<p>In this study, <italic>&#x03C4;</italic> is a parameter that can be learned by means of automatic learning. The thresholds can be kept in a reasonable range, thereby preventing the output of soft thresholding being all zeros. Finally, the input <italic>I</italic> is combined with the enhanced features O learned from the residual unit by adding an identity shortcut connection. The final output of this residual unit is expressed in <xref ref-type="disp-formula" rid="EQ5">Equation 5</xref>:</p>
<disp-formula id="EQ5">
<label>(5)</label>
<mml:math id="M5">
<mml:mi mathvariant="normal">P</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>I</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="normal">O</mml:mi>
</mml:math>
</disp-formula>
</sec>
<sec id="sec7">
<label>2.2.3</label>
<title>GAP and FC-WF</title>
<p>To avoid overfitting, the GAP is used to replace the FC layer, and the number of model parameters is further reduced significantly. The GAP reduces the <italic>N</italic> dimensional filtered signal to <italic>N</italic> feature points through an average pooling operation. It not only helps to improve generalization capability of the network but also allows an EEG-based Class Activation Map (ECAM) interpretable method to reveal learned patterns in terms of the driver&#x2019;s mental state.</p>
<p>In the fully connected layer, the weight freezing method is introduced to suppress the update of some learnable parameters by freezing some weights during the backpropagation process. Assuming the features inputted into the FC layer are denoted as <bold>F&#x202F;=</bold> {<bold>f</bold><sub>1</sub>,&#x2026;, <bold>f</bold><italic>
<sub>n</sub>
</italic>}, where <bold>F</bold>&#x2208;&#x211D;<italic><sup>L&#x202F;&#x00D7;&#x202F;N</sup></italic>. <italic>L</italic> denotes the size of the mini-batch, and <italic>N</italic> is the feature dimension. <bold>W</bold><italic>
<sub>n</sub>
</italic> and <bold>f</bold><italic><sub>n</sub></italic> denote the weights and the vector of inputs of the FC layer, respectively. Weight freezing is proposed as a regularization method to improve classification accuracy, which can be implemented in <xref ref-type="disp-formula" rid="EQ6">Equation 6</xref>:</p>
<disp-formula id="EQ6">
<label>(6)</label>
<mml:math id="M6">
<mml:msub>
<mml:mi mathvariant="bold">W</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">W</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mi mathvariant="bold">K</mml:mi>
<mml:mo>&#x2299;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>&#x03B7;</mml:mi>
<mml:mo>&#x22C5;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>z</mml:mi>
<mml:mo>&#x02DC;</mml:mo>
</mml:mover>
<mml:mi>m</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:msub>
<mml:mi mathvariant="bold">f</mml:mi>
<mml:mi>n</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>
<p>where <bold>K</bold> is a mask matrix with identical dimensions to <bold>W</bold><italic>
<sub>n</sub>
</italic>, and the elements of <bold>K</bold> are uniformly distributed within the range [0,1]. &#x2299; is the element-wise multiplication and <italic>&#x03B7;</italic> denotes the learning rate of the optimizer.</p>
<p>When an element is masked, it cannot be updated during backpropagation. Here, we set the threshold value t (=0.2) of <bold>K</bold>, which determines the number of frozen parameters in <bold>W</bold><italic>
<sub>n</sub>
</italic>. In the FC layer, we utilize the weight freezing method not only to make sparse connections but also to improve classification accuracy. Finally, the cross-entropy loss function with label smoothing regularization of parameter <italic>&#x03B1;</italic> (=0.1) is utilized to optimize the classification model, which can be described in <xref ref-type="disp-formula" rid="EQ7">Equation 7</xref>:</p>
<disp-formula id="EQ7">
<label>(7)</label>
<mml:math id="M7">
<mml:mi mathvariant="normal">L</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo stretchy="true">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>C</mml:mi>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">y</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x03B1;</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>+</mml:mo>
<mml:mfrac bevelled="true">
<mml:mi>&#x03B1;</mml:mi>
<mml:mi>C</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
<mml:mo>log</mml:mo>
<mml:mfenced open="(" close=")">
<mml:msub>
<mml:mi mathvariant="normal">p</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mfenced>
</mml:mrow>
</mml:mstyle>
</mml:math>
</disp-formula>
<p>where <italic>y<sub>c</sub></italic> is the true label, <italic>p<sub>c</sub></italic> is the predicted probability of the model for the class <italic>c</italic>, and <italic>C</italic> is the number of classes.</p>
</sec>
</sec>
<sec id="sec8">
<label>2.3</label>
<title>ECAM interpretation method</title>
<p>Class activation map (CAM) is a heatmap that contains classification information, and it can highlight regions most pertinent to a specific class through region-level feature highlighting (<xref ref-type="bibr" rid="ref48">Zhou et al., 2016</xref>). The designed residual shrinkage network structure is combined with an EEG-based class activation map (ECAM) interpretation technique, which allows us to visualize neurophysiologically common patterns learned from single-channel EEG signals for the classification decision.</p>
<p>The process of the interpretation method over the input signal is shown in <xref ref-type="fig" rid="fig2">Figure 2</xref>. The SE residual block is utilized to pay more attention to the significant channel information in the feature map by adaptively adjusting the weights of each channel. Each channel correlates with a different feature and contributes differently to each output classes. In this study, the class activation weights (CAWs) are the weights of the FC layers, leading to different weights for each feature map channel. The generated CAM will give more prominence to features that contribute significantly to the model&#x2019;s decision-making, while less relevant features will be well suppressed. The CAWs of each channel allow us to visualize discriminative regions of the EEG signals, which are considered as the basis for classification.</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Process of the interpretation method over the input EEG signal.</p>
</caption>
<graphic xlink:href="fnins-18-1508747-g002.tif"/>
</fig>
<p>Assuming the 3-s EEG input signal is <italic>X</italic>&#x202F;=&#x202F;{<italic>x<sub>i</sub></italic>} (<italic>i</italic>&#x202F;=&#x202F;1,2,&#x2026;,384), and the activation of the <italic>k</italic>th node in the output layer of the residual shrinkage unit is <italic>h<sub>k,j</sub></italic>, where <italic>k</italic>&#x202F;=&#x202F;(1,&#x2026;,<italic>N</italic>) with <italic>N</italic>&#x202F;=&#x202F;32 representing the number of features, and <italic>j</italic>&#x202F;=&#x202F;(1,2,3,&#x2026;,<italic>T</italic>) with <italic>T</italic> representing the length in time dimension. Since the first convolution layer reduces the raw signal of length 384 to 192, the final output of the residual shrinkage unit is a feature map of 192 (<italic>T</italic>)&#x002A;32 (<italic>N</italic>). Let <inline-formula>
<mml:math id="M8">
<mml:msubsup>
<mml:mi>m</mml:mi>
<mml:mi>k</mml:mi>
<mml:mi>g</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula> denote the feature activation of the <italic>k</italic>th node output by the GAP layer, and it can be computed as follows:</p>
<disp-formula id="EQ8">
<label>(8)</label>
<mml:math id="M9">
<mml:msubsup>
<mml:mi>m</mml:mi>
<mml:mi>k</mml:mi>
<mml:mi>g</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mi mathvariant="normal">T</mml:mi>
</mml:mfrac>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo stretchy="true">&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi>j</mml:mi>
</mml:munder>
<mml:msub>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:math>
</disp-formula>
<p>Suppose <inline-formula>
<mml:math id="M10">
<mml:msubsup>
<mml:mi>m</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>d</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula> denotes the activation of the node corresponding to class <italic>c</italic> in dense layer. Here, <italic>c</italic>&#x202F;=&#x202F;0 or 1 denotes the state of alert or drowsy, respectively. We compute <inline-formula>
<mml:math id="M11">
<mml:msubsup>
<mml:mi>m</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>d</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula> as follows:</p>
<disp-formula id="EQ9">
<label>(9)</label>
<mml:math id="M12">
<mml:msubsup>
<mml:mi>m</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>d</mml:mi>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo stretchy="true">&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi>k</mml:mi>
</mml:munder>
<mml:msub>
<mml:mi mathvariant="normal">w</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mi>m</mml:mi>
<mml:mi mathvariant="normal">k</mml:mi>
<mml:mi mathvariant="normal">g</mml:mi>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">b</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:math>
</disp-formula>
<p>where <italic>w<sub>k,c</sub></italic> represents the FC-WF layer&#x2019;s weight associated with class <italic>c</italic> for the node activation <inline-formula>
<mml:math id="M13">
<mml:msubsup>
<mml:mi>m</mml:mi>
<mml:mi>k</mml:mi>
<mml:mi>g</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula>, and <italic>b<sub>c</sub></italic> is the bias of class c in the FC-WF layer. It is also the CAW utilized in this interpretation method. The <inline-formula>
<mml:math id="M14">
<mml:msubsup>
<mml:mi>m</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>d</mml:mi>
</mml:msubsup>
</mml:math>
</inline-formula> is considered as the final activation of the network. <italic>M<sub>c</sub></italic>(<italic>j</italic>) denotes the activation map for class <italic>c</italic>. From <xref ref-type="disp-formula" rid="EQ8">Equations 8</xref>, <xref ref-type="disp-formula" rid="EQ9">9</xref>, <italic>M<sub>c</sub></italic>(<italic>j</italic>) is computed in <xref ref-type="disp-formula" rid="EQ10">Equation 10</xref>:</p>
<disp-formula id="EQ10">
<label>(10)</label>
<mml:math id="M15">
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mi mathvariant="normal">j</mml:mi>
</mml:mfenced>
<mml:mo>=</mml:mo>
<mml:munder>
<mml:mstyle displaystyle="true">
<mml:mo stretchy="true">&#x2211;</mml:mo>
</mml:mstyle>
<mml:mi>k</mml:mi>
</mml:munder>
<mml:msub>
<mml:mi mathvariant="normal">w</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:math>
</disp-formula>
<p>where <italic>b<sub>c</sub></italic> and 1&#x2215;<italic>T</italic> are neglected for simplicity. Then, the heatmap is further normalized by z-score. We can have the heatmap <italic>M<sub>c</sub></italic>, where <italic>M<sub>c</sub></italic>&#x202F;=&#x202F;(<italic>z<sub>1,c</sub></italic>, <italic>z<sub>2,c</sub></italic>,&#x2026;, <italic>z<sub>T,c</sub></italic>).</p>
<p>Similar to the CAM method (<xref ref-type="bibr" rid="ref48">Zhou et al., 2016</xref>), the heatmap indicating the classification decision of the model is obtained by upsampling <italic>M<sub>c</sub></italic>(<italic>j</italic>) to the same length as the input signal. The upsampling is to use an equal interpolation method to fill the elements based on the original activation length of 192. According to the network structure, the convolution and pooling layers of BaseFE module reduce the temporal dimension to 1/2 of the input signal, and the residual shrinkage block does not change its temporal dimension. Therefore, the heatmap can be restored to the original length of 384, which is same as the input signal, by duplicating and filling each element <italic>z<sub>j,c</sub></italic> two times. The final heatmap is obtained in <xref ref-type="disp-formula" rid="EQ11">Equation 11</xref>:</p>
<disp-formula id="EQ11">
<label>(11)</label>
<mml:math id="M16">
<mml:msub>
<mml:mi>M</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:munder>
<mml:munder>
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="true">&#xFE38;</mml:mo>
</mml:munder>
<mml:mn>2</mml:mn>
</mml:munder>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:munder>
<mml:munder>
<mml:mrow>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>z</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="true">&#xFE38;</mml:mo>
</mml:munder>
<mml:mn>2</mml:mn>
</mml:munder>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>
</sec>
<sec id="sec9">
<label>2.4</label>
<title>Methods for comparison</title>
<p>In this section, there are several leading single-channel EEG-based baseline methods for comparison, including the conventional machine learning methods and state-of-the-art deep learning methods.</p>
<list list-type="order">
<list-item>
<p>Conventional methods: The power feature of EEG bands is one of the crucial features for EEG drowsy state recognition (<xref ref-type="bibr" rid="ref10">Cui et al., 2021</xref>). We calculate the relative power (delta, theta, alpha, and beta bands) from the Oz channel signal using the Welch&#x2019;s method. Different conventional classifiers are tested, which include k-nearest neighbors (KNN), Random Forest (RF), Gaussian Naive Bayes (GNB), and SVM.</p>
</list-item>
<list-item>
<p>EEGNet: EEGNet was designed as a compact CNN model (<xref ref-type="bibr" rid="ref30">Lawhern et al., 2018</xref>). We opt for EEGNet-8,2 over EEGNet-4,2 due to its higher classification accuracy. Despite its compact network, EEGNet-8,2 can achieve the state-of-the-art performance in various EEG recognition tasks.</p>
</list-item>
<list-item>
<p>ShallowConvNet (<xref ref-type="bibr" rid="ref40">Schirrmeister et al., 2017</xref>): ShallowConvNet is a shallow CNN consisting of temporal convolution, spatial convolution, and pooling layers.</p>
</list-item>
<list-item>
<p>DeepConvNet: In addition to the Shallow CNN model, <xref ref-type="bibr" rid="ref40">Schirrmeister et al. (2017)</xref> proposed another effective deep CNN model (DeepConvNet) to capture discriminative EEG features for motor imagery classification.</p>
</list-item>
<list-item>
<p>CNN-LSTM: CNN-LSTM was designed to recognize subject-independent drowsiness from single-channel EEG and provide interpretable analyze for classification (<xref ref-type="bibr" rid="ref10">Cui et al., 2021</xref>).</p>
</list-item>
<list-item>
<p>CompactCNN: CompactCNN is proposed as an interpretable DL model, which applies the CAM method to visualize EEG common features learned from single-channel EEG (<xref ref-type="bibr" rid="ref9">Cui et al., 2022b</xref>).</p>
</list-item>
<list-item>
<p>TSANet: TSANet is a deep neural network model based on temporal-spectral fused and attention which is originally used for automatic sleep staging from single-channel EEG (<xref ref-type="bibr" rid="ref18">Fu et al., 2023</xref>).</p>
</list-item>
</list>
</sec>
</sec>
<sec id="sec10">
<label>3</label>
<title>Experimental results</title>
<p>In this section, we first describe the widely used dataset, our experiment, and evaluation metrics. Then, we conduct extensive experiments and present the performance of our model in comparison with the strong baselines.</p>
<sec id="sec11">
<label>3.1</label>
<title>Data description</title>
<p>In this study, we use a public sustained-attention driving task (SADT) dataset to explore driver drowsiness detection (<xref ref-type="bibr" rid="ref5">Cao et al., 2019</xref>). The EEG data were collected from 27 participants (ranging in age from 22 to 28) with headset EEG of 32 electrodes at 500&#x202F;Hz. The driver drowsy state was induced through a 90-min nighttime driving simulation which is conducted in a VR-based driving simulator. During this procedure, lane departure events occurred when the car was drifted from the center lane either to the left or right. Participants were asked to promptly steer the car back to the center lane as soon as the events occurred. The drowsy degree was quantitatively assessed on the basis of the subjects&#x2019; reaction times to these departure events. By analyzing subjects&#x2019; reaction time to these events, it was able to gauge their level of drowsiness.</p>
<p>The recorded signals were first filtered by 1&#x2013;50&#x202F;Hz band-pass filters and then processed by artifact rejection. <xref ref-type="bibr" rid="ref9">Cui et al. (2022b)</xref> further preprocessed the EEG signals by down-sampling to 128&#x202F;Hz. Then, they selected and labeled the samples to generate a preprocessed version of the dataset. The samples were extracted for each EEG trial at 3-s length before the car deviation event (<xref ref-type="bibr" rid="ref23">Gong et al., 2024</xref>). Notably, studies have shown that drowsiness is associated with EEG power spectrum in the theta and/or alpha band (<xref ref-type="bibr" rid="ref28">Joutsiniemi et al., 1995</xref>). After attempting to find good choices of the EEG channel and power spectrum features for assessing the drowsiness-related EEG dynamics, it was found that the Oz channel is the most effective channel and its power spectrum features in the theta and alpha band have good distinguishing ability (<xref ref-type="bibr" rid="ref34">Pal et al., 2008</xref>). Therefore, we select the Oz channel data to find the most discriminative features for identifying drowsiness from alert samples from the single-channel EEG signals in the study. The dimension of each sample is 1 (Oz channel)&#x202F;&#x00D7;&#x202F;384 (sample points).</p>
<p>To ensure sufficient training and testing samples, each subject containing at least 50 samples for each state was selected for the dataset. Finally, 2,952 samples from 11 different subjects were collected to produce an unbalanced dataset (<xref ref-type="bibr" rid="ref6">Cui, 2021a</xref>) for the real situation, which is described in <xref ref-type="table" rid="tab1">Table 1</xref>. In addition, they also generated a balanced dataset including 2022 samples which has been uploaded online (<xref ref-type="bibr" rid="ref7">Cui, 2021b</xref>). In this study, we view the balanced dataset as an ideal training dataset for the models and utilize the unbalanced data of each subject to evaluate the training model.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Number of samples in the unbalanced dataset.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Subject ID</th>
<th align="center" valign="top" colspan="2">Sample number</th>
</tr>
<tr>
<th align="center" valign="top">Alert</th>
<th align="center" valign="top">Drowsiness</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">1</td>
<td align="center" valign="top">94</td>
<td align="center" valign="top">96</td>
</tr>
<tr>
<td align="left" valign="top">2</td>
<td align="center" valign="top">363</td>
<td align="center" valign="top">66</td>
</tr>
<tr>
<td align="left" valign="top">3</td>
<td align="center" valign="top">75</td>
<td align="center" valign="top">180</td>
</tr>
<tr>
<td align="left" valign="top">4</td>
<td align="center" valign="top">118</td>
<td align="center" valign="top">74</td>
</tr>
<tr>
<td align="left" valign="top">5</td>
<td align="center" valign="top">161</td>
<td align="center" valign="top">112</td>
</tr>
<tr>
<td align="left" valign="top">6</td>
<td align="center" valign="top">83</td>
<td align="center" valign="top">116</td>
</tr>
<tr>
<td align="left" valign="top">7</td>
<td align="center" valign="top">51</td>
<td align="center" valign="top">103</td>
</tr>
<tr>
<td align="left" valign="top">8</td>
<td align="center" valign="top">238</td>
<td align="center" valign="top">132</td>
</tr>
<tr>
<td align="left" valign="top">9</td>
<td align="center" valign="top">243</td>
<td align="center" valign="top">157</td>
</tr>
<tr>
<td align="left" valign="top">10</td>
<td align="center" valign="top">192</td>
<td align="center" valign="top">54</td>
</tr>
<tr>
<td align="left" valign="top">11</td>
<td align="center" valign="top">113</td>
<td align="center" valign="top">131</td>
</tr>
<tr>
<td align="left" valign="top">Total</td>
<td align="center" valign="top">1,731</td>
<td align="center" valign="top">1,221</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec12">
<label>3.2</label>
<title>Experimental settings and evaluation metrics</title>
<p>In our experiment, we conduct comparisons on a desktop computer with an Intel(R) Core(TM) i5-12600KF CPU and a NVIDIA GeForce GTX 1080 Ti graphics card. All the codes were implemented in Python 3.6, and our model along with the baseline methods was implemented using the PyTorch Library. For EEGNet, ShallowConvNet, and DeepConvNet, we made a slight modification to their original models. Since each second convolutional layer in these three models is designed to extract features from multi-channel EEG, here we made a slight modification in each second convolutional layer with a 1&#x202F;&#x00D7;&#x202F;1 kernel used to adapt for single-channel EEG signals, which can enhance spatial feature extraction and the feature representation ability. We utilized the Adam optimization method with a learning rate of 0.001 and set the batch size as 50.</p>
<p>For cross-subject driver drowsiness detection, the leave-one-subject-out cross-validation (LOSO-CV) is conducted to evaluate our model&#x2019;s effectiveness on a widely used dataset of 11 subjects (<xref ref-type="bibr" rid="ref1">Autthasan et al., 2022</xref>). In each fold of LOSO-CV, the EEG data samples from one subject are set up as a testing set for testing, and the data samples from all the other subjects are set up as a training set for training. This iterative process is repeated until each subject has been tested once as the test subject.</p>
<p>We use accuracy as an evaluation metric of our method in our experiment. As the metric of the F1-score considers both the precision and recall of the classification model, it is generally considered to be the most suitable metric for the unbalanced dataset. Therefore, we also adopt the F1-score as an evaluation metric on the unbalanced dataset. The metrics of sensitivity test and deletion test are also used to evaluate the interpretation of interpretable models.</p>
</sec>
<sec id="sec13">
<label>3.3</label>
<title>Results and comparison</title>
<sec id="sec14">
<label>3.3.1</label>
<title>Mean accuracy comparison on the balanced dataset</title>
<p>In this section, we compared the classification accuracy of five deep learning models tested on the balanced dataset for a standard evaluation. Each model was trained from 1 to 20 epochs, with network parameters randomized for each iteration. We conducted each evaluation by repeating each model on every subject 10 times, resulting in 110-fold for each epoch (10 times &#x00D7; 11 subjects).</p>
<p><xref ref-type="fig" rid="fig3">Figure 3</xref> shows that the proposed ID3RSNet outperforms other benchmark DL models. After 5 epochs of training, it achieves a peak mean accuracy of 77.16% with the fastest convergence speed. In the rest of the first 20 epochs, its mean accuracy stabilizes at above 74.42%, outperforming the other four models. In contrast, the CNN-LSTM model exhibits slower convergence, reaching an average accuracy of 73.78% after 16 epochs of training. The CompactCNN and CNN-LSTM models achieve the overall higher mean accuracies than both models of the EEGNet and ShallowConvNet, and their highest mean accuracies in the first 20 training epochs are 73.80 and 73.78%, respectively. Even though the CompactCNN model reaches a good performance rapidly in the first 10 epochs and stabilizes at approximately 72.20%, it is lower than that of the proposed model. The results indicate that the proposed method can better capture the class-discriminative EEG features for drowsiness detection from single-channel EEG signals.</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>Average cross-subject classification accuracies of the proposed model and four benchmark deep learning models for training epochs from 1 to 20.</p>
</caption>
<graphic xlink:href="fnins-18-1508747-g003.tif"/>
</fig>
</sec>
<sec id="sec15">
<label>3.3.2</label>
<title>Comparison results on the unbalanced dataset</title>
<p>In this section, we show the accuracy and F1-score comparison results of different methods tested on the unbalanced dataset, which is closer to the real-life scenarios. As shown in <xref ref-type="table" rid="tab2">Tables 2</xref>, <xref ref-type="table" rid="tab3">3</xref>, compared with all baseline models (including conventional methods and six deep learning methods), our proposed method achieves better performance, due to its enhanced feature learning ability. Moreover, we can also draw the following conclusions. First, it can be found that conventional methods (KNN, RF, GNB, and SVM) generally achieve relatively inferior classification performance compared to deep learning-based approaches except EEGNet. This demonstrates that the end-to-end deep learning methods can learn more task-relevant discriminative features for classification. Second, our method improves the average classification accuracy by approximately 4.85% on average and 7.8% at maximum in comparison with the deep learning methods such as CompactCNN and TSANet. This demonstrates that our method can achieve effective mining of important information implicit within single-channel EEG signals by effectively exploiting attention and soft thresholding in the residual shrinkage network. Furthermore, our method achieves an average F1-score almost 1.38% higher than the best TSANet method, all of which demonstrates its effectiveness and potential practical value. Finally, multiple optimal or suboptimal classification results (optimal results bolded in black) are achieved in individual test for each subject, which also proves its strong generalization ability for classifying cross-subject drowsy states from single-channel EEG signals.</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Comparison of different methods for cross-subject classification accuracy on the unbalanced dataset (%).</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Methods</th>
<th align="center" valign="top" colspan="11">Subject ID</th>
<th align="center" valign="top" rowspan="2">Avg. Acc.</th>
</tr>
<tr>
<th align="center" valign="top">1</th>
<th align="center" valign="top">2</th>
<th align="center" valign="top">3</th>
<th align="center" valign="top">4</th>
<th align="center" valign="top">5</th>
<th align="center" valign="top">6</th>
<th align="center" valign="top">7</th>
<th align="center" valign="top">8</th>
<th align="center" valign="top">9</th>
<th align="center" valign="top">10</th>
<th align="center" valign="top">11</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">KNN</td>
<td align="center" valign="top">71.05</td>
<td align="center" valign="top">41.96</td>
<td align="center" valign="top">36.86</td>
<td align="center" valign="top">67.71</td>
<td align="center" valign="top">69.60</td>
<td align="center" valign="top">67.34</td>
<td align="center" valign="top">68.83</td>
<td align="center" valign="top">62.16</td>
<td align="center" valign="top">76.00</td>
<td align="center" valign="top">83.33</td>
<td align="center" valign="top">61.89</td>
<td align="center" valign="top">64.25</td>
</tr>
<tr>
<td align="left" valign="top">RF</td>
<td align="center" valign="middle">70.53</td>
<td align="center" valign="middle">40.33</td>
<td align="center" valign="middle">35.29</td>
<td align="center" valign="middle">68.75</td>
<td align="center" valign="middle">72.53</td>
<td align="center" valign="middle">73.87</td>
<td align="center" valign="middle">71.43</td>
<td align="center" valign="middle">63.24</td>
<td align="center" valign="middle">77.25</td>
<td align="center" valign="middle">86.59</td>
<td align="center" valign="middle">56.15</td>
<td align="center" valign="middle">65.09</td>
</tr>
<tr>
<td align="left" valign="top">GNB</td>
<td align="center" valign="top">77.89</td>
<td align="center" valign="top">48.48</td>
<td align="center" valign="top">32.16</td>
<td align="center" valign="top">73.96</td>
<td align="center" valign="top">76.92</td>
<td align="center" valign="top">72.36</td>
<td align="center" valign="top">70.78</td>
<td align="center" valign="top">45.95</td>
<td align="center" valign="top">76.25</td>
<td align="center" valign="top">85.77</td>
<td align="center" valign="top">60.66</td>
<td align="center" valign="top">65.56</td>
</tr>
<tr>
<td align="left" valign="top">SVM</td>
<td align="center" valign="top">76.32</td>
<td align="center" valign="top">44.76</td>
<td align="center" valign="top">34.12</td>
<td align="center" valign="top">68.75</td>
<td align="center" valign="top">74.36</td>
<td align="center" valign="top">71.86</td>
<td align="center" valign="top">74.68</td>
<td align="center" valign="top">57.03</td>
<td align="center" valign="top">82.50</td>
<td align="center" valign="top">90.24</td>
<td align="center" valign="top">63.11</td>
<td align="center" valign="top">67.07</td>
</tr>
<tr>
<td align="left" valign="top">EEGNet</td>
<td align="center" valign="top">80.00</td>
<td align="center" valign="top">46.15</td>
<td align="center" valign="top">32.94</td>
<td align="center" valign="top">71.88</td>
<td align="center" valign="top">75.09</td>
<td align="center" valign="top">73.37</td>
<td align="center" valign="top">71.43</td>
<td align="center" valign="top">52.16</td>
<td align="center" valign="top">83.75</td>
<td align="center" valign="top">85.77</td>
<td align="center" valign="top">63.52</td>
<td align="center" valign="top">66.92</td>
</tr>
<tr>
<td align="left" valign="top">ShallowConvNet</td>
<td align="center" valign="top"><bold>85.26</bold></td>
<td align="center" valign="top">45.92</td>
<td align="center" valign="top">32.94</td>
<td align="center" valign="top">59.90</td>
<td align="center" valign="top">67.40</td>
<td align="center" valign="top">80.40</td>
<td align="center" valign="top"><bold>78.57</bold></td>
<td align="center" valign="top">42.97</td>
<td align="center" valign="top">84.00</td>
<td align="center" valign="top">89.02</td>
<td align="center" valign="top">73.36</td>
<td align="center" valign="top">67.25</td>
</tr>
<tr>
<td align="left" valign="top">DeepConvNet</td>
<td align="center" valign="top">72.63</td>
<td align="center" valign="top"><bold>59.21</bold></td>
<td align="center" valign="top">30.59</td>
<td align="center" valign="top">59.90</td>
<td align="center" valign="top">61.90</td>
<td align="center" valign="top">75.38</td>
<td align="center" valign="top">75.97</td>
<td align="center" valign="top">70.54</td>
<td align="center" valign="top">81.00</td>
<td align="center" valign="top"><bold>92.68</bold></td>
<td align="center" valign="top">57.38</td>
<td align="center" valign="top">67.02</td>
</tr>
<tr>
<td align="left" valign="top">CNN-LSTM</td>
<td align="center" valign="middle">77.89</td>
<td align="center" valign="top">42.89</td>
<td align="center" valign="top"><bold>52.55</bold></td>
<td align="center" valign="top">66.67</td>
<td align="center" valign="top">78.02</td>
<td align="center" valign="top">79.40</td>
<td align="center" valign="top">75.97</td>
<td align="center" valign="top">72.43</td>
<td align="center" valign="top">81.25</td>
<td align="center" valign="top">86.99</td>
<td align="center" valign="top">75.41</td>
<td align="center" valign="top">71.77</td>
</tr>
<tr>
<td align="left" valign="middle">CompactCNN</td>
<td align="center" valign="middle">78.42</td>
<td align="center" valign="middle">53.85</td>
<td align="center" valign="middle">52.16</td>
<td align="center" valign="middle">64.58</td>
<td align="center" valign="middle">78.39</td>
<td align="center" valign="middle">77.39</td>
<td align="center" valign="middle">72.73</td>
<td align="center" valign="middle"><bold>72.97</bold></td>
<td align="center" valign="middle">89.25</td>
<td align="center" valign="middle">84.15</td>
<td align="center" valign="middle">72.95</td>
<td align="center" valign="middle">72.44</td>
</tr>
<tr>
<td align="left" valign="top">TSANet</td>
<td align="center" valign="top">84.21</td>
<td align="center" valign="top">48.25</td>
<td align="center" valign="top">46.67</td>
<td align="center" valign="top">71.88</td>
<td align="center" valign="top">78.75</td>
<td align="center" valign="top"><bold>86.43</bold></td>
<td align="center" valign="top">77.92</td>
<td align="center" valign="top">63.24</td>
<td align="center" valign="top"><bold>91.25</bold></td>
<td align="center" valign="top">90.65</td>
<td align="center" valign="top">72.54</td>
<td align="center" valign="top">73.80</td>
</tr>
<tr>
<td align="left" valign="top">Ours</td>
<td align="center" valign="top">83.68</td>
<td align="center" valign="top">50.82</td>
<td align="center" valign="top">49.41</td>
<td align="center" valign="top"><bold>76.56</bold></td>
<td align="center" valign="top"><bold>86.81</bold></td>
<td align="center" valign="top">79.40</td>
<td align="center" valign="top">77.27</td>
<td align="center" valign="top">69.73</td>
<td align="center" valign="top">89.50</td>
<td align="center" valign="top">82.11</td>
<td align="center" valign="top"><bold>76.64</bold></td>
<td align="center" valign="top"><bold>74.72</bold></td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption>
<p>Comparison of different methods for cross-subject classification F1-scores on the unbalanced dataset (%).</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Methods</th>
<th align="center" valign="top" colspan="11">Subject ID</th>
<th align="center" valign="top" rowspan="2">Avg. F1.</th>
</tr>
<tr>
<th align="center" valign="top">1</th>
<th align="center" valign="top">2</th>
<th align="center" valign="top">3</th>
<th align="center" valign="top">4</th>
<th align="center" valign="top">5</th>
<th align="center" valign="top">6</th>
<th align="center" valign="top">7</th>
<th align="center" valign="top">8</th>
<th align="center" valign="top">9</th>
<th align="center" valign="top">10</th>
<th align="center" valign="top">11</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">KNN</td>
<td align="center" valign="top">72.08</td>
<td align="center" valign="top">23.85</td>
<td align="center" valign="top">29.69</td>
<td align="center" valign="top">65.17</td>
<td align="center" valign="top">50.89</td>
<td align="center" valign="top">67.01</td>
<td align="center" valign="top">78.18</td>
<td align="center" valign="top">62.96</td>
<td align="center" valign="top">74.74</td>
<td align="center" valign="top">62.39</td>
<td align="center" valign="top">57.14</td>
<td align="center" valign="top">58.55</td>
</tr>
<tr>
<td align="left" valign="top">RF</td>
<td align="center" valign="middle">70.83</td>
<td align="center" valign="middle">24.26</td>
<td align="center" valign="middle">26.01</td>
<td align="center" valign="middle">65.91</td>
<td align="center" valign="middle">56.14</td>
<td align="center" valign="middle">73.74</td>
<td align="center" valign="middle">79.82</td>
<td align="center" valign="middle">64.95</td>
<td align="center" valign="middle">75.86</td>
<td align="center" valign="middle">69.72</td>
<td align="center" valign="middle">51.14</td>
<td align="center" valign="middle">59.85</td>
</tr>
<tr>
<td align="left" valign="top">GNB</td>
<td align="center" valign="top">76.67</td>
<td align="center" valign="top"><bold>28.48</bold></td>
<td align="center" valign="top">11.28</td>
<td align="center" valign="top">73.12</td>
<td align="center" valign="top">64.41</td>
<td align="center" valign="top">71.20</td>
<td align="center" valign="top">79.82</td>
<td align="center" valign="top">56.90</td>
<td align="center" valign="top">76.31</td>
<td align="center" valign="top">66.67</td>
<td align="center" valign="top">45.45</td>
<td align="center" valign="top">59.12</td>
</tr>
<tr>
<td align="left" valign="top">SVM</td>
<td align="center" valign="top">75.41</td>
<td align="center" valign="top">26.17</td>
<td align="center" valign="top">17.65</td>
<td align="center" valign="top">64.71</td>
<td align="center" valign="top">56.25</td>
<td align="center" valign="top">69.89</td>
<td align="center" valign="top">82.19</td>
<td align="center" valign="top">62.23</td>
<td align="center" valign="top">80.45</td>
<td align="center" valign="top">76.92</td>
<td align="center" valign="top">51.61</td>
<td align="center" valign="top">60.32</td>
</tr>
<tr>
<td align="left" valign="top">EEGNet</td>
<td align="center" valign="top">81.77</td>
<td align="center" valign="top">24.92</td>
<td align="center" valign="top">15.08</td>
<td align="center" valign="top">65.90</td>
<td align="center" valign="top">56.05</td>
<td align="center" valign="top">72.13</td>
<td align="center" valign="top">82.14</td>
<td align="center" valign="top">57.59</td>
<td align="center" valign="top">81.20</td>
<td align="center" valign="top">74.07</td>
<td align="center" valign="top">43.02</td>
<td align="center" valign="top">59.44</td>
</tr>
<tr>
<td align="left" valign="top">ShallowConvNet</td>
<td align="center" valign="top">82.41</td>
<td align="center" valign="top">25.42</td>
<td align="center" valign="top">53.38</td>
<td align="center" valign="top">53.13</td>
<td align="center" valign="top">37.58</td>
<td align="center" valign="top">73.96</td>
<td align="center" valign="top">82.57</td>
<td align="center" valign="top">56.51</td>
<td align="center" valign="top">77.18</td>
<td align="center" valign="top">74.02</td>
<td align="center" valign="top">50.28</td>
<td align="center" valign="top">60.58</td>
</tr>
<tr>
<td align="left" valign="top">DeepConvNet</td>
<td align="center" valign="top">65.79</td>
<td align="center" valign="top">10.26</td>
<td align="center" valign="top">3.29</td>
<td align="center" valign="top">38.40</td>
<td align="center" valign="top">13.33</td>
<td align="center" valign="top">73.80</td>
<td align="center" valign="top">81.59</td>
<td align="center" valign="top">56.57</td>
<td align="center" valign="top">70.08</td>
<td align="center" valign="top"><bold>81.25</bold></td>
<td align="center" valign="top">37.35</td>
<td align="center" valign="top">48.34</td>
</tr>
<tr>
<td align="left" valign="top">CNN-LSTM</td>
<td align="center" valign="middle">76.40</td>
<td align="center" valign="top">25.53</td>
<td align="center" valign="top"><bold>64.52</bold></td>
<td align="center" valign="top">65.22</td>
<td align="center" valign="top">75.61</td>
<td align="center" valign="top">80.75</td>
<td align="center" valign="top">82.13</td>
<td align="center" valign="top">64.58</td>
<td align="center" valign="top">79.34</td>
<td align="center" valign="top">72.88</td>
<td align="center" valign="top"><bold>78.57</bold></td>
<td align="center" valign="top">69.59</td>
</tr>
<tr>
<td align="left" valign="middle">CompactCNN</td>
<td align="center" valign="middle">76.57</td>
<td align="center" valign="middle">26.67</td>
<td align="center" valign="middle">60.89</td>
<td align="center" valign="middle">65.66</td>
<td align="center" valign="middle">74.46</td>
<td align="center" valign="middle">78.05</td>
<td align="center" valign="middle">78.35</td>
<td align="center" valign="middle"><bold>70.23</bold></td>
<td align="center" valign="middle">87.09</td>
<td align="center" valign="middle">67.77</td>
<td align="center" valign="middle">73.60</td>
<td align="center" valign="middle">69.03</td>
</tr>
<tr>
<td align="left" valign="top">TSANet</td>
<td align="center" valign="top"><bold>83.87</bold></td>
<td align="center" valign="top">23.97</td>
<td align="center" valign="top">51.77</td>
<td align="center" valign="top">70.97</td>
<td align="center" valign="top">65.88</td>
<td align="center" valign="top"><bold>87.78</bold></td>
<td align="center" valign="top"><bold>83.65</bold></td>
<td align="center" valign="top">65.66</td>
<td align="center" valign="top"><bold>89.55</bold></td>
<td align="center" valign="top">78.10</td>
<td align="center" valign="top">67.63</td>
<td align="center" valign="top">69.89</td>
</tr>
<tr>
<td align="left" valign="top">Ours</td>
<td align="center" valign="top">83.06</td>
<td align="center" valign="top">26.48</td>
<td align="center" valign="top">58.25</td>
<td align="center" valign="top"><bold>73.37</bold></td>
<td align="center" valign="top"><bold>84.07</bold></td>
<td align="center" valign="top">79.40</td>
<td align="center" valign="top">82.59</td>
<td align="center" valign="top">66.27</td>
<td align="center" valign="top">87.35</td>
<td align="center" valign="top">65.63</td>
<td align="center" valign="top">77.47</td>
<td align="center" valign="top"><bold>71.27</bold></td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec16">
<label>3.3.3</label>
<title>Ablation experiments of the proposed modules</title>
<p>To verify the effectiveness of each module of our model, we conduct ablation experiments tested on the unbalanced dataset. Note that our proposed model is made up of a base feature extractor (BaseFE), residual shrinkage building unit (RSBU) with soft thresholding (ST), GAP, and a fully connected layer (FC) with weight freezing (WF). Specifically, we derive six model variants as follows.</p>
<list list-type="order">
<list-item>
<p><italic>BaseFE</italic>: The base feature extractor module only.</p>
</list-item>
<list-item>
<p><italic>w/o RSBU</italic>: The model removes the residual shrinkage building unit with soft thresholding.</p>
</list-item>
<list-item>
<p><italic>w/o ST</italic>: The model removes the soft thresholding.</p>
</list-item>
<list-item>
<p><italic>w/o GAP</italic>: The model removes the global average pooling layer.</p>
</list-item>
<list-item>
<p><italic>w/o WF</italic>: The model removes the weight freezing from the fully connected layer.</p>
</list-item>
<list-item>
<p><italic>ID3RSNet</italic>: The model includes each module of proposed model in this study.</p>
</list-item>
</list>
<p>From the results of the ablation study shown in <xref ref-type="fig" rid="fig4">Figures 4A</xref>,<xref ref-type="fig" rid="fig4">B</xref>, we can summarize as follows. First, the model variant <italic>w/o RSBU</italic>, which completely removes the residual shrinkage building unit module, yields almost the worst results. This suggests that the channel-wise residual shrinkage building unit contributes significantly to enhancing the feature learning ability and model classification performance. Furthermore, in comparison with the model variants <italic>w/o ST</italic> and <italic>w/o RSBU</italic>, we know that the residual SE block contributes more to boosting feature representation. Second, the model variant <italic>w/o GAP</italic> achieves obviously inferior results, indicating that the GAP can effectively reduce overfitting and improve the generalization capability of the proposed model. Finally, the proposed <italic>ID3RSNet</italic> achieves superior classification performance with each module integrated, which shows the necessity and effectiveness of each module.</p>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>Result of ablation experiments on unbalanced data settings. The &#x2018;w/o&#x2019; represents &#x2018;without&#x2019;. The values of accuracy and F1-score are percentile. <bold>(A)</bold> Accuracy, <bold>(B)</bold> F1-score.</p>
</caption>
<graphic xlink:href="fnins-18-1508747-g004.tif"/>
</fig>
</sec>
<sec id="sec17">
<label>3.3.4</label>
<title>Evaluation of sensitivity test and deletion test</title>
<p>For sensitivity tests, we randomly selected 50 samples of each class from each subject in the unbalanced dataset and thus have in total 11 (subjects)&#x202F;&#x00D7;&#x202F;2 (classes)&#x202F;&#x00D7;&#x202F;50 (samples)&#x202F;=&#x202F;1,100 samples for evaluation (<xref ref-type="bibr" rid="ref12">Cui et al., 2023</xref>). Inspired by the sensitivity test proposed by <xref ref-type="bibr" rid="ref12">Cui et al. (2023)</xref>, we adopted the non-linear perturbation (sine wave combined with noise) and adjusted the perturbation strength dynamically with a ranged perturbation scale of the input sample. It is assumed that the perturbations will not cause the sample to deviate significantly from its original distribution. The sensitivity test is performed on the original contribution heatmap to reflect the best correlation obtained between the perturbed batches and the model output. Here, we set the scaling factor <italic>n</italic> of perturbation to 1&#x2013;5 and calculate the Pearson correlation coefficient (PCC) between the original heatmap and perturbed heatmap as a quality metric of the contribution map. From <xref ref-type="fig" rid="fig5">Figure 5A</xref>, it can be observed that our method achieves higher average correlation coefficients in the sensitivity tests compared to the CompactCNN baseline (<xref ref-type="bibr" rid="ref9">Cui et al., 2022b</xref>). It indicates that the contribution maps generated by our model have higher stability and consistency under different input perturbations.</p>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption>
<p>Evaluation results of sensitivity test and deletion test for interpretable models. <bold>(A)</bold> Sensitivity test. <bold>(B)</bold> Deletion test.</p>
</caption>
<graphic xlink:href="fnins-18-1508747-g005.tif"/>
</fig>
<p>According to the deletion test proposed in the reference (<xref ref-type="bibr" rid="ref37">Petsiuk et al., 2018</xref>), we also use the metric of deletion test in this study. In this test, we ranked the sampling points of the input sample in descending order of their values in the contribution map (<xref ref-type="bibr" rid="ref12">Cui et al., 2023</xref>). By setting the sample threshold for deleting values in the heatmap, we calculated the predicted probabilities when the corresponding points were removed from the sample by setting their values to zeros. A sharp drop of the predicted probabilities for the corresponding class is indicative of a high-quality contribution map. From <xref ref-type="fig" rid="fig5">Figure 5B</xref>, it can be seen that our method achieves lower average values of predicted probabilities with the different sample thresholds compared to the CompactCNN baseline. It indicates that the interpretation of our method is more effective than that of the CompactCNN, as the removal of the features identified as important causes a sharper drop in the predicted probabilities.</p>
</sec>
<sec id="sec18">
<label>3.3.5</label>
<title>Interpretation on the learned patterns from single-channel EEG signals</title>
<p>In this section, we explore what EEG patterns our proposed ID3RSNet has learned using the proposed ECAM interpretation method, which is described in Section 2.3. From the generated heatmap of EEG amplitude fluctuation and the bar graph of relative power, we can explain the most discriminative features learned as evidence of the model classification. To verify the reliable interpretation of the model decision, we compare our method with the leading interpretable deep learning method CompactCNN (<xref ref-type="bibr" rid="ref9">Cui et al., 2022b</xref>). <xref ref-type="fig" rid="fig6">Figures 6</xref>, <xref ref-type="fig" rid="fig7">7</xref> show the visualization of some representative EEG samples labeled as drowsy and alert, respectively. The label and prediction probability of each O<sub>Z</sub> channel signal sample are titled.</p>
<fig position="float" id="fig6">
<label>Figure 6</label>
<caption>
<p>Illustrative comparison of the visualization of learned patterns on correctly classified drowsy samples: <bold>(A)</bold> theta-delta burst; <bold>(B)</bold> alpha spindle.</p>
</caption>
<graphic xlink:href="fnins-18-1508747-g006.tif"/>
</fig>
<fig position="float" id="fig7">
<label>Figure 7</label>
<caption>
<p>Illustrative comparison of the visualization of learned patterns on correctly classified alert samples: <bold>(A)</bold> beta rhythm; <bold>(B)</bold> delta rhythm.</p>
</caption>
<graphic xlink:href="fnins-18-1508747-g007.tif"/>
</fig>
<p>Extensive experiments have found that most drowsy samples typically contain a high ratio of theta and delta waves, e.g., as shown in <xref ref-type="fig" rid="fig6">Figure 6A</xref>, or alpha waves, e.g., as shown in <xref ref-type="fig" rid="fig6">Figure 6B</xref>. From the visualization of amplitude fluctuation and relative power displayed in <xref ref-type="fig" rid="fig6">Figure 6A</xref>, we have discovered that both methods have captured several episodes containing rhythmic bursts of slow waves in the theta-delta band, which is identified as strong evidence for drowsy classification. Actually, these bursts located in the theta-delta band are closely associated with drowsiness (<xref ref-type="bibr" rid="ref4">Britton et al., 2016</xref>). From the second sample displayed in <xref ref-type="fig" rid="fig6">Figure 6B</xref>, we found that the EEG signal in the alpha band has a stronger amplitude and higher relative power, characterized by the narrow frequency peaks. Compared to CompactCNN, our method has captured more regions of spindle-like structures in the alpha band from the central part of the input signal, recognized as strong evidence of drowsiness. It is well known that the captured alpha spindles in EEG signals have been demonstrated to be a strong indicator for recognizing fatigue driving (<xref ref-type="bibr" rid="ref41">Simon et al., 2011</xref>). Compared with CompactCNN in <xref ref-type="fig" rid="fig6">Figure 6</xref>, we can discover that our method has learned more discriminative effective features, such as the slow theta-delta waves and alpha waves, to achieve higher classification accuracy.</p>
<p>It can be seen that most alert samples typically contain a high ratio of beta waves, e.g., as shown in <xref ref-type="fig" rid="fig7">Figure 7A</xref>, or a high ratio of delta waves, e.g., as shown in <xref ref-type="fig" rid="fig7">Figure 7B</xref>. From <xref ref-type="fig" rid="fig7">Figure 7A</xref>, we can discover that both methods have captured some short EEG episodes containing lots of high-frequency beta waves as strong evidence for alertness. Notably, these identified beta waves were typically linked to active, busy, attention, or even electromyography (EMG) activities, which is known as the signals during wakeful state (<xref ref-type="bibr" rid="ref3">Baumeister et al., 2008</xref>; <xref ref-type="bibr" rid="ref22">Goncharova et al., 2003</xref>). Compared to CompactCNN, our method has not identified the pattern of one high amplitude peak wave that may be caused by eye blinking or eye movement, as the evidence of alertness. This suggests that our method is not only more resistant to the interference caused by the artifacts in the EEG signals, but it also achieves the superior classification result.</p>
<p>From <xref ref-type="fig" rid="fig7">Figure 7B</xref>, we can discover that both methods have captured these large-voltage and low-frequency waves (delta band) as strong evidence for alertness. With these discriminative invariant features found, both methods have achieved high likelihoods. Given that delta waves are dominant during the deep sleep phase, their appearance during the wakeful state is more likely caused by sensor drifts or subject movements. In fact, these typical patterns in EEG signals, including EMG and movements, are the strongest indicators for wakefulness (<xref ref-type="bibr" rid="ref4">Britton et al., 2016</xref>). The visualization results prove that our method achieves higher classification accuracy with neurophysiologically reliable patterns found in single-channel EEG signals.</p>
</sec>
</sec>
</sec>
<sec id="sec19">
<label>4</label>
<title>Discussion and future work</title>
<sec id="sec20">
<label>4.1</label>
<title>Analysis of confusion matrices</title>
<p>The confusion matrices of the proposed ID3RSNet are shown in <xref ref-type="fig" rid="fig8">Figure 8</xref>. These matrices were generated by summing the scores for each subject when serving as test data. In the case of the balanced dataset, <xref ref-type="fig" rid="fig8">Figure 8A</xref> shows that the model recognized alert and drowsy states with similar results, correctly predicting approximately 80% of each category. This indicates that the model has a relatively balanced ability to discriminate between different categories. In addition, <xref ref-type="fig" rid="fig8">Figure 8B</xref> reveals that the proposed method performed slightly better in recognizing the alert state compared to the drowsy state on the unbalanced dataset, since there are more data with the drowsy label than the alert label. Furthermore, it can be also found that the achieved classification results of the proposed method tested on the unbalanced dataset are inferior to the results tested on the balanced dataset, since the unbalanced dataset has 930 more unseen test EEG samples than the balanced dataset.</p>
<fig position="float" id="fig8">
<label>Figure 8</label>
<caption>
<p>Confusion matrices for two types of data. <bold>(A)</bold> Confusion matrix on the balanced dataset. <bold>(B)</bold> Confusion matrix on the unbalanced dataset.</p>
</caption>
<graphic xlink:href="fnins-18-1508747-g008.tif"/>
</fig>
</sec>
<sec id="sec21">
<label>4.2</label>
<title>Analysis of computational complexity</title>
<p>To verify the computational efficiency of the proposed ID3RSNet, we evaluate the testing time for each subject, the FLOPs and the number of parameters on the unbalanced dataset, and the mean training time for each subject on the balanced dataset for different lightweight deep learning methods. All experiments used the same platform with the same hardware configuration and the same software configuration, which are described in Section 3.2. <xref ref-type="table" rid="tab4">Table 4</xref> indicates that, compared to the state-of-the-art lightweight methods, the proposed method achieves optimal performance with no significant difference in computational complexity. Even though the proposed model takes a little longer testing time for each subject than other baselines due to the extensive computation of weight freezing (WF), this speed is still quite fast for driver drowsiness detection.</p>
<table-wrap position="float" id="tab4">
<label>Table 4</label>
<caption>
<p>Computation time, flops, and parameters with different lightweight deep learning methods.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Methods</th>
<th align="center" valign="top">ShallowConvNet</th>
<th align="center" valign="top">CNN-LSTM</th>
<th align="center" valign="top">CompactCNN</th>
<th align="center" valign="top">ID3RSNet</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Training time (ms)</td>
<td align="center" valign="middle">7.5</td>
<td align="center" valign="middle">5.7</td>
<td align="center" valign="middle">2.2</td>
<td align="center" valign="middle"><bold>1.5</bold></td>
</tr>
<tr>
<td align="left" valign="middle">Testing time (ms)</td>
<td align="center" valign="middle">0.5</td>
<td align="center" valign="middle">0.7</td>
<td align="center" valign="middle"><bold>0.3</bold></td>
<td align="center" valign="middle">5.2</td>
</tr>
<tr>
<td align="left" valign="middle">Flops (M)</td>
<td align="center" valign="middle">50.5</td>
<td align="center" valign="middle">39.4</td>
<td align="center" valign="middle"><bold>32.9</bold></td>
<td align="center" valign="middle">44.0</td>
</tr>
<tr>
<td align="left" valign="middle">Parameters (K)</td>
<td align="center" valign="middle">4.3</td>
<td align="center" valign="middle">2.5</td>
<td align="center" valign="middle"><bold>2.2</bold></td>
<td align="center" valign="middle">5.6</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Bold indicates optimum.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="sec22">
<label>4.3</label>
<title>Analysis of different types of thresholds</title>
<p>There are two types of thresholds in the residual shrinkage building unit (RSBU): channel-shared (CS) thresholds and channel-wise (CW) thresholds. The RSBU-CW differs from the RSBU-CS by applying an individual threshold to each feature map channel. As shown in <xref ref-type="table" rid="tab5">Table 5</xref>, the results demonstrate the effectiveness of channel-wise thresholds for each channel of the extracted feature map, which is adopted in our proposed ID3RSNet. This also indicates that the RSBU-CW is more effective for eliminating redundant information and automatically selecting important features compared to the RSBU-CS.</p>
<table-wrap position="float" id="tab5">
<label>Table 5</label>
<caption>
<p>Classification performance tested on balanced and unbalanced dataset with the methods of two thresholds.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Methods</th>
<th align="center" valign="top" colspan="2">Balanced dataset</th>
<th align="center" valign="top" colspan="2">Unbalanced dataset</th>
</tr>
<tr>
<th align="center" valign="top">Accuracy</th>
<th align="center" valign="top">F1-score</th>
<th align="center" valign="top">Accuracy</th>
<th align="center" valign="top">F1-score</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">CS</td>
<td align="center" valign="middle">76.80</td>
<td align="center" valign="middle"><bold>77.37</bold></td>
<td align="center" valign="middle">74.19</td>
<td align="center" valign="middle">71.20</td>
</tr>
<tr>
<td align="left" valign="middle">CW</td>
<td align="center" valign="middle"><bold>77.16</bold></td>
<td align="center" valign="middle">77.17</td>
<td align="center" valign="middle"><bold>74.72</bold></td>
<td align="center" valign="middle"><bold>71.27</bold></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Bold indicates optimum.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="sec23">
<label>4.4</label>
<title>Considering federated transfer learning in future work</title>
<p>In this study, we explore a promising research topic that an inherently interpretable residual shrinkage network (ID3RSNet) is developed to improve classification performance with interpretable evidence for driver drowsiness detection. To further generalize well to unseen subjects in real-world scenarios, we must try to make further exploration to design transfer learning including unsupervised domain adaptation (UDA) methods, which have the potential in mitigating domain discrepancies among different subjects (<xref ref-type="bibr" rid="ref19">Gao et al., 2023a</xref>). It is important to note that EEG data contain rich privacy information from each individual, posing a potential risk for privacy leakage when sharing personal source data for training. Federated learning (<xref ref-type="bibr" rid="ref38">Rao et al., 2024</xref>) that can jointly deploy the deep learning model in the edge devices may address the problem of protecting data privacy and security. In the future, we will focus on the optimized solution of federated transfer learning for driver drowsiness detection with single-channel EEG. With private data for training provided by different edge devices and aligned feature distribution, this solution will further improve generalization performance for privacy-preserving drowsiness detection of source subjects.</p>
</sec>
</sec>
<sec sec-type="conclusions" id="sec24">
<label>5</label>
<title>Conclusion</title>
<p>In this study, we propose a novel interpretable residual shrinkage network (ID3RSNet) for cross-subject driver drowsiness detection with single-channel EEG. Soft thresholding and attention mechanisms integrated into the residual shrinkage network are applied to automatically enhance the representation ability of important features. In addition, both the GAP layer and the WF regularization approach are utilized to further improve classification performance. With the inherently interpretable network structure designed, we propose an EEG-based class activation map (ECAM) interpretable method to visualize discriminable common patterns of single-channel EEG signals that contribute significantly to classification. Extensive experimental results indicate that our interpretable model with neurophysiologically reliable evidence, e.g., alpha spindles and theta-delta bursts, achieves the current state-of-the-art performance. Moreover, this study also provides insight into the development of portable single-channel EEG devices with interpretable neural networks for driver drowsiness detection in real-life scenarios.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec25">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="sec26">
<title>Author contributions</title>
<p>XF: Conceptualization, Data curation, Formal analysis, Investigation, Methodology, Resources, Software, Supervision, Validation, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. ZG: Data curation, Formal analysis, Funding acquisition, Supervision, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. SK: Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec sec-type="funding-information" id="sec27">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. This research was supported in part by the Natural Science Foundation of Henan Province under Grant No. 222300420379, the Key Scientific Research Project of Higher Education of Henan Province under Grant 25B510002, the Science Technology Research Program of Chongqing Municipal Education Commission under Grant No. KJQN202300225, and the Chongqing Postdoctoral International Exchange and Training Program.</p>
</sec>
<sec sec-type="COI-statement" id="sec28">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec29">
<title>Generative AI statement</title>
<p>The authors declare that no Gen AI was used in the creation of this manuscript.</p>
</sec>
<sec sec-type="disclaimer" id="sec30">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Autthasan</surname> <given-names>P.</given-names></name> <name><surname>Chaisaen</surname> <given-names>R.</given-names></name> <name><surname>Sudhawiyangkul</surname> <given-names>T.</given-names></name> <name><surname>Rangpong</surname> <given-names>P.</given-names></name> <name><surname>Kiatthaveephong</surname> <given-names>S.</given-names></name> <name><surname>Dilokthanakul</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>MIN2Net: end-to-end multi-task learning for subject-independent motor imagery EEG classification</article-title>. <source>IEEE Trans. Biomed. Eng.</source> <volume>69</volume>, <fpage>2105</fpage>&#x2013;<lpage>2118</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TBME.2021.3137184</pub-id>, PMID: <pub-id pub-id-type="pmid">34932469</pub-id></citation></ref>
<ref id="ref2"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Balam</surname> <given-names>V. P.</given-names></name> <name><surname>Chinara</surname> <given-names>S.</given-names></name></person-group> (<year>2021</year>). <article-title>Statistical channel selection method for detecting drowsiness through single-channel EEG-based BCI system</article-title>. <source>IEEE Trans. Instrum. Meas.</source> <volume>70</volume>, <fpage>1</fpage>&#x2013;<lpage>9</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIM.2021.3094619</pub-id></citation></ref>
<ref id="ref3"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Baumeister</surname> <given-names>J.</given-names></name> <name><surname>Barthel</surname> <given-names>T.</given-names></name> <name><surname>Geiss</surname> <given-names>K. R.</given-names></name> <name><surname>Weiss</surname> <given-names>M.</given-names></name></person-group> (<year>2008</year>). <article-title>Influence of phosphatidylserine on cognitive performance and cortical activity after induced stress</article-title>. <source>Nutr. Neurosci.</source> <volume>11</volume>, <fpage>103</fpage>&#x2013;<lpage>110</lpage>. doi: <pub-id pub-id-type="doi">10.1179/147683008X301478</pub-id></citation></ref>
<ref id="ref4"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Britton</surname> <given-names>J. W.</given-names></name> <name><surname>Frey</surname> <given-names>L. C.</given-names></name> <name><surname>Hopp</surname> <given-names>J. L.</given-names></name> <name><surname>Korb</surname> <given-names>P.</given-names></name> <name><surname>Lievens</surname> <given-names>W. E.</given-names></name> <name><surname>Pestana-Knight</surname> <given-names>E. M.</given-names></name> <etal/></person-group>. (<year>2016</year>). <source>Electroencephalography (EEG): An introductory text and atlas of normal and abnormal findings in adults, children, and infants</source>. <publisher-loc>Chicago, IL, USA</publisher-loc>: <publisher-name>American Epilepsy Society</publisher-name>.</citation></ref>
<ref id="ref5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cao</surname> <given-names>Z.</given-names></name> <name><surname>Chuang</surname> <given-names>C.</given-names></name> <name><surname>King</surname> <given-names>J.</given-names></name> <name><surname>Lin</surname> <given-names>C. T.</given-names></name></person-group> (<year>2019</year>). <article-title>Multi-channel EEG recordings during a sustained-attention driving task</article-title>. <source>Sci. Data</source> <volume>6</volume>:<fpage>19</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41597-019-0027-4</pub-id>, PMID: <pub-id pub-id-type="pmid">30952963</pub-id></citation></ref>
<ref id="ref6"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Cui</surname> <given-names>J.</given-names></name></person-group> (<year>2021a</year>). EEG driver drowsiness dataset (unbalanced) [online]. Available at: <ext-link xlink:href="https://figshare.com/articles/dataset/EEG_driver_drowsiness_dataset_unbalanced/16586957" ext-link-type="uri">https://figshare.com/articles/dataset/EEG_driver_drowsiness_dataset_unbalanced/16586957</ext-link> (Accessed September 8, 2021).</citation></ref>
<ref id="ref7"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Cui</surname> <given-names>J.</given-names></name></person-group> (<year>2021b</year>). EEG driver drowsiness dataset [online]. Available at: <ext-link xlink:href="https://figshare.com/articles/dataset/EEG_driver_drowsiness_dataset/14273687" ext-link-type="uri">https://figshare.com/articles/dataset/EEG_driver_drowsiness_dataset/14273687</ext-link> (Accessed September 8, 2021).</citation></ref>
<ref id="ref8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cui</surname> <given-names>J.</given-names></name> <name><surname>Jin</surname> <given-names>X.</given-names></name> <name><surname>Hu</surname> <given-names>H.</given-names></name> <name><surname>Zhu</surname> <given-names>L.</given-names></name> <name><surname>Ozawa</surname> <given-names>K.</given-names></name> <name><surname>Pan</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2022a</year>). <article-title>Dynamic distribution alignment with dual-subspace mapping for cross-subject driver mental state detection</article-title>. <source>IEEE Trans. Cogn. Dev. Syst.</source> <volume>14</volume>, <fpage>1705</fpage>&#x2013;<lpage>1716</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TCDS.2021.3137530</pub-id></citation></ref>
<ref id="ref9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cui</surname> <given-names>J.</given-names></name> <name><surname>Lan</surname> <given-names>Z.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>R.</given-names></name> <name><surname>Li</surname> <given-names>F.</given-names></name> <name><surname>Sourina</surname> <given-names>O.</given-names></name> <etal/></person-group>. (<year>2022b</year>). <article-title>A compact and interpretable convolutional neural network for cross-subject driver drowsiness detection from single-channel EEG</article-title>. <source>Methods</source> <volume>202</volume>, <fpage>173</fpage>&#x2013;<lpage>184</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ymeth.2021.04.017</pub-id>, PMID: <pub-id pub-id-type="pmid">33901644</pub-id></citation></ref>
<ref id="ref10"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Cui</surname> <given-names>J.</given-names></name> <name><surname>Lan</surname> <given-names>Z.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name> <name><surname>Zheng</surname> <given-names>T.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name> <name><surname>Sourina</surname> <given-names>O.</given-names></name> <etal/></person-group>. (<year>2021</year>). Subject-independent drowsiness recognition from Single-Channel EEG with an interpretable CNN-LSTM model. International Conference on Cyberworlds (CW), pp. 201&#x2013;208. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2112.10894</pub-id></citation></ref>
<ref id="ref11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cui</surname> <given-names>J.</given-names></name> <name><surname>Lan</surname> <given-names>Z.</given-names></name> <name><surname>Sourina</surname> <given-names>O.</given-names></name> <name><surname>M&#x00FC;ller-Wittig</surname> <given-names>W.</given-names></name></person-group> (<year>2022c</year>). <article-title>EEG-based cross-subject driver drowsiness recognition with an interpretable convolutional neural network</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst.</source> <volume>34</volume>, <fpage>7921</fpage>&#x2013;<lpage>7933</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNNLS.2022.3147208</pub-id>, PMID: <pub-id pub-id-type="pmid">35171778</pub-id></citation></ref>
<ref id="ref12"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cui</surname> <given-names>J.</given-names></name> <name><surname>Yuan</surname> <given-names>L.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Li</surname> <given-names>R.</given-names></name> <name><surname>Jiang</surname> <given-names>T.</given-names></name></person-group> (<year>2023</year>). <article-title>Towards best practice of interpreting deep learning models for EEG-based brain computer interfaces</article-title>. <source>Front. Comput. Neurosci.</source> <volume>17</volume>:<fpage>1232925</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fncom.2023.1232925</pub-id>, PMID: <pub-id pub-id-type="pmid">37663037</pub-id></citation></ref>
<ref id="ref13"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Di Flumeri</surname> <given-names>G.</given-names></name> <name><surname>Ronca</surname> <given-names>V.</given-names></name> <name><surname>Giorgi</surname> <given-names>A.</given-names></name> <name><surname>Vozzi</surname> <given-names>A.</given-names></name> <name><surname>Aric&#x00F2;</surname> <given-names>P.</given-names></name> <name><surname>Sciaraffa</surname> <given-names>N.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>EEG-based index for timely detecting user&#x2019;s drowsiness occurrence in automotive applications</article-title>. <source>Front. Hum. Neurosci.</source> <volume>16</volume>:<fpage>866118</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fnhum.2022.866118</pub-id>, PMID: <pub-id pub-id-type="pmid">35669201</pub-id></citation></ref>
<ref id="ref14"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Ding</surname> <given-names>S.</given-names></name> <name><surname>Yuan</surname> <given-names>Z.</given-names></name> <name><surname>An</surname> <given-names>P.</given-names></name> <name><surname>Xue</surname> <given-names>G.</given-names></name> <name><surname>Sun</surname> <given-names>W.</given-names></name> <name><surname>Zhao</surname> <given-names>J.</given-names></name></person-group> (<year>2019</year>). Cascaded convolutional neural network with attention mechanism for Mobile EEG-based driver drowsiness detection system. IEEE International Conference on Bioinformatics and Biomedicine (BIBM), pp. 1457&#x2013;1464. doi: <pub-id pub-id-type="doi">10.1109/BIBM47256.2019.8982938</pub-id></citation></ref>
<ref id="ref15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Divvala</surname> <given-names>C.</given-names></name> <name><surname>Mishra</surname> <given-names>M.</given-names></name></person-group> (<year>2024</year>). <article-title>Deep learning-based attention mechanism for automatic drowsiness detection using EEG signal</article-title>. <source>IEEE Sens. Lett.</source> <volume>8</volume>, <fpage>1</fpage>&#x2013;<lpage>4</lpage>. doi: <pub-id pub-id-type="doi">10.1109/LSENS.2024.3363735</pub-id>, PMID: <pub-id pub-id-type="pmid">39573497</pub-id></citation></ref>
<ref id="ref16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Eldele</surname> <given-names>E.</given-names></name> <name><surname>Chen</surname> <given-names>Z.</given-names></name> <name><surname>Liu</surname> <given-names>C.</given-names></name> <name><surname>Wu</surname> <given-names>M.</given-names></name> <name><surname>Kwoh</surname> <given-names>C. K.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>An attention-based deep learning approach for sleep stage classification with Single-Channel EEG</article-title>. <source>IEEE Trans. Neural Syst. Rehabil. Eng.</source> <volume>29</volume>, <fpage>809</fpage>&#x2013;<lpage>818</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNSRE.2021.3076234</pub-id>, PMID: <pub-id pub-id-type="pmid">33909566</pub-id></citation></ref>
<ref id="ref17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fahimi</surname> <given-names>F.</given-names></name> <name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Goh</surname> <given-names>W.</given-names></name> <name><surname>Lee</surname> <given-names>T. S.</given-names></name> <name><surname>Ang</surname> <given-names>K.</given-names></name> <name><surname>Guan</surname> <given-names>C.</given-names></name></person-group> (<year>2019</year>). <article-title>Inter-subject transfer learning with an end-to-end deep convolutional neural network for EEG-based BCI</article-title>. <source>J. Neural Eng.</source> <volume>16</volume>:<fpage>026007</fpage>. doi: <pub-id pub-id-type="doi">10.1088/1741-2552/aaf3f6</pub-id>, PMID: <pub-id pub-id-type="pmid">30524056</pub-id></citation></ref>
<ref id="ref18"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fu</surname> <given-names>G.</given-names></name> <name><surname>Zhou</surname> <given-names>Y.</given-names></name> <name><surname>Gong</surname> <given-names>P.</given-names></name> <name><surname>Wang</surname> <given-names>P.</given-names></name> <name><surname>Shao</surname> <given-names>W.</given-names></name> <name><surname>Zhang</surname> <given-names>D.</given-names></name></person-group> (<year>2023</year>). <article-title>A temporal-spectral fused and attention-based deep model for automatic sleep staging</article-title>. <source>IEEE Trans. Neural Syst. Rehabil. Eng.</source> <volume>31</volume>, <fpage>1008</fpage>&#x2013;<lpage>1018</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNSRE.2023.3238852</pub-id>, PMID: <pub-id pub-id-type="pmid">37022069</pub-id></citation></ref>
<ref id="ref19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gao</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name> <name><surname>She</surname> <given-names>Q.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name></person-group> (<year>2023a</year>). <article-title>Domain adaptive algorithm based on multi-manifold embedded distributed alignment for brain-computer interfaces</article-title>. <source>IEEE J. Biomed. Health Inform.</source> <volume>27</volume>, <fpage>296</fpage>&#x2013;<lpage>307</lpage>. doi: <pub-id pub-id-type="doi">10.1109/JBHI.2022.3218453</pub-id>, PMID: <pub-id pub-id-type="pmid">36315544</pub-id></citation></ref>
<ref id="ref20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gao</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>A.</given-names></name> <name><surname>Wang</surname> <given-names>L.</given-names></name> <name><surname>Qian</surname> <given-names>R.</given-names></name> <name><surname>Chen</surname> <given-names>X.</given-names></name></person-group> (<year>2023b</year>). <article-title>A self-interpretable deep learning model for seizure prediction using a multi-scale prototypical part network</article-title>. <source>IEEE Trans. Neural Syst. Rehabil. Eng.</source> <volume>31</volume>, <fpage>1847</fpage>&#x2013;<lpage>1856</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNSRE.2023.3260845</pub-id>, PMID: <pub-id pub-id-type="pmid">37030672</pub-id></citation></ref>
<ref id="ref21"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gao</surname> <given-names>Z.</given-names></name> <name><surname>Wang</surname> <given-names>X.</given-names></name> <name><surname>Yang</surname> <given-names>Y.</given-names></name> <name><surname>Mu</surname> <given-names>C.</given-names></name> <name><surname>Cai</surname> <given-names>Q.</given-names></name> <name><surname>Dang</surname> <given-names>W.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>EEG-based spatio-temporal convolutional neural network for driver fatigue evaluation</article-title>. <source>IEEE Trans. Neural Netw. Learn. Syst.</source> <volume>30</volume>, <fpage>2755</fpage>&#x2013;<lpage>2763</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNNLS.2018.2886414</pub-id>, PMID: <pub-id pub-id-type="pmid">30640634</pub-id></citation></ref>
<ref id="ref22"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Goncharova</surname> <given-names>I. I.</given-names></name> <name><surname>McFarland</surname> <given-names>D. J.</given-names></name> <name><surname>Vaughan</surname> <given-names>T. M.</given-names></name> <name><surname>Wolpaw</surname> <given-names>J. R.</given-names></name></person-group> (<year>2003</year>). <article-title>EMG contamination of EEG: spectral and topographical characteristics</article-title>. <source>Clin. Neurophysiol.</source> <volume>114</volume>, <fpage>1580</fpage>&#x2013;<lpage>1593</lpage>. doi: <pub-id pub-id-type="doi">10.1016/S1388-2457(03)00093-2</pub-id>, PMID: <pub-id pub-id-type="pmid">12948787</pub-id></citation></ref>
<ref id="ref23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gong</surname> <given-names>P.</given-names></name> <name><surname>Wang</surname> <given-names>P.</given-names></name> <name><surname>Zhou</surname> <given-names>Y.</given-names></name> <name><surname>Wen</surname> <given-names>X.</given-names></name> <name><surname>Zhang</surname> <given-names>D.</given-names></name></person-group> (<year>2024</year>). <article-title>TFAC-net: a temporal-Frequential attentional convolutional network for driver drowsiness recognition with Single-Channel EEG</article-title>. <source>IEEE Trans. Intell. Transp. Syst.</source> <volume>25</volume>, <fpage>7004</fpage>&#x2013;<lpage>7016</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TITS.2023.3347075</pub-id></citation></ref>
<ref id="ref24"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>J.</given-names></name> <name><surname>Min</surname> <given-names>J.</given-names></name></person-group> (<year>2018</year>). <article-title>Automated detection of driver fatigue based on EEG signals using gradient boosting decision tree model</article-title>. <source>Cogn. Neurodyn.</source> <volume>12</volume>, <fpage>431</fpage>&#x2013;<lpage>440</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11571-018-9485-1</pub-id>, PMID: <pub-id pub-id-type="pmid">30137879</pub-id></citation></ref>
<ref id="ref25"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>J.</given-names></name> <name><surname>Shen</surname> <given-names>L.</given-names></name> <name><surname>Albanie</surname> <given-names>S.</given-names></name> <name><surname>Sun</surname> <given-names>G.</given-names></name> <name><surname>Wu</surname> <given-names>E.</given-names></name></person-group> (<year>2020</year>). <article-title>Squeeze-and-excitation networks</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell.</source> <volume>42</volume>, <fpage>2011</fpage>&#x2013;<lpage>2023</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TPAMI.2019.2913372</pub-id>, PMID: <pub-id pub-id-type="pmid">31034408</pub-id></citation></ref>
<ref id="ref26"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>Y.</given-names></name> <name><surname>Yan</surname> <given-names>J.</given-names></name> <name><surname>Fang</surname> <given-names>F.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name></person-group> (<year>2023</year>). <article-title>Character encoding-based motor imagery EEG classification using CNN</article-title>. <source>IEEE Sens. Lett.</source> <volume>7</volume>, <fpage>1</fpage>&#x2013;<lpage>4</lpage>. doi: <pub-id pub-id-type="doi">10.1109/LSENS.2023.3320066</pub-id></citation></ref>
<ref id="ref27"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Ioffe</surname> <given-names>S.</given-names></name> <name><surname>Szegedy</surname> <given-names>C.</given-names></name></person-group> (<year>2015</year>). Batch normalization: accelerating deep network training by reducing internal covariate shift. The 32nd International Conference on Machine Learning, pp. 448&#x2013;456. doi: <pub-id pub-id-type="doi">10.48550/arXiv.1502.03167</pub-id></citation></ref>
<ref id="ref28"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Joutsiniemi</surname> <given-names>S.-L.</given-names></name> <name><surname>Kaski</surname> <given-names>S.</given-names></name> <name><surname>Larsen</surname> <given-names>T. A.</given-names></name></person-group> (<year>1995</year>). <article-title>Self-organizing map in recognition of topographic patterns of EEG spectra</article-title>. <source>IEEE Trans. Biomed. Eng.</source> <volume>42</volume>, <fpage>1062</fpage>&#x2013;<lpage>1068</lpage>. doi: <pub-id pub-id-type="doi">10.1109/10.469372</pub-id>, PMID: <pub-id pub-id-type="pmid">7498909</pub-id></citation></ref>
<ref id="ref29"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lan</surname> <given-names>Z.</given-names></name> <name><surname>Zhao</surname> <given-names>J.</given-names></name> <name><surname>Liu</surname> <given-names>P.</given-names></name> <name><surname>Zhang</surname> <given-names>C.</given-names></name> <name><surname>Lyu</surname> <given-names>N.</given-names></name> <name><surname>Guo</surname> <given-names>L.</given-names></name></person-group> (<year>2024</year>). <article-title>Driving fatigue detection based on fusion of EEG and vehicle motion information</article-title>. <source>Biomed. Signal Process. Control</source> <volume>92</volume>:<fpage>106031</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.bspc.2024.106031</pub-id></citation></ref>
<ref id="ref30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lawhern</surname> <given-names>V. J.</given-names></name> <name><surname>Solon</surname> <given-names>A. J.</given-names></name> <name><surname>Waytowich</surname> <given-names>N. R.</given-names></name> <name><surname>Gordon</surname> <given-names>S. M.</given-names></name> <name><surname>Hung</surname> <given-names>C. P.</given-names></name> <name><surname>Lance</surname> <given-names>B. J.</given-names></name></person-group> (<year>2018</year>). <article-title>EEGNet: a compact convolutional neural network for EEG-based brain&#x2013;computer interfaces</article-title>. <source>J. Neural Eng.</source> <volume>15</volume>:<fpage>056013</fpage>. doi: <pub-id pub-id-type="doi">10.1088/1741-2552/aace8c</pub-id>, PMID: <pub-id pub-id-type="pmid">29932424</pub-id></citation></ref>
<ref id="ref31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname> <given-names>R.</given-names></name> <name><surname>Gao</surname> <given-names>R.</given-names></name> <name><surname>Yuan</surname> <given-names>L.</given-names></name> <name><surname>Suganthan</surname> <given-names>P. N.</given-names></name> <name><surname>Wang</surname> <given-names>L.</given-names></name> <name><surname>Sourina</surname> <given-names>O.</given-names></name></person-group> (<year>2023</year>). <article-title>An enhanced ensemble deep random vector functional link network for driver fatigue recognition</article-title>. <source>Eng. Appl. Artif. Intell.</source> <volume>123</volume>:<fpage>106237</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.engappai.2023.106237</pub-id></citation></ref>
<ref id="ref32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>Y.</given-names></name> <name><surname>Xiang</surname> <given-names>Z.</given-names></name> <name><surname>Yan</surname> <given-names>Z.</given-names></name> <name><surname>Jin</surname> <given-names>J.</given-names></name> <name><surname>Shu</surname> <given-names>L.</given-names></name> <name><surname>Zhang</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>CEEMDAN fuzzy entropy based fatigue driving detection using single-channel EEG</article-title>. <source>Biomed. Signal Process. Control</source> <volume>95</volume>:<fpage>106460</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.bspc.2024.106460</pub-id></citation></ref>
<ref id="ref33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ogino</surname> <given-names>M.</given-names></name> <name><surname>Mitsukura</surname> <given-names>Y.</given-names></name></person-group> (<year>2018</year>). <article-title>Portable drowsiness detection through use of a prefrontal single-channel electroencephalogram</article-title>. <source>Sensors</source> <volume>18</volume>:<fpage>4477</fpage>. doi: <pub-id pub-id-type="doi">10.3390/s18124477</pub-id>, PMID: <pub-id pub-id-type="pmid">30567347</pub-id></citation></ref>
<ref id="ref34"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pal</surname> <given-names>N. R.</given-names></name> <name><surname>Chuang</surname> <given-names>C.</given-names></name> <name><surname>Ko</surname> <given-names>L. W.</given-names></name> <etal/></person-group>. (<year>2008</year>). <article-title>EEG-based subject-and session-independent drowsiness detection: an unsupervised approach</article-title>. <source>EURASIP J. Adv. Signal Process.</source> <volume>2008</volume>, <fpage>1</fpage>&#x2013;<lpage>11</lpage>. doi: <pub-id pub-id-type="doi">10.1155/2008/519480</pub-id></citation></ref>
<ref id="ref35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Paulo</surname> <given-names>J. R.</given-names></name> <name><surname>Pires</surname> <given-names>G.</given-names></name> <name><surname>Nunes</surname> <given-names>U. J.</given-names></name></person-group> (<year>2021</year>). <article-title>Cross-subject zero calibration Driver&#x2019;s drowsiness detection: exploring spatiotemporal image encoding of EEG signals for convolutional neural network classification</article-title>. <source>IEEE Trans. Neural Syst. Rehabil. Eng.</source> <volume>29</volume>, <fpage>905</fpage>&#x2013;<lpage>915</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TNSRE.2021.3079505</pub-id>, PMID: <pub-id pub-id-type="pmid">33979288</pub-id></citation></ref>
<ref id="ref36"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Perkins</surname> <given-names>E.</given-names></name> <name><surname>Sitaula</surname> <given-names>C.</given-names></name> <name><surname>Burke</surname> <given-names>M.</given-names></name> <name><surname>Marzbanrad</surname> <given-names>F.</given-names></name></person-group> (<year>2023</year>). <article-title>Challenges of driver drowsiness prediction: the remaining steps to implementation</article-title>. <source>IEEE Trans. Intell. Veh.</source> <volume>8</volume>, <fpage>1319</fpage>&#x2013;<lpage>1338</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIV.2022.3224690</pub-id></citation></ref>
<ref id="ref37"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Petsiuk</surname> <given-names>V.</given-names></name> <name><surname>Das</surname> <given-names>A.</given-names></name> <name><surname>Saenko</surname> <given-names>K.</given-names></name></person-group> (<year>2018</year>). Rise: randomized input sampling for explanation of black-box models. arXiv preprint arXiv:1806.07421.</citation></ref>
<ref id="ref38"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rao</surname> <given-names>B.</given-names></name> <name><surname>Zhang</surname> <given-names>J.</given-names></name> <name><surname>Wu</surname> <given-names>D.</given-names></name> <name><surname>Zhu</surname> <given-names>C.</given-names></name> <name><surname>Sun</surname> <given-names>X.</given-names></name> <name><surname>Chen</surname> <given-names>B.</given-names></name></person-group> (<year>2024</year>). <article-title>Privacy inference attack and defense in centralized and federated learning: a comprehensive survey</article-title>. <source>IEEE Trans. Artif. Intell.</source> <volume>1</volume>, <fpage>1</fpage>&#x2013;<lpage>22</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TAI.2024.3363670</pub-id>, PMID: <pub-id pub-id-type="pmid">39573497</pub-id></citation></ref>
<ref id="ref39"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Reddy</surname> <given-names>Y. R. M.</given-names></name> <name><surname>Muralidhar</surname> <given-names>P.</given-names></name> <name><surname>Srinivas</surname> <given-names>M.</given-names></name></person-group> (<year>2024</year>). <article-title>An effective hybrid deep learning model for Single-Channel EEG-based subject-independent drowsiness recognition</article-title>. <source>Brain Topogr.</source> <volume>37</volume>, <fpage>1</fpage>&#x2013;<lpage>18</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10548-023-01016-0</pub-id>, PMID: <pub-id pub-id-type="pmid">37995000</pub-id></citation></ref>
<ref id="ref40"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schirrmeister</surname> <given-names>R. T.</given-names></name> <name><surname>Springenberg</surname> <given-names>J. T.</given-names></name> <name><surname>Fiederer</surname> <given-names>L. D. J.</given-names></name> <name><surname>Glasstetter</surname> <given-names>M.</given-names></name> <name><surname>Eggensperger</surname> <given-names>K.</given-names></name> <name><surname>Tangermann</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>Deep learning with convolutional neural networks for EEG decoding and visualization</article-title>. <source>Hum. Brain Mapp.</source> <volume>38</volume>, <fpage>5391</fpage>&#x2013;<lpage>5420</lpage>. doi: <pub-id pub-id-type="doi">10.1002/hbm.23730</pub-id>, PMID: <pub-id pub-id-type="pmid">28782865</pub-id></citation></ref>
<ref id="ref41"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Simon</surname> <given-names>M.</given-names></name> <name><surname>Schmidt</surname> <given-names>E. A.</given-names></name> <name><surname>Kincses</surname> <given-names>W. E.</given-names></name> <name><surname>Fritzsche</surname> <given-names>M.</given-names></name> <name><surname>Bruns</surname> <given-names>A.</given-names></name> <name><surname>Aufmuth</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2011</year>). <article-title>EEG alpha spindle measures as indicators of driver fatigue under real traffic conditions</article-title>. <source>Clin. Neurophysiol.</source> <volume>122</volume>, <fpage>1168</fpage>&#x2013;<lpage>1178</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.clinph.2010.10.044</pub-id>, PMID: <pub-id pub-id-type="pmid">21333592</pub-id></citation></ref>
<ref id="ref42"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wan</surname> <given-names>Z.</given-names></name> <name><surname>Li</surname> <given-names>M.</given-names></name> <name><surname>Liu</surname> <given-names>S.</given-names></name> <name><surname>Huang</surname> <given-names>J.</given-names></name> <name><surname>Tan</surname> <given-names>H.</given-names></name> <name><surname>Duan</surname> <given-names>W.</given-names></name></person-group> (<year>2023</year>). <article-title>EEGformer: a transformer&#x2013;based brain activity classification method using EEG signal</article-title>. <source>Front. Neurosci.</source> <volume>17</volume>:<fpage>1148855</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fnins.2023.1148855</pub-id>, PMID: <pub-id pub-id-type="pmid">37034169</pub-id></citation></ref>
<ref id="ref43"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Su</surname> <given-names>H.</given-names></name> <name><surname>Zhang</surname> <given-names>B.</given-names></name> <name><surname>Hu</surname> <given-names>X.</given-names></name></person-group> (<year>2020</year>). <article-title>Interpret neural networks by extracting critical subnetworks</article-title>. <source>IEEE Trans. Image Process.</source> <volume>29</volume>, <fpage>6707</fpage>&#x2013;<lpage>6720</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIP.2020.2993098</pub-id>, PMID: <pub-id pub-id-type="pmid">32406837</pub-id></citation></ref>
<ref id="ref44"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>You</surname> <given-names>F.</given-names></name> <name><surname>Gong</surname> <given-names>Y.</given-names></name> <name><surname>Tu</surname> <given-names>H.</given-names></name> <name><surname>Liang</surname> <given-names>J.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name></person-group> (<year>2020</year>). <article-title>A fatigue driving detection algorithm based on facial motion information entropy</article-title>. <source>J. Adv. Transp.</source> <volume>2020</volume>, <fpage>1</fpage>&#x2013;<lpage>17</lpage>. doi: <pub-id pub-id-type="doi">10.1155/2020/8851485</pub-id>, PMID: <pub-id pub-id-type="pmid">39136038</pub-id></citation></ref>
<ref id="ref45"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Y.</given-names></name> <name><surname>Guo</surname> <given-names>R.</given-names></name> <name><surname>Peng</surname> <given-names>Y.</given-names></name> <name><surname>Kong</surname> <given-names>W.</given-names></name> <name><surname>Nie</surname> <given-names>F.</given-names></name> <name><surname>Lu</surname> <given-names>B. L.</given-names></name></person-group> (<year>2022</year>). <article-title>An auto-weighting incremental random vector functional link network for EEG-based driving fatigue detection</article-title>. <source>IEEE Trans. Instrum. Meas.</source> <volume>71</volume>, <fpage>1</fpage>&#x2013;<lpage>14</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TIM.2022.3216409</pub-id>, PMID: <pub-id pub-id-type="pmid">39573497</pub-id></citation></ref>
<ref id="ref46"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>Z.</given-names></name> <name><surname>Ning</surname> <given-names>H.</given-names></name> <name><surname>Zhou</surname> <given-names>F.</given-names></name></person-group> (<year>2022</year>). <article-title>A systematic survey of driving fatigue monitoring</article-title>. <source>IEEE Trans. Intell. Transp. Syst.</source> <volume>23</volume>, <fpage>19999</fpage>&#x2013;<lpage>20020</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TITS.2022.3189346</pub-id></citation></ref>
<ref id="ref47"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>M.</given-names></name> <name><surname>Zhong</surname> <given-names>S.</given-names></name> <name><surname>Fu</surname> <given-names>X.</given-names></name> <name><surname>Tang</surname> <given-names>B.</given-names></name> <name><surname>Pecht</surname> <given-names>M.</given-names></name></person-group> (<year>2020</year>). <article-title>Deep residual shrinkage networks for fault diagnosis</article-title>. <source>IEEE Trans. Industr. Inform.</source> <volume>16</volume>, <fpage>4681</fpage>&#x2013;<lpage>4690</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TII.2019.2943898</pub-id></citation></ref>
<ref id="ref48"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>B.</given-names></name> <name><surname>Khosla</surname> <given-names>A.</given-names></name> <name><surname>Lapedriza</surname> <given-names>A.</given-names></name> <name><surname>Oliva</surname> <given-names>A.</given-names></name> <name><surname>Torralba</surname> <given-names>A.</given-names></name></person-group> (<year>2016</year>). Learning deep features for discriminative localization. IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2921&#x2013;2929. doi: <pub-id pub-id-type="doi">10.1109/CVPR.2016.319</pub-id></citation></ref>
</ref-list>
</back>
</article>