<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Digit. Health</journal-id><journal-title-group>
<journal-title>Frontiers in Digital Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Digit. Health</abbrev-journal-title></journal-title-group>
<issn pub-type="epub">2673-253X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fdgth.2026.1749570</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>A refined SMOTE-ENN optimization method based on machine learning for heart rate variability data classification</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author"><name><surname>Zhang</surname><given-names>Biao</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Liang</surname><given-names>Muzi</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Zhou</surname><given-names>Yuanlun</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/3333641/overview"/><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role></contrib>
<contrib contrib-type="author"><name><surname>Ji</surname><given-names>Binbin</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role></contrib>
<contrib contrib-type="author"><name><surname>Han</surname><given-names>Meng</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role></contrib>
<contrib contrib-type="author"><name><surname>Li</surname><given-names>Hongyan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Lang</surname><given-names>Xufeng</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/2684227/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Song</surname><given-names>Yihua</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref><uri xlink:href="https://loop.frontiersin.org/people/2409823/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Gao</surname><given-names>Run</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author" corresp="yes"><name><surname>Zhou</surname><given-names>Zuojian</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref><uri xlink:href="https://loop.frontiersin.org/people/2405513/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author" corresp="yes"><name><surname>Qiao</surname><given-names>Xuebin</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>School of Artificial Intelligence and Information Technology, Nanjing University of Chinese Medicine</institution>, <city>Nanjing</city>, <country country="CN">China</country></aff>
<aff id="aff2"><label>2</label><institution>Jiangsu Province Engineering Research Center of TCM Intelligence Health Service, Nanjing University of Chinese Medicine</institution>, <city>Nanjing</city>, <country country="CN">China</country></aff>
<aff id="aff3"><label>3</label><institution>The Affiliated Brain Hospital of Nanjing Medical University</institution>, <city>Nanjing</city>, <country country="CN">China</country></aff>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> Zuojian Zhou <email xlink:href="mailto:zhouzj@njucm.edu.cn">zhouzj@njucm.edu.cn</email> Xuebin Qiao <email xlink:href="mailto:qiaoxb@njucm.edu.cn">qiaoxb@njucm.edu.cn</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-12"><day>12</day><month>02</month><year>2026</year></pub-date>
<pub-date publication-format="electronic" date-type="collection"><year>2026</year></pub-date>
<volume>8</volume><elocation-id>1749570</elocation-id>
<history>
<date date-type="received"><day>24</day><month>11</month><year>2025</year></date>
<date date-type="rev-recd"><day>31</day><month>12</month><year>2025</year></date>
<date date-type="accepted"><day>20</day><month>01</month><year>2026</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2026 Zhang, Liang, Zhou, Ji, Han, Li, Lang, Song, Gao, Zhou and Qiao.</copyright-statement>
<copyright-year>2026</copyright-year><copyright-holder>Zhang, Liang, Zhou, Ji, Han, Li, Lang, Song, Gao, Zhou and Qiao</copyright-holder><license><ali:license_ref start_date="2026-02-12">https://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p></license>
</permissions>
<abstract><sec><title>Introduction</title>
<p>The classification of imbalanced heart rate variability (HRV) data utilizing machine learning algorithms is of considerable significance for the early detection of depression. In this work, a refined SMOTE-ENN hybrid optimization method based on machine learning algorithms is proposed, which achieves precise classification of Autonomic Nervous System (ANS) states using imbalanced and limited HRV data.</p>
</sec><sec><title>Methods</title>
<p>The refined Synthetic Minority Over-sampling Technique generates new minority class samples through in-line and off-line linear interpolation. Subsequently, the refined Edited Nearest Neighbor under-sampling algorithm is employed to remove most of the noisy data while retaining selected boundary data to reduce the overfitting risk. Four machine learning algorithms are employed to classify the optimized HRV data using the refined SMOTE-ENN method from 321 participants, including support vector machine, random forest, neural network, and K-nearest neighbors.</p>
</sec><sec><title>Results</title>
<p>The results indicate that the classification accuracy of all four methods surpasses 91&#x0025;, with the AUC (Area Under the Curve) values exceeding 0.92 following the refined SMOTE-ENN optimization. In comparison to the classification results with classical SMOTE optimization of the four machine learning algorithms, the mean accuracy, precision, recall, and F1 score improved by 0.12, 0.12, 0.10 and 0.11, respectively. Feature importance analysis reveals that SDNN (standard deviation of NN intervals) has the most significant impact on HRV classification results, reflecting its influence on ANS.</p>
</sec><sec><title>Discussion</title>
<p>The refined SMOTE-ENN optimization method enhances the detection performance of the machine learning algorithms for classifying imbalanced HRV data, providing valuable technical support for the early detection of depression.</p>
</sec>
</abstract>
<kwd-group>
<kwd>depression detection</kwd>
<kwd>heart rate variability</kwd>
<kwd>imbalanced data</kwd>
<kwd>machine learning</kwd>
<kwd>over-sampling technique</kwd>
</kwd-group><funding-group><funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This research is funded by the Jiangsu Province Engineering Research Center of TCM Intelligence Health Service Open Project (ZHZYY202404).</funding-statement></funding-group><counts>
<fig-count count="6"/>
<table-count count="8"/><equation-count count="6"/><ref-count count="30"/><page-count count="12"/><word-count count="0"/></counts><custom-meta-group><custom-meta><meta-name>section-at-acceptance</meta-name><meta-value>Health Informatics</meta-value></custom-meta></custom-meta-group>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><label>1</label><title>Introduction</title>
<p>Depression can be seriously harmful to one&#x0027;s physical and mental health (<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B2">2</xref>). Early detection of depression is crucial for timely intervention and for improving patient prognosis. Currently, the commonly used methods for early detection of depression are health questionnaires (<xref ref-type="bibr" rid="B3">3</xref>), sleep pattern analysis (<xref ref-type="bibr" rid="B4">4</xref>), and heart rate variability (HRV) analysis (<xref ref-type="bibr" rid="B5">5</xref>). Health questionnaires, such as the Patient Health Questionnaire (PHQ-9) and General Anxiety Disorder Scale (GAD-7), are widely used due to their simplicity and convenience (<xref ref-type="bibr" rid="B6">6</xref>). However, self-reported data are often subjective and susceptible to biases, such as patients&#x0027; preconceptions and memory recall. To reduce the subjectivity inherent in health questionnaires, sleep pattern analysis has been proposed as an alternative method for assessing mental health status (<xref ref-type="bibr" rid="B7">7</xref>). Nevertheless, sleep issues may arise from multiple causes, such as physical illness, lifestyle, and even the sleep environment. Additionally, the accuracy of sleep tracking by consumer wearable devices remains limited, so the reliability of sleep pattern analysis needs further improvement (<xref ref-type="bibr" rid="B8">8</xref>).</p>
<p>In recent years, HRV has gained increasing attention as an objective and quantifiable method for early depression detection (<xref ref-type="bibr" rid="B9">9</xref>). Compared with health questionnaires and sleep pattern analysis, HRV has the advantages of objectivity and easy quantification, so it has been widely applied in depression detection. HRV refers to the variability of the time interval between consecutive heartbeats and is an important indicator of autonomic nervous system (ANS) function (<xref ref-type="bibr" rid="B10">10</xref>). It reflects the balance between the sympathetic nervous system (SNS) and the parasympathetic nervous system (PNS). Lower HRV is usually associated with increased SNS activity and decreased PNS activity (<xref ref-type="bibr" rid="B9">9</xref>). In 2019, Hartmann et al. (<xref ref-type="bibr" rid="B11">11</xref>) studied the HRV between 62 depressive individuals without antidepressant medication and 65 healthy controls. The results revealed differences in HRV parameters between the depressive patients and the healthy controls at baseline, and the changes in HRV parameter values are correlated with changes in symptom severity of depression. In 2018, Koch et al. (<xref ref-type="bibr" rid="B12">12</xref>) conducted a comprehensive analysis of 21 studies on patients with major depression, including 2,250 depressive patients and 1,982 healthy controls. The results showed that all the measured HRV parameters of patients with major depression were lower than those of healthy controls. In 2023, Wu et al. (<xref ref-type="bibr" rid="B13">13</xref>) conducted a meta-analysis of 43 papers on depressive patients. The comprehensive analysis results showed that the HRV measurement parameters of depressed individuals were lower than those of healthy controls, except for LF/HF, suggesting that depressed people may be at a higher risk of cardiovascular diseases than healthy people.</p>
<p>Although HRV is closely related to depressive symptoms and can be used as an effective biological indicator for early detection of depression, imbalanced data usually exist when collecting HRV data from participants, especially when sample sizes are small (<xref ref-type="bibr" rid="B14">14</xref>). Imbalanced data indicates that the sample size of the majority class is much larger than the sample size of the minority class, and it is quite common in medical diagnosis (<xref ref-type="bibr" rid="B15">15</xref>). When the value of minority class samples overwhelms that of majority class samples, the effect of imbalanced data becomes particularly significant, such as in the detection of malignant tumors (<xref ref-type="bibr" rid="B16">16</xref>). However, traditional classification algorithms usually assume that the training dataset is balanced, or assume that the misclassification loss of different classes is the same (<xref ref-type="bibr" rid="B17">17</xref>, <xref ref-type="bibr" rid="B18">18</xref>), which is not applicable in imbalanced diagnosis data. Consequently, how to train machine learning algorithms that can effectively classify imbalanced HRV data remains an urgent problem (<xref ref-type="bibr" rid="B19">19</xref>).</p>
<p>To address this issue, data resampling techniques have been widely adopted to adjust the class distribution. Among them, the Synthetic Minority Oversampling Technique (SMOTE) and its variants have proven particularly effective in medical signal analysis. For instance, Hussain et al. (<xref ref-type="bibr" rid="B20">20</xref>) demonstrated that applying SMOTE to multimodal HRV features significantly improved the sensitivity and overall accuracy of congestive heart failure detection, verifying that synthetic oversampling can preserve the discriminative power of physiological signals. Beyond standard oversampling, hybrid strategies have also been proposed to further refine classification boundaries. Xu et al. (<xref ref-type="bibr" rid="B15">15</xref>) introduced a method combining SMOTE with Edited Nearest Neighbor (ENN), which not only balances the class distribution but also eliminates noisy samples from the majority class. Their results across various datasets suggest that such hybrid resampling is superior in handling the noise and overlap often found in complex medical data. These studies collectively indicate that data resampling is a viable strategy for improving the classification performance of imbalanced physiological data. Currently, studies on imbalanced data based on SMOTE typically use original datasets with large sample sizes (<xref ref-type="bibr" rid="B20">20</xref>&#x2013;<xref ref-type="bibr" rid="B22">22</xref>), generally ranging from several thousand to millions. However, its effectiveness in the specific context of depression detection using limited and sparse HRV samples remains to be fully explored.</p>
<p>In this work, a refined SMOTE-ENN hybrid optimization method based on machine learning algorithms is proposed to address the classification challenges of limited and imbalanced sample distribution. The refined Synthetic Minority Over-sampling Technique (r-SMOTE) generates new minority class samples by in-line and off-line linear interpolation. Afterwards, the refined Edited Nearest Neighbor (r-ENN) under-sampling method is employed to remove most of the noisy data and retain selected boundary data to reduce the overfitting risk. The performance of the refined SMOTE-ENN method is demonstrated by optimizing the imbalanced HRV data from 321 participants, with each record containing nine features. These data are divided into seven ANS states, with minority class samples comprising less than 5&#x0025; of the total data.</p>
<p>Four machine learning algorithms are employed to evaluate the classification performance after data optimization, including Support Vector Machine (SVM), Random Forest (RF), Neural Network (NN), and K-Nearest Neighbor (KNN). The classification results show that following refined SMOTE-ENN optimization, the overall classification accuracy of the four machine learning algorithms exceeds 91&#x0025;, with the AUC (area under the receiver operating characteristic curve) values surpassing 0.92. In comparison to the classification results with classical SMOTE optimization of the four machine learning algorithms, the mean accuracy, precision, recall, and F1 score improved by 0.12, 0.12, 0.10 and 0.11, respectively. The refined SMOTE-ENN method improves the detection performance of machine learning algorithms in classifying limited and imbalanced data, providing valuable technical support for the early detection of depression.</p>
<p>Currently, most public datasets focus on binary classification, e.g., Stress vs. Rest in WESAD, or Arrhythmia vs. Normal in MIT-BIH. To the best of our knowledge, there is no public dataset that provides the fine-grained, 7-category taxonomy of autonomic nervous system states used in this study. This scarcity actually highlights the novelty and clinical value of the proposed categorization system. On the other hand, classical SMOTE positions newly generated samples on the lines connecting the original samples in the sample space. The refined SMOTE-ENN introduces off-line interpolation, which expands the data space and increases the diversity of generated data through nonlinear transformations introduced by trigonometric functions, overcoming the limitations of linear interpolation in the traditional SMOTE method. The proposed method demonstrates good optimization capabilities for imbalanced datasets, especially when the dataset size is small.</p>
</sec>
<sec id="s2" sec-type="methods"><label>2</label><title>Materials and methods</title>
<sec id="s2a"><label>2.1</label><title>Samples and data</title>
<p>The samples were recruited from 321 participants (89 males and 232 females) between July and September of 2024 in the Affiliated Brain Hospital of Nanjing Medical University, Nanjing. The mean age is 33.16&#x2009;&#x00B1;&#x2009;15.41 years (mean&#x2009;&#x00B1;&#x2009;SD). The mean height, weight, and Body Mass Index (BMI) are 165.23&#x2009;&#x00B1;&#x2009;8.02&#x2005;cm, 62.52&#x2009;&#x00B1;&#x2009;13.44&#x2005;kg, and 22.78&#x2009;&#x00B1;&#x2009;3.89&#x2005;kg/m<sup>2</sup>, respectively. The experimental flowchart is shown in <xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>. The participants&#x0027; electrocardiogram (ECG) signals are filtered to remove baseline drift, myoelectric interference, and 50&#x2005;Hz power frequency noise. The filtered signals are then digitally recorded using a high-speed, high-precision analog-to-digital converter, and the participants&#x0027; information is anonymized.</p>
<fig id="F1" position="float"><label>Figure&#x00A0;1</label>
<caption><p>Overall flowchart of the experiment. The participants&#x0027; ECG signals are collected using the 6-lead method, and nine HRV parameters are calculated. The refined SMOTE-ENN method is applied to optimize the imbalanced HRV data, followed by the use of four machine learning algorithms to classify the optimized data.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdgth-08-1749570-g001.tif"><alt-text content-type="machine-generated">Flowchart displaying a four-step process for classifying autonomic nervous system states. Step 1: ECG signal sampling and filtering. Step 2: Time and frequency analysis with feature extraction. Step 3: Data optimization using refined SMOTE-ENN. Step 4: Modeling and classification with four machine learning methods: SVM, RF, NN, and KNN. Each step is illustrated with relevant graphs and diagrams.</alt-text>
</graphic>
</fig>
<p>Frequency and time domain analysis methods are employed to calculate nine HRV parameters based on the ECG signals, which are used as the features of the training dataset, as shown in <xref ref-type="table" rid="T1">Table&#x00A0;1</xref>. These nine parameters possess strong scientific validity and representativeness in the field of HRV research. These nine features cover the core dimensions of both time and frequency domains, allowing for the characterization of the dynamic balance of the autonomic nervous system from multiple perspectives (<xref ref-type="bibr" rid="B19">19</xref>, <xref ref-type="bibr" rid="B23">23</xref>, <xref ref-type="bibr" rid="B24">24</xref>). Specifically, HR provides the basic physiological background; SDNN reflects the overall regulatory capacity of the autonomic nervous system; rmSSD and pNN50 focus on evaluating parasympathetic nerve activity. In frequency domain analysis, LF and HF allow researchers to distinguish the contributions of sympathetic and parasympathetic nerves; VLF is closely related to metabolic and thermoregulatory processes in long-term monitoring; TSP integrates the overall energy distribution, providing a global input for machine learning models.</p>
<table-wrap id="T1" position="float"><label>Table&#x00A0;1</label>
<caption><p>Nine features of the HRV dataset.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Feature</th>
<th valign="top" align="center">Unit</th>
<th valign="top" align="center">Biological definition</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">HR</td>
<td valign="top" align="left">bpm</td>
<td valign="top" align="left">Heart rate</td>
</tr>
<tr>
<td valign="top" align="left">SDNN</td>
<td valign="top" align="left">ms</td>
<td valign="top" align="left">Standard deviation of NN (normal R-peaks) intervals</td>
</tr>
<tr>
<td valign="top" align="left">rmSSD</td>
<td valign="top" align="left">ms</td>
<td valign="top" align="left">Square root of the mean of the sum of the squares of differences between successive NN intervals</td>
</tr>
<tr>
<td valign="top" align="left">pNN50</td>
<td valign="top" align="left">&#x0025;</td>
<td valign="top" align="left">Proportion of the number of NN-interval difference of successive NN intervals which are greater than 50&#x2005;ms divided by the total number of NN intervals</td>
</tr>
<tr>
<td valign="top" align="left">VLF</td>
<td valign="top" align="left">ms<sup>2</sup></td>
<td valign="top" align="left">Very low frequency power (0.003&#x2013;0.04&#x2005;Hz)</td>
</tr>
<tr>
<td valign="top" align="left">LF</td>
<td valign="top" align="left">ms<sup>2</sup></td>
<td valign="top" align="left">Low frequency power (0.04&#x2013;0.15&#x2005;Hz)</td>
</tr>
<tr>
<td valign="top" align="left">HF</td>
<td valign="top" align="left">ms<sup>2</sup></td>
<td valign="top" align="left">High frequency power (0.15&#x2013;0.4&#x2005;Hz)</td>
</tr>
<tr>
<td valign="top" align="left">TSP</td>
<td valign="top" align="left">ms<sup>2</sup></td>
<td valign="top" align="left">Total spectrum power (&#x003C;0.4&#x2005;Hz)</td>
</tr>
<tr>
<td valign="top" align="left">LF/HF</td>
<td valign="top" align="left">&#x2013;</td>
<td valign="top" align="left">LF/HF ratio</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The ANS states of all participants are divided into seven classes, such as fatigue and sympathetic nervous system disorder, according to the participants&#x0027; past medical conditions and the doctors&#x0027; diagnosis records. The data labeling process employed a combination of the clinical diagnostic data and HRV data analysis (<xref ref-type="bibr" rid="B19">19</xref>, <xref ref-type="bibr" rid="B23">23</xref>, <xref ref-type="bibr" rid="B25">25</xref>). HRV data were collected in the hospital, and the participants had already undergone relevant clinical diagnoses, including: head-up tilt test, deep breathing test, Valsalva maneuver, and clinical questionnaires. The following physiological and biochemical data were obtained: baseline blood pressure, BMI, age, medical history, cortisol, and catecholamines. Based on these diagnostic and physiological data, combined with HRV data, physicians categorized participants into seven groups. For example: Stress: high cortisol levels or high PSS-10 score; ANS Balance: low COMPASS-31 score, normal blood pressure, and no relevant medical history. These seven classes are used as the labels of the training dataset, as shown in <xref ref-type="table" rid="T2">Table&#x00A0;2</xref>.</p>
<table-wrap id="T2" position="float"><label>Table&#x00A0;2</label>
<caption><p>Seven labels of the HRV dataset.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Label</th>
<th valign="top" align="center">Autonomic nervous system state</th>
<th valign="top" align="center">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left">Fatigue</td>
<td valign="top" align="left">Relative hyperactivity of parasympathetic nervous system after long-term stress or energy depletion</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left">Sympathetic nervous system disorder</td>
<td valign="top" align="left">Sympathetic dysfunction, with parasympathetic dominance</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left">Autonomic nervous system excitability</td>
<td valign="top" align="left">The overall activity levels of sympathetic and parasympathetic systems are elevated</td>
</tr>
<tr>
<td valign="top" align="left">4</td>
<td valign="top" align="left">Autonomic nervous system balance</td>
<td valign="top" align="left">Sympathetic and parasympathetic nervous systems are in a reasonable dynamic range</td>
</tr>
<tr>
<td valign="top" align="left">5</td>
<td valign="top" align="left">Dysautonomia</td>
<td valign="top" align="left">Autonomic nervous system disorder</td>
</tr>
<tr>
<td valign="top" align="left">6</td>
<td valign="top" align="left">Stress</td>
<td valign="top" align="left">Highly excited sympathetic nervous system</td>
</tr>
<tr>
<td valign="top" align="left">7</td>
<td valign="top" align="left">Vagus nerve disorder</td>
<td valign="top" align="left">Damage to parasympathetic function</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2b"><label>2.2</label><title>Refined SMOTE-ENN optimization method</title>
<p>The HRV dataset is relatively limited and exhibits an unbalanced distribution across classes. The majority of participants fall into label 4 (ANS balance), comprising 195 cases, while label 2 (sympathetic nervous system disorder) contains only six cases. The r-SMOTE model based on the traditional SMOTE algorithm is proposed to address the extremely imbalanced sample distribution. SMOTE is a technique designed to address class imbalance by generating synthetic minority class samples, which is accomplished by interpolating between existing minority class samples. The first step in the proposed r-SMOTE is feature standardization. The nine HRV features are standardized using z-scores to eliminate discrepancies in dimensionality. Let <italic>z</italic> represent the standardized feature value, and the <xref ref-type="disp-formula" rid="disp-formula1">Equation 1</xref> is obtained:<disp-formula id="disp-formula1"><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM1"><mml:mrow><mml:mi mathvariant="bold-italic">z</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mfrac><mml:mrow><mml:mrow><mml:mi mathvariant="bold-italic">x</mml:mi></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:mi mathvariant="bold-italic">&#x03BC;</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mi mathvariant="bold-italic">&#x03C3;</mml:mi></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math><label>(1)</label></disp-formula>where <italic>&#x00B5;</italic> is the mean and <italic>&#x03C3;</italic> is the standard deviation. This transformation ensures that the interpolation process in the r-SMOTE algorithm is not affected by scale differences in the feature space.</p>
<p>The second step of r-SMOTE involves synthesizing new samples through feature interpolation. For each minority class sample <italic>x</italic><sub>i</sub>, r-SMOTE calculates its nearest neighbors in the feature space. For example, if the neighbor parameter <italic>k</italic>&#x2009;&#x003D;&#x2009;3, r-SMOTE selects three nearest neighbor samples for each minority class sample and generates a new sample that fully utilizes the information from these neighboring samples. Given that the HRV data in the dataset consists of nine features in <xref ref-type="table" rid="T1">Table&#x00A0;1</xref>, the Euclidean distance between two samples <italic>x</italic><sub>i</sub>&#x2009;&#x003D;&#x2009;(<italic>x</italic><sub>i1</sub>, <italic>x</italic><sub>i2</sub>&#x2026; <italic>x</italic><sub>i9</sub>) and <italic>x</italic><sub>j</sub>&#x2009;&#x003D;&#x2009;(<italic>x</italic><sub>j1</sub>, <italic>x</italic><sub>j2</sub>&#x2026; <italic>x</italic><sub>j9</sub>) is defined by <xref ref-type="disp-formula" rid="disp-formula2">Equation 2</xref>:<disp-formula id="disp-formula2"><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM2"><mml:mi>d</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mrow><mml:mspace width="thickmathspace" /></mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>j</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:msqrt><mml:msubsup><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mn>9</mml:mn></mml:msubsup><mml:mrow><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mspace width="thinmathspace" /><mml:mi>j</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo 
stretchy="false">)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:msqrt></mml:math><label>(2)</label></disp-formula>where <italic>x</italic><sub>ik</sub> and <italic>x</italic><sub>jk</sub> are the <italic>k</italic>th feature values of samples <italic>x</italic><sub>i</sub> and <italic>x</italic><sub>j</sub>, respectively. Synthetic samples are generated through in-line and off-line linear interpolation between the minority class sample and its neighboring samples. The synthetic sample <italic>x</italic><sub>new1</sub> of linear interpolation is defined by <xref ref-type="disp-formula" rid="disp-formula3">Equation 3</xref>:<disp-formula id="disp-formula3"><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM3"><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>e</mml:mi><mml:mi>w</mml:mi><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:mi>&#x03BB;</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>j</mml:mi></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:math><label>(3)</label></disp-formula>where <italic>&#x03BB;</italic> is a random value in the range [&#x2212;1, 1]. This process ensures that the synthetic sample lies on the line crossing the original sample and its neighboring sample, thus maintaining the continuity and correlation of the data. Taking the sample generation of two features as an example, the in-line linear interpolation of r-SMOTE is shown in <xref ref-type="fig" rid="F2">Figure&#x00A0;2a</xref>.</p>
<fig id="F2" position="float"><label>Figure&#x00A0;2</label>
<caption><p>Illustration of the r-SMOTE <bold>(a)</bold> and r-ENN <bold>(b)</bold> principle. <bold>(a)</bold> r-SMOTE generates <italic>x</italic><sub>new1</sub> by the in-line linear interpolation and <italic>x</italic><sub>new2</sub> by the off-line linear interpolation. <bold>(b)</bold> r-ENN retains a small part of the data on the decision boundary to reduce the risk of overfitting. These data are indicated by the dashed boxes in the figure.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-08-1749570-g002.tif"><alt-text content-type="machine-generated">Diagram with two panels: (a) shows a geometric representation of data points x<sub>i</sub> and x<sub>j</sub> with vectors and angles, illustrating new positions x<sub>new1</sub> and x<sub>new2</sub> using parameters d, &#x03BB;d, and &#x03B8;. (b) displays a scatter plot of colored clusters in blue, green, and orange against Feature 1 and Feature 2 axes, highlighting specific points in dashed squares.</alt-text>
</graphic>
</fig>
<p>The synthetic sample <italic>x</italic><sub>new2</sub> generated by the off-line linear interpolation is located on the circle whose diameter is the line connecting <italic>x</italic><sub>i</sub> and <italic>x</italic><sub>j</sub>, as shown in <xref ref-type="fig" rid="F2">Figure&#x00A0;2a</xref>. The Euclidean distance between <italic>x</italic><sub>i</sub> and <italic>x</italic><sub>new2</sub> is <italic>d</italic>cos<italic>&#x03B8;</italic>, where <italic>&#x03B8;</italic> is a random value in the range [&#x2212;1/2&#x03C0;, 1/2&#x03C0;]. <italic>&#x03B8;</italic>&#x2009;&#x003D;&#x2009;&#x00B1;1/2&#x03C0; is defined when <italic>x</italic><sub>new2</sub> coincides with <italic>x</italic><sub>i</sub>, and <italic>&#x03B8;</italic>&#x2009;&#x003D;&#x2009;0 when <italic>x</italic><sub>new2</sub> coincides with <italic>x</italic><sub>j.</sub> The coordinate of <italic>x</italic><sub>new2</sub> in <xref ref-type="fig" rid="F2">Figure&#x00A0;2a</xref> can be calculated as <xref ref-type="disp-formula" rid="disp-formula4">Equation 4</xref>:<disp-formula id="disp-formula4"><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM4"><mml:mtable columnalign="right left" rowspacing=".5em" columnspacing="thickmathspace" displaystyle="true"><mml:mtr><mml:mtd /><mml:mtd><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>e</mml:mi><mml:mi>w</mml:mi><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>d</mml:mi><mml:mrow><mml:mi mathvariant="normal">cos</mml:mi></mml:mrow><mml:mi>&#x03B8;</mml:mi><mml:mrow><mml:mi mathvariant="normal">cos</mml:mi></mml:mrow><mml:mi>&#x03C6;</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd 
/><mml:mtd><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>e</mml:mi><mml:mi>w</mml:mi><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>d</mml:mi><mml:mrow><mml:mi mathvariant="normal">cos</mml:mi></mml:mrow><mml:mi>&#x03B8;</mml:mi><mml:mrow><mml:mi mathvariant="normal">sin</mml:mi></mml:mrow><mml:mi>&#x03C6;</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd /><mml:mtd><mml:mi>&#x03C6;</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="normal">arctan</mml:mi></mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mfrac><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mspace width="thinmathspace" /><mml:mi>j</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mspace width="thinmathspace" /><mml:mi>j</mml:mi><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>&#x03B8;</mml:mi></mml:mstyle></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(4)</label></disp-formula>where <italic>d</italic> is defined by Equation (<xref ref-type="disp-formula" rid="disp-formula2">2</xref>). The number of minority samples is effectively increased by in-line and off-line linear interpolation, and the synthetic samples retain the characteristics of the original samples.</p>
<p>Following the r-SMOTE, the r-ENN method is proposed to clean the data based on the traditional ENN (<xref ref-type="bibr" rid="B15">15</xref>). r-ENN identifies the <italic>k</italic>-nearest neighbor samples for each sample <italic>x</italic><sub>i</sub> in the dataset. If the category of sample <italic>x</italic><sub>i</sub> is consistent with the category determined by its <italic>k</italic>-neighbors, the sample is considered correctly classified and retained. A smaller value of <italic>k</italic> ensures that the synthetic samples are closer to the original samples, thus helping to remove noisy data. In this experiment, <italic>k</italic>&#x2009;&#x003D;&#x2009;3 is set for r-ENN. Conversely, if the category differs, the sample is considered to lie on the decision boundary and is prone to be misclassified, thus most of these data will be deleted. In contrast to traditional ENN, r-ENN retains a small part of data on the decision boundary to enhance the generalization ability and reduce the risk of overfitting, and these retained boundary data are shown in the dashed box in <xref ref-type="fig" rid="F2">Figure&#x00A0;2b</xref>. In this experiment, 10&#x0025; to 20&#x0025; of boundary data are retained.</p>
<p>As a result of applying the refined SMOTE-ENN optimization method, the total number of HRV data samples increased from 321 to 1,172, and the distribution of data across each label is shown in <xref ref-type="table" rid="T3">Table&#x00A0;3</xref>.</p>
<table-wrap id="T3" position="float"><label>Table&#x00A0;3</label>
<caption><p>Original data and refined SMOTE-ENN optimization data.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">ANS states</th>
<th valign="top" align="center">Samples of original data</th>
<th valign="top" align="center">Samples of refined SMOTE-ENN optimization data</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Fatigue</td>
<td valign="top" align="center">63</td>
<td valign="top" align="center">142</td>
</tr>
<tr>
<td valign="top" align="left">Sympathetic nervous system disorder</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">209</td>
</tr>
<tr>
<td valign="top" align="left">Autonomic nervous system excitability</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">172</td>
</tr>
<tr>
<td valign="top" align="left">Autonomic nervous system balance</td>
<td valign="top" align="center">195</td>
<td valign="top" align="center">125</td>
</tr>
<tr>
<td valign="top" align="left">Dysautonomia</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">175</td>
</tr>
<tr>
<td valign="top" align="left">Stress</td>
<td valign="top" align="center">24</td>
<td valign="top" align="center">166</td>
</tr>
<tr>
<td valign="top" align="left">Vagus nerve disorder</td>
<td valign="top" align="center">12</td>
<td valign="top" align="center">183</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2c"><label>2.3</label><title>Model construction</title>
<p>20&#x0025; of the data is randomly selected as the test set, resulting in 937 samples in the training set and 235 samples in the test set. To assess the performance of the refined SMOTE-ENN optimization method across various classification algorithms, four algorithms are chosen for data classification: Support Vector Machine, Random Forest, Neural Network, and K-Nearest Neighbor. These four algorithms encompass discriminative models (SVM), ensemble models (RF), generative and feature learning models (NN), and non-parametric models (KNN), providing a comprehensive assessment of the HRV data. Specifically, SVM maps HRV features to a high-dimensional space using kernel functions, effectively handling non-linear classification problems with small sample sizes (<xref ref-type="bibr" rid="B26">26</xref>). RF can automatically capture complex interactions between HRV features by constructing multiple decision trees and introducing randomness (<xref ref-type="bibr" rid="B27">27</xref>). Neural networks extract implicit patterns from HRV data that are difficult for traditional statistical methods to capture through multi-layer non-linear transformations (<xref ref-type="bibr" rid="B28">28</xref>). KNN performs effective classification based on similarity, and it is very suitable as a baseline model to verify the degree of clustering of HRV data in the feature space since it does not require assumptions about data distribution (<xref ref-type="bibr" rid="B29">29</xref>). Other machine learning algorithms do not perform as representatively in HRV classification as these four models. For example, the naive Bayes classifier assumes that all input features are independent; however, there is strong coupling between HRV features. For instance, LF and TSP are highly correlated mathematically and physiologically, and rmSSD and HF both reflect parasympathetic nervous activity. 
This interdependence violates the basic assumptions of Bayes&#x2019; theorem, thus reducing classification accuracy (<xref ref-type="bibr" rid="B30">30</xref>).</p>
<sec id="s2c1"><label>2.3.1</label><title>Support vector machine</title>
<p>The hyperparameters <italic>C</italic>, <italic>&#x03B3;</italic>, and kernel function need to be optimized for the SVM model. The regularization parameter <italic>C</italic> controls the trade-off between model complexity and training error. The optimization objective of the SVM is determined by <xref ref-type="disp-formula" rid="disp-formula5">Equation 5</xref>:<disp-formula id="disp-formula5"><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM5"><mml:munder><mml:mo movablelimits="true" form="prefix">min</mml:mo><mml:mrow><mml:mi mathvariant="bold-italic">&#x03C9;</mml:mi></mml:mrow></mml:munder><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac></mml:mrow><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:mrow><mml:mi mathvariant="bold-italic">&#x03C9;</mml:mi></mml:mrow><mml:msup><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:mi>C</mml:mi><mml:msubsup><mml:mo movablelimits="false">&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:msubsup><mml:mrow><mml:msub><mml:mi>&#x03BE;</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mstyle></mml:math><label>(5)</label></disp-formula>where <italic>&#x03C9;</italic> represents the normal vector of the hyperplane, determining the direction of the classification boundary; <italic>C</italic> is the regularization parameter, <italic>N</italic> is the number of training samples; <italic>&#x03BE;</italic><sub>i</sub> denotes the slack variable, which allows a small number of training samples to be on the incorrect side of the hyperplane. A high value of <italic>C</italic> can lead to overfitting, while a low value may result in underfitting. <italic>C</italic>&#x2009;&#x003D;&#x2009;10 is chosen as the regularization parameter through the grid search optimization.</p>
<p>The kernel function determines the shape and properties of the data after mapping to a high-dimensional space. The feature vector of HRV data is complex in this experiment, so the RBF kernel is selected through optimization, which maps the data to an infinite-dimensional space. The RBF kernel is defined by <xref ref-type="disp-formula" rid="disp-formula6">Equation 6</xref>:<disp-formula id="disp-formula6"><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM6"><mml:mi>K</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">&#x2032;</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mrow><mml:mi mathvariant="normal">exp</mml:mi></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>&#x03B3;</mml:mi><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:mi>x</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">&#x2032;</mml:mi></mml:mrow></mml:msup></mml:mrow><mml:msup><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:mn>2</mml:mn></mml:msup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:math><label>(6)</label></disp-formula>where <italic>&#x03B3;</italic> determines the range of similarity calculations and influences the scope of the RBF kernel function. <italic>&#x03B3;</italic>&#x2009;&#x003D;&#x2009;1 is selected through the grid search optimization in this experiment.</p>
</sec>
<sec id="s2c2"><label>2.3.2</label><title>Random forest</title>
<p>Random forest builds multiple decision trees and aggregates the prediction results of each tree to improve the prediction accuracy and generalization ability of the model and reduce the risk of overfitting. This experiment is a classification task, and the voting method is used to determine the final category. Six key hyperparameters are optimized, and the best parameters are: 1,000 trees, <italic>entropy</italic> as the splitting criterion, <italic>sqrt</italic> for maximum features, minimum samples split at 2, minimum samples leaf at 1, and bootstrap sampling set to <italic>False</italic>.</p>
</sec>
<sec id="s2c3"><label>2.3.3</label><title>Neural network</title>
<p>A fully connected neural network is constructed to classify the optimized data, with the network architecture illustrated in <xref ref-type="fig" rid="F3">Figure&#x00A0;3</xref>. The initial fully connected layer maps the 9-dimensional features into a 256-dimensional space. ReLU activation is employed to mitigate the vanishing gradient problem and accelerate training. The He initialization is used to optimize the initial parameters, and the increased feature dimensionality enhances the model&#x0027;s capacity. The output is standardized using Batch Normalization, adjusting the mean and variance of the data to 0 and 1, respectively. To prevent overfitting and enhance the model&#x0027;s generalization ability, the Dropout technique is applied, randomly masking 30&#x0025; of the neurons. The second fully connected layer performs a nonlinear transformation in the 128-dimensional space, using ReLU activation combined with L2 regularization to prevent overfitting, deepen the network, and improve the feature abstraction ability. Batch Normalization and Dropout are also employed here to further stabilize the output distribution and increase the model&#x0027;s robustness. The feature dimension is then compressed from 128 to 64. Finally, the probability distribution across the seven labels is generated through the Softmax output layer.</p>
<fig id="F3" position="float"><label>Figure&#x00A0;3</label>
<caption><p>The architecture of the neural network. The network consists of one input layer, three hidden layers, and one output layer. Nine features of the HRV data are input to the network, then transformed by the hidden layers, and finally output in the form of the probability of seven ANS states.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-08-1749570-g003.tif"><alt-text content-type="machine-generated">Diagram of a neural network architecture showing an input layer with nine features labeled X, followed by three hidden layers with 256, 128, and 64 neurons respectively, and an output layer labeled Y.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2c4"><label>2.3.4</label><title>K-nearest neighbor</title>
<p>KNN is an instance-based learning algorithm that performs classification by measuring the distance between the input sample and the existing samples in the training dataset. For a given new sample, KNN calculates the distance between this sample and all the samples in the training set, selects the <italic>k</italic> nearest neighbors based on the smallest distances, and predicts the output of the sample according to the category of the neighbors. In this study, the hyperparameters of the KNN classifier are optimized using grid search. The optimal parameters selected are the algorithm&#x2009;&#x003D;&#x2009;<italic>auto</italic>; distance&#x2009;&#x003D;&#x2009;<italic>Euclidean</italic>; number of nearest neighbors&#x2009;&#x003D;&#x2009;3.</p>
<p>The optimized hyperparameters of SVM, RF, and KNN after grid search are summarized in <xref ref-type="table" rid="T4">Table&#x00A0;4</xref>. For the NN model, an empirical architecture design is adopted, combined with an early stopping mechanism and an adaptive learning rate adjustment strategy to prevent overfitting, rather than the traditional grid search optimization.</p>
<table-wrap id="T4" position="float"><label>Table&#x00A0;4</label>
<caption><p>Hyperparameters with optimization of the four methods.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Methods</th>
<th valign="top" align="center">Hyperparameters with optimization</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">SVM</td>
<td valign="top" align="left"><italic>C</italic>&#x2009;&#x003D;&#x2009;10; kernel: RBF; <italic>&#x03B3;</italic>&#x2009;&#x003D;&#x2009;1</td>
</tr>
<tr>
<td valign="top" align="left">RF</td>
<td valign="top" align="left">1,000 trees; splitting criterion: <italic>entropy</italic>; maximum features: <italic>sqrt</italic>; minimum samples split: 2; minimum samples leaf: 1; bootstrap sampling: <italic>False</italic></td>
</tr>
<tr>
<td valign="top" align="left">NN</td>
<td valign="top" align="left">number of hidden layers: 3; neurons in the three hidden layers: 256, 128, 64; activation function: ReLU; Dropout: 0.3</td>
</tr>
<tr>
<td valign="top" align="left">KNN</td>
<td valign="top" align="left">algorithm: <italic>auto</italic>; distance: <italic>Euclidean</italic>; number of nearest neighbors: 3</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
</sec>
<sec id="s3"><label>3</label><title>Results and discussion</title>
<p>The overall performance of the four classification models is presented in <xref ref-type="table" rid="T5">Table&#x00A0;5</xref>. Accuracy is defined as the ratio of the number of samples correctly predicted to the total number of samples, reflecting the overall performance of the model. Precision is defined as True Positive/(True Positive&#x2009;&#x002B;&#x2009;False Positive), indicating the reliability of the model&#x0027;s prediction of positive samples. Recall is defined as True Positive/(True Positive&#x2009;&#x002B;&#x2009;False Negative), reflecting the model&#x0027;s ability to capture positive samples. The F1 score is defined as the harmonic mean of the precision and recall, reflecting the balance between these two parameters. The classification accuracy for all models exceeds 0.91, and the F1 score exceeds 0.9. Specifically, KNN demonstrates the best performance, with the overall classification accuracy of 0.97 and F1 score of 0.96, indicating that all four machine learning models have achieved strong classification results on the optimized dataset.</p>
<table-wrap id="T5" position="float"><label>Table&#x00A0;5</label>
<caption><p>Classification results of the data using refined SMOTE-ENN optimization.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Method</th>
<th valign="top" align="center">Accuracy</th>
<th valign="top" align="center">Precision</th>
<th valign="top" align="center">Recall</th>
<th valign="top" align="center">F1 score</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">SVM</td>
<td valign="top" align="center">0.93</td>
<td valign="top" align="center">0.92</td>
<td valign="top" align="center">0.92</td>
<td valign="top" align="center">0.92</td>
</tr>
<tr>
<td valign="top" align="left">RF</td>
<td valign="top" align="center">0.94</td>
<td valign="top" align="center">0.94</td>
<td valign="top" align="center">0.93</td>
<td valign="top" align="center">0.93</td>
</tr>
<tr>
<td valign="top" align="left">NN</td>
<td valign="top" align="center">0.91</td>
<td valign="top" align="center">0.91</td>
<td valign="top" align="center">0.90</td>
<td valign="top" align="center">0.90</td>
</tr>
<tr>
<td valign="top" align="left">KNN</td>
<td valign="top" align="center">0.97</td>
<td valign="top" align="center">0.97</td>
<td valign="top" align="center">0.96</td>
<td valign="top" align="center">0.96</td>
</tr>
<tr>
<td valign="top" align="left">Mean</td>
<td valign="top" align="center">0.94</td>
<td valign="top" align="center">0.94</td>
<td valign="top" align="center">0.93</td>
<td valign="top" align="center">0.93</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The confusion matrices of the four machine learning models are shown in <xref ref-type="fig" rid="F4">Figure&#x00A0;4</xref>. The classification results for labels 2, 5, 6, and 7 are particularly strong, with label 2 showing slight overfitting in the SVM model. This may be due to the limited amount of original data for label 2, resulting in the synthetic data being overly concentrated in the feature space. To solve this issue, the <italic>k</italic>-value can be appropriately increased in the r-ENN to introduce a small amount of noisy data, thereby enhancing the model&#x0027;s generalization ability. The classification accuracy for label 1 is the lowest across the four models, which may be attributed to the generation of excessive noise data for this label by r-SMOTE. To address this issue, a smaller value of <italic>&#x03BB;</italic> and <italic>&#x03B8;</italic> could be used in r-SMOTE to ensure that the synthetic data is closer to the original data in the feature space.</p>
<fig id="F4" position="float"><label>Figure&#x00A0;4</label>
<caption><p>Confusion matrices of the four machine learning models. <bold>(a)</bold> Confusion matrix of SVM. <bold>(b)</bold> Confusion matrix of RF. <bold>(c)</bold> Confusion matrix of NN. <bold>(d)</bold> Confusion matrix of KNN. The classification accuracy for all models is at least 0.91, and the F1 score is at least 0.90. KNN demonstrates the best performance among them, with an overall classification accuracy of 0.97 and an F1 score of 0.96.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-08-1749570-g004.tif"><alt-text content-type="machine-generated">Four confusion matrices compare model performance: (a) SVM, (b) RF, (c) NN, and (d) KNN. Each matrix displays actual versus predicted labels for seven classes, showing counts of correct predictions along the diagonal.</alt-text>
</graphic>
</fig>
<p>The classification results of the four models on the data optimized using the traditional SMOTE method are summarized in <xref ref-type="table" rid="T6">Table&#x00A0;6</xref>. For the SMOTE optimization, the mean accuracy, precision, recall, and F1 score are 0.82, 0.82, 0.83, and 0.82, respectively. After applying the refined SMOTE-ENN optimization as shown in <xref ref-type="table" rid="T5">Table&#x00A0;5</xref>, the mean accuracy, precision, recall, and F1 score are 0.94, 0.94, 0.93, and 0.93, improving by 0.12, 0.12, 0.10, and 0.11, respectively. These improvements demonstrate that the refined SMOTE-ENN method is more suitable for optimization on datasets with small sample sizes and imbalanced data distributions compared with the traditional SMOTE, which effectively enhances the classification performance of the four models.</p>
<table-wrap id="T6" position="float"><label>Table&#x00A0;6</label>
<caption><p>Classification results of the data using traditional SMOTE optimization.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Method</th>
<th valign="top" align="center">Accuracy</th>
<th valign="top" align="center">Precision</th>
<th valign="top" align="center">Recall</th>
<th valign="top" align="center">F1 score</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">SVM</td>
<td valign="top" align="center">0.80</td>
<td valign="top" align="center">0.81</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">0.81</td>
</tr>
<tr>
<td valign="top" align="left">RF</td>
<td valign="top" align="center">0.81</td>
<td valign="top" align="center">0.81</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">0.81</td>
</tr>
<tr>
<td valign="top" align="left">NN</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.83</td>
<td valign="top" align="center">0.85</td>
<td valign="top" align="center">0.84</td>
</tr>
<tr>
<td valign="top" align="left">KNN</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">0.81</td>
<td valign="top" align="center">0.81</td>
</tr>
<tr>
<td valign="top" align="left">Mean</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">0.83</td>
<td valign="top" align="center">0.82</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The Receiver Operating Characteristic (ROC) curves of the four machine learning models are calculated to evaluate the classification performance of the models, as shown in <xref ref-type="fig" rid="F5">Figure&#x00A0;5</xref>. True Positive Rate (TPR) is taken as the vertical axis, which is defined as TPR&#x2009;&#x003D;&#x2009;True Positive/(True Positive&#x2009;&#x002B;&#x2009;False Negative), indicating the proportion of truly positive samples that are correctly predicted as positive. False Positive Rate (FPR) is taken as the horizontal axis, which is defined as FPR&#x2009;&#x003D;&#x2009;False Positive/(False Positive&#x2009;&#x002B;&#x2009;True Negative), indicating the proportion of truly negative samples that are incorrectly predicted as positive. The ROC curves of all models reach a TPR of more than 80&#x0025; when the FPR is 10&#x0025;, which is notably higher than the diagonal line, suggesting that the four models exhibit strong discriminative ability. All curves are positioned close to the upper left corner, and the AUC values exceed 0.92 for all models, demonstrating their capability to effectively distinguish between positive and negative samples. Two similar previous studies are compared with this work, and the results are shown in <xref ref-type="table" rid="T7">Table&#x00A0;7</xref>. Compared to previous classification results using the SMOTE method on large samples, the refined SMOTE-ENN has certain advantages in optimizing and classifying small datasets.</p>
<fig id="F5" position="float"><label>Figure&#x00A0;5</label>
<caption><p>The ROC curves of the four machine learning models. <bold>(a)</bold> ROC curve of SVM. <bold>(b)</bold> ROC curve of RF. <bold>(c)</bold> ROC curve of NN. <bold>(d)</bold> ROC curve of KNN. All curves are positioned close to the upper left corner, and the AUC values exceed 0.92 for all models, demonstrating their capability to effectively distinguish between positive and negative samples.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-08-1749570-g005.tif"><alt-text content-type="machine-generated">ROC curves for different classifiers. (a) SVM with highest AUC of 1.0000 for Class 2. (b) RF with highest AUC of 1.0000 for Classes 2 and 5. (c) NN with highest AUC of 1.0000 for Classes 2 and 5. (d) KNN with highest AUC of 1.0000 for Class 7. Each graph shows true positive rate versus false positive rate, indicating model performance.</alt-text>
</graphic>
</fig>
<table-wrap id="T7" position="float"><label>Table&#x00A0;7</label>
<caption><p>Comparison results of this work with previous studies.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
<col align="left"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Study</th>
<th valign="top" align="center">Data size</th>
<th valign="top" align="center">Features</th>
<th valign="top" align="center">Methods</th>
<th valign="top" align="center">Evaluation results</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Hussain et al. (<xref ref-type="bibr" rid="B20">20</xref>)</td>
<td valign="top" align="left">2,000 samples</td>
<td valign="top" align="left">SDSD, SD,<break/>RMSSD, SDANN, Approximate entropy,<break/>Sample entropy,<break/>Wavelet entropy,<break/>Shannon entropy,<break/>Wavelet norm entropy,<break/>et al.</td>
<td valign="top" align="left">SMOTE</td>
<td valign="top" align="left"><bold>KNN</bold>:<break/>Accuracy: 80&#x0025;<break/>sensitivity: 84.61&#x0025;<break/>specificity: 77.27&#x0025;<break/>AUC: 88.63&#x0025;<break/><bold>NB</bold>:<break/>Accuracy: 88.57&#x0025;<break/>sensitivity: 84.61&#x0025;<break/>specificity: 90.90&#x0025;<break/>AUC: 92.65&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">Reddy et al. (<xref ref-type="bibr" rid="B22">22</xref>)</td>
<td valign="top" align="left">Approximately 1,480,000 samples</td>
<td valign="top" align="left">heart rate,<break/>mean and standard deviation,<break/>EDA signal,<break/>peak frequency, Power spectral density,<break/>respiratory frequency, et al.</td>
<td valign="top" align="left">SMOTE</td>
<td valign="top" align="left"><bold>XGBoost:</bold><break/>Accuracy: 88.22&#x0025;<break/>Precision: 85.83&#x0025;<break/>Recall: 89.14&#x0025;<break/>F-score: 87.16&#x0025;<break/><bold>Logistic Regression:</bold><break/>Accuracy: 91.36&#x0025;<break/>Precision: 90.21&#x0025;<break/>Recall: 92.36&#x0025;<break/>F-score: 91.19&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">This work</td>
<td valign="top" align="left">321 samples</td>
<td valign="top" align="left">HR<break/>SDNN<break/>rmSSD<break/>PNN50<break/>VLF<break/>LF<break/>HF<break/>TSP<break/>LF/HF</td>
<td valign="top" align="left">Refined SMOTE-ENN</td>
<td valign="top" align="left"><bold>KNN:</bold><break/>Accuracy: 97&#x0025;<break/>Precision: 97&#x0025;<break/>Recall: 96&#x0025;<break/>F-score: 96&#x0025;<break/><bold>RF:</bold><break/>Accuracy: 94&#x0025;<break/>Precision: 94&#x0025;<break/>Recall: 93&#x0025;<break/>F-score: 93&#x0025;</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In some cases, such as the SVM model for label 2, the AUC value reaches 1.0, suggesting potential overfitting. The main reasons are as follows: refined SMOTE-ENN generates new samples by interpolating between two real minority class samples. The synthetic samples are highly similar to the original samples, which might lead to data leakage and overfitting. Furthermore, the experimental dataset has a small number of samples, with some classes having fewer than 10 samples. Refined SMOTE-ENN generates a large amount of data based on these few points, causing the features of that class to be extremely densely distributed in certain narrow regions and resulting in overfitting. To address these issues, samples can be generated only at the classification boundaries, instead of across all minority class samples. Additionally, the data processing flow can be further adjusted to prevent data leakage.</p>
<p>The feature importance distribution of the four models is evaluated to examine the clinical interpretability of HRV classification, as shown in <xref ref-type="fig" rid="F6">Figure&#x00A0;6</xref>. The feature importance rankings vary across models. For instance, in the SVM model, the three most influential features are HR, SDNN, and rmSSD, while in the RF model, the top three features are SDNN, HF, and LF. <xref ref-type="table" rid="T8">Table&#x00A0;8</xref> presents the ranking of the nine features across all four machine learning models. The results indicate that SDNN has the highest mean rank of 1.5. SDNN represents the standard deviation of all NN intervals, which is a key indicator of overall heart rate variability and exerts the most significant influence on HRV classification. A high SDNN value typically signifies good ANS performance and robust heart adaptation to external stressors. Accordingly, changes in the standard deviation of NN intervals are often associated with significant shifts in HRV. The feature with the lowest ranking is TSP, suggesting that TSP has the least impact on HRV classification. This may be due to the fact that, although ECG signals from healthy individuals and depressed patients exhibit varying power levels across different frequency bands, their total power sum is similar, leading to minimal distinction in this feature.</p>
<fig id="F6" position="float"><label>Figure&#x00A0;6</label>
<caption><p>Feature importance distribution of the four machine learning models. <bold>(a)</bold> Feature importance of SVM. <bold>(b)</bold> Feature importance of RF. <bold>(c)</bold> Feature importance of NN. <bold>(d)</bold> Feature importance of KNN. SDNN has the highest mean rank of 1.5, indicating that SDNN is a key indicator of overall heart rate variability and exerts the most significant influence on HRV classification.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-08-1749570-g006.tif"><alt-text content-type="machine-generated">Bar charts showing feature importance scores for four models: (a) SVM, with HR, SDNN, and rmSSD as top features; (b) RF, with SDNN, HF, and LF leading; (c) NN, highlighting SDNN, HF, and HR; (d) KNN, emphasizing HR, SDNN, and rmSSD. Each chart ranks features by importance score.</alt-text>
</graphic>
</fig>
<table-wrap id="T8" position="float"><label>Table&#x00A0;8</label>
<caption><p>Feature importance ranking of the four machine learning models.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Method</th>
<th valign="top" align="center">SDNN</th>
<th valign="top" align="center">HR</th>
<th valign="top" align="center">LF</th>
<th valign="top" align="center">rmSSD</th>
<th valign="top" align="center">HF</th>
<th valign="top" align="center">LF/HF</th>
<th valign="top" align="center">VLF</th>
<th valign="top" align="center">PNN50</th>
<th valign="top" align="center">TSP</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">SVM</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">8</td>
</tr>
<tr>
<td valign="top" align="left">RF</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">8</td>
</tr>
<tr>
<td valign="top" align="left">NN</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">8</td>
<td valign="top" align="center">9</td>
</tr>
<tr>
<td valign="top" align="left">KNN</td>
<td valign="top" align="center">2</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">4</td>
<td valign="top" align="center">3</td>
<td valign="top" align="center">9</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">6</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">8</td>
</tr>
<tr>
<td valign="top" align="left">Mean</td>
<td valign="top" align="center">1.5</td>
<td valign="top" align="center">2.25</td>
<td valign="top" align="center">4.25</td>
<td valign="top" align="center">4.75</td>
<td valign="top" align="center">5.5</td>
<td valign="top" align="center">5.5</td>
<td valign="top" align="center">6.5</td>
<td valign="top" align="center">6.5</td>
<td valign="top" align="center">8.25</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>A limitation of this study is the lack of external validation on public datasets. This is primarily due to the unique nature of the 7-category autonomic nervous system labeling protocol, which does not directly map to the binary or ternary labels (e.g., stress/non-stress) found in currently available public databases. Therefore, the generalization performance of the proposed model on populations with different demographic distributions remains to be verified in future studies. Furthermore, this work is positioned as a pilot study demonstrating the feasibility of identifying complex autonomic states using HRV features. One of the primary goals is to establish the correlation between the proposed 9 features and the 7 functional states within a controlled cohort, and the cross-dataset generalization will be the critical next step of this work.</p>
</sec>
<sec id="s4" sec-type="conclusions"><label>4</label><title>Conclusion</title>
<p>Currently, studies on imbalanced data based on SMOTE typically use original datasets with large sample sizes, generally ranging from several thousand to millions. However, its effectiveness in the specific context of depression detection using limited and sparse HRV samples remains to be fully explored. In this work, a refined SMOTE-ENN hybrid optimization method is proposed to address the classification challenges of limited and imbalanced sample distribution. The model utilizes the r-SMOTE algorithm to generate synthetic minority class samples by in-line and off-line linear interpolation, combining it with the r-ENN under-sampling technique to remove noisy data while retaining selected boundary data to reduce the overfitting risk. The experimental results show that after refined SMOTE-ENN optimization, the overall classification accuracy of the four machine learning models is at least 91&#x0025;, with the AUC values exceeding 0.92. In comparison to the classification results with classical SMOTE optimization of the four machine learning algorithms, the mean accuracy, precision, recall, and F1 score improved by 0.12, 0.12, 0.10, and 0.11, respectively. The classical SMOTE positions newly generated samples on the lines connecting the original samples in the sample space. The proposed method introduces off-line interpolation, which expands the data space and increases the diversity of generated data through nonlinear transformations introduced by trigonometric functions, overcoming the limitations of linear interpolation in the traditional SMOTE method. The refined SMOTE-ENN optimization method improves the detection performance of machine learning models and provides reliable technical support for the early detection of depression.</p>
<p>Despite the promising results, this study has limitations that point to directions for future research. First, the proposed method effectively addressed the data imbalance, and future studies should focus on collecting larger-scale datasets from multi-center clinical trials to further validate the generalization ability of the model. Second, relying solely on HRV features may limit the feature space; therefore, future work could explore multimodal fusion strategies by integrating HRV with other physiological indicators, such as electroencephalogram signals or voice acoustics. In terms of real-world applications, the proposed method demonstrates certain potential due to its computational efficiency and non-invasive nature, making it suitable for deployment on edge devices, such as smartwatches and health monitoring bands.</p>
</sec>
</body>
<back>
<sec id="s5" sec-type="data-availability"><title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s6" sec-type="ethics-statement"><title>Ethics statement</title>
<p>The studies involving humans were approved by the Ethics Committee of Nanjing University of Chinese Medicine. The studies were conducted in accordance with the local legislation and institutional requirements. The ethics committee/institutional review board waived the requirement of written informed consent for participation from the participants or the participants&#x0027; legal guardians/next of kin because the study uses anonymised data.</p>
</sec>
<sec id="s7" sec-type="author-contributions"><title>Author contributions</title>
<p>BZ: Conceptualization, Data curation, Investigation, Methodology, Software, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. ML: Investigation, Validation, Writing &#x2013; review &#x0026; editing. YZ: Data curation, Formal analysis, Investigation, Software, Writing &#x2013; original draft. BJ: Data curation, Formal analysis, Investigation, Software, Writing &#x2013; original draft. MH: Data curation, Formal analysis, Software, Writing &#x2013; original draft. HL: Conceptualization, Validation, Writing &#x2013; review &#x0026; editing. XL: Conceptualization, Validation, Writing &#x2013; review &#x0026; editing. YS: Conceptualization, Validation, Writing &#x2013; review &#x0026; editing. RG: Conceptualization, Resources, Writing &#x2013; review &#x0026; editing. ZZ: Conceptualization, Funding acquisition, Methodology, Supervision, Writing &#x2013; review &#x0026; editing. XQ: Conceptualization, Funding acquisition, Methodology, Supervision, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec id="s9" sec-type="COI-statement"><title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10" sec-type="ai-statement"><title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="s11" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Southwick</surname> <given-names>SM</given-names></name> <name><surname>Charney</surname> <given-names>DS</given-names></name></person-group>. <article-title>The science of resilience: implications for the prevention and treatment of depression</article-title>. <source>Science</source>. (<year>2012</year>) <volume>338</volume>(<issue>6103</issue>):<fpage>79</fpage>&#x2013;<lpage>82</lpage>. <pub-id pub-id-type="doi">10.1126/science.1222942</pub-id><pub-id pub-id-type="pmid">23042887</pub-id></mixed-citation></ref>
<ref id="B2"><label>2.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hammen</surname> <given-names>C</given-names></name></person-group>. <article-title>Stress and depression</article-title>. <source>Annu. Rev Clin Psychol</source>. (<year>2005</year>) <volume>1</volume>(<issue>1</issue>):<fpage>293</fpage>&#x2013;<lpage>319</lpage>. <pub-id pub-id-type="doi">10.1146/annurev.clinpsy.1.102803.143938</pub-id><pub-id pub-id-type="pmid">17716090</pub-id></mixed-citation></ref>
<ref id="B3"><label>3.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kroenke</surname> <given-names>K</given-names></name> <name><surname>Spitzer</surname> <given-names>RL</given-names></name> <name><surname>Williams</surname> <given-names>JB</given-names></name></person-group>. <article-title>The patient health questionnaire-2: validity of a two-item depression screener</article-title>. <source>Med Care</source>. (<year>2003</year>) <volume>41</volume>(<issue>11</issue>):<fpage>1284</fpage>&#x2013;<lpage>92</lpage>. <pub-id pub-id-type="doi">10.1097/01.MLR.0000093487.78664.3C</pub-id><pub-id pub-id-type="pmid">14583691</pub-id></mixed-citation></ref>
<ref id="B4"><label>4.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lovato</surname> <given-names>N</given-names></name> <name><surname>Gradisar</surname> <given-names>M</given-names></name></person-group>. <article-title>A meta-analysis and model of the relationship between sleep and depression in adolescents: recommendations for future research and clinical practice</article-title>. <source>Sleep Med Rev</source>. (<year>2014</year>) <volume>18</volume>(<issue>6</issue>):<fpage>521</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1016/j.smrv.2014.03.006</pub-id><pub-id pub-id-type="pmid">24857255</pub-id></mixed-citation></ref>
<ref id="B5"><label>5.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kemp</surname> <given-names>AH</given-names></name> <name><surname>Quintana</surname> <given-names>DS</given-names></name> <name><surname>Gray</surname> <given-names>MA</given-names></name> <name><surname>Felmingham</surname> <given-names>KL</given-names></name> <name><surname>Brown</surname> <given-names>K</given-names></name> <name><surname>Gatt</surname> <given-names>JM</given-names></name></person-group>. <article-title>Impact of depression and antidepressant treatment on heart rate variability: a review and meta-analysis</article-title>. <source>Biol Psychiatry</source>. (<year>2010</year>) <volume>67</volume>(<issue>11</issue>):<fpage>1067</fpage>&#x2013;<lpage>74</lpage>. <pub-id pub-id-type="doi">10.1016/j.biopsych.2009.12.012</pub-id><pub-id pub-id-type="pmid">20138254</pub-id></mixed-citation></ref>
<ref id="B6"><label>6.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Villarreal-Zegarra</surname> <given-names>D</given-names></name> <name><surname>Barrera-Begazo</surname> <given-names>J</given-names></name> <name><surname>Otaz&#x00FA;-Alfaro</surname> <given-names>S</given-names></name> <name><surname>Mayo-Puchoc</surname> <given-names>N</given-names></name> <name><surname>Bazo-Alvarez</surname> <given-names>JC</given-names></name> <name><surname>Huarcaya-Victoria</surname> <given-names>J</given-names></name></person-group>. <article-title>Sensitivity and specificity of the patient health questionnaire (PHQ-9, PHQ-8, PHQ-2) and general anxiety disorder scale (GAD-7, GAD-2) for depression and anxiety diagnosis: a cross-sectional study in a Peruvian hospital population</article-title>. <source>BMJ Open</source>. (<year>2023</year>) <volume>13</volume>(<issue>9</issue>):<fpage>e076193</fpage>. <pub-id pub-id-type="doi">10.1136/bmjopen-2023-076193</pub-id><pub-id pub-id-type="pmid">37714674</pub-id></mixed-citation></ref>
<ref id="B7"><label>7.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Steiger</surname> <given-names>A</given-names></name> <name><surname>Pawlowski</surname> <given-names>M</given-names></name></person-group>. <article-title>Depression and sleep</article-title>. <source>Int J Mol Sci</source>. (<year>2019</year>) <volume>20</volume>(<issue>3</issue>):<fpage>607</fpage>. <pub-id pub-id-type="doi">10.3390/ijms20030607</pub-id><pub-id pub-id-type="pmid">30708948</pub-id></mixed-citation></ref>
<ref id="B8"><label>8.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>De Zambotti</surname> <given-names>M</given-names></name> <name><surname>Goldstein</surname> <given-names>C</given-names></name> <name><surname>Cook</surname> <given-names>J</given-names></name> <name><surname>Menghini</surname> <given-names>L</given-names></name> <name><surname>Altini</surname> <given-names>M</given-names></name> <name><surname>Cheng</surname> <given-names>P</given-names></name><etal/></person-group> <article-title>State of the science and recommendations for using wearable technology in sleep and circadian research</article-title>. <source>Sleep</source>. (<year>2024</year>) <volume>47</volume>(<issue>4</issue>):<fpage>zsad325</fpage>. <pub-id pub-id-type="doi">10.1093/sleep/zsad325</pub-id><pub-id pub-id-type="pmid">38149978</pub-id></mixed-citation></ref>
<ref id="B9"><label>9.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gullett</surname> <given-names>N</given-names></name> <name><surname>Zajkowska</surname> <given-names>Z</given-names></name> <name><surname>Walsh</surname> <given-names>A</given-names></name> <name><surname>Harper</surname> <given-names>R</given-names></name> <name><surname>Mondelli</surname> <given-names>V</given-names></name></person-group>. <article-title>Heart rate variability (HRV) as a way to understand associations between the autonomic nervous system (ANS) and affective states: a critical review of the literature</article-title>. <source>Int J Psychophysiol</source>. (<year>2023</year>) <volume>192</volume>:<fpage>35</fpage>&#x2013;<lpage>42</lpage>. <pub-id pub-id-type="doi">10.1016/j.ijpsycho.2023.08.001</pub-id><pub-id pub-id-type="pmid">37543289</pub-id></mixed-citation></ref>
<ref id="B10"><label>10.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>El-Malahi</surname> <given-names>O</given-names></name> <name><surname>Mohajeri</surname> <given-names>D</given-names></name> <name><surname>B&#x00E4;uerle</surname> <given-names>A</given-names></name> <name><surname>Mincu</surname> <given-names>R</given-names></name> <name><surname>Rothenaicher</surname> <given-names>K</given-names></name> <name><surname>Ullrich</surname> <given-names>G</given-names></name><etal/></person-group> <article-title>The effect of stress-reducing interventions on heart rate variability in cardiovascular disease: a systematic review and meta-analysis</article-title>. <source>Life</source>. (<year>2024</year>) <volume>14</volume>(<issue>6</issue>):<fpage>749</fpage>. <pub-id pub-id-type="doi">10.3390/life14060749</pub-id><pub-id pub-id-type="pmid">38929732</pub-id></mixed-citation></ref>
<ref id="B11"><label>11.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hartmann</surname> <given-names>R</given-names></name> <name><surname>Schmidt</surname> <given-names>FM</given-names></name> <name><surname>Sander</surname> <given-names>C</given-names></name> <name><surname>Hegerl</surname> <given-names>U</given-names></name></person-group>. <article-title>Heart rate variability as indicator of clinical state in depression</article-title>. <source>Front Psychiatry</source>. (<year>2019</year>) <volume>9</volume>:<fpage>735</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyt.2018.00735</pub-id><pub-id pub-id-type="pmid">30705641</pub-id></mixed-citation></ref>
<ref id="B12"><label>12.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Koch</surname> <given-names>C</given-names></name> <name><surname>Wilhelm</surname> <given-names>M</given-names></name> <name><surname>Salzmann</surname> <given-names>S</given-names></name> <name><surname>Rief</surname> <given-names>W</given-names></name> <name><surname>Euteneuer</surname> <given-names>F</given-names></name></person-group>. <article-title>A meta-analysis of heart rate variability in major depression</article-title>. <source>Psychol Med</source>. (<year>2019</year>) <volume>49</volume>(<issue>12</issue>):<fpage>1948</fpage>&#x2013;<lpage>57</lpage>. <pub-id pub-id-type="doi">10.1017/S0033291719001351</pub-id><pub-id pub-id-type="pmid">31239003</pub-id></mixed-citation></ref>
<ref id="B13"><label>13.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname> <given-names>Q</given-names></name> <name><surname>Miao</surname> <given-names>X</given-names></name> <name><surname>Cao</surname> <given-names>Y</given-names></name> <name><surname>Chi</surname> <given-names>A</given-names></name> <name><surname>Xiao</surname> <given-names>T</given-names></name></person-group>. <article-title>Heart rate variability status at rest in adult depressed patients: a systematic review and meta-analysis</article-title>. <source>Front Public Health</source>. (<year>2023</year>) <volume>11</volume>:<fpage>1243213</fpage>. <pub-id pub-id-type="doi">10.3389/fpubh.2023.1243213</pub-id><pub-id pub-id-type="pmid">38169979</pub-id></mixed-citation></ref>
<ref id="B14"><label>14.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lu</surname> <given-names>W</given-names></name> <name><surname>Hou</surname> <given-names>H</given-names></name> <name><surname>Chu</surname> <given-names>J</given-names></name></person-group>. <article-title>Control, feature fusion for imbalanced ECG data analysis</article-title>. <source>Biomed Signal Process Control</source>. (<year>2018</year>) <volume>41</volume>:<fpage>152</fpage>&#x2013;<lpage>60</lpage>. <pub-id pub-id-type="doi">10.1016/j.bspc.2017.11.010</pub-id></mixed-citation></ref>
<ref id="B15"><label>15.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xu</surname> <given-names>Z</given-names></name> <name><surname>Shen</surname> <given-names>D</given-names></name> <name><surname>Nie</surname> <given-names>T</given-names></name> <name><surname>Kou</surname> <given-names>Y</given-names></name></person-group>. <article-title>A hybrid sampling algorithm combining M-SMOTE and ENN based on random forest for medical imbalanced data</article-title>. <source>J Biomed Inform</source>. (<year>2020</year>) <volume>107</volume>:<fpage>103465</fpage>. <pub-id pub-id-type="doi">10.1016/j.jbi.2020.103465</pub-id><pub-id pub-id-type="pmid">32512209</pub-id></mixed-citation></ref>
<ref id="B16"><label>16.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Devarriya</surname> <given-names>D</given-names></name> <name><surname>Gulati</surname> <given-names>C</given-names></name> <name><surname>Mansharamani</surname> <given-names>V</given-names></name> <name><surname>Sakalle</surname> <given-names>A</given-names></name> <name><surname>Bhardwaj</surname> <given-names>A</given-names></name></person-group>. <article-title>Unbalanced breast cancer data classification using novel fitness functions in genetic programming</article-title>. <source>Expert Syst Appl</source>. (<year>2020</year>) <volume>140</volume>:<fpage>112866</fpage>. <pub-id pub-id-type="doi">10.1016/j.eswa.2019.112866</pub-id></mixed-citation></ref>
<ref id="B17"><label>17.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>O&#x2019;Brien</surname> <given-names>R</given-names></name> <name><surname>Ishwaran</surname> <given-names>H</given-names></name></person-group>. <article-title>A random forests quantile classifier for class imbalanced data</article-title>. <source>Pattern Recogn</source>. (<year>2019</year>) <volume>90</volume>:<fpage>232</fpage>&#x2013;<lpage>49</lpage>. <pub-id pub-id-type="doi">10.1016/j.patcog.2019.01.036</pub-id></mixed-citation></ref>
<ref id="B18"><label>18.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>Y</given-names></name> <name><surname>Jiang</surname> <given-names>J</given-names></name></person-group>. <article-title>Adaptive bi-weighting toward automatic initialization and model selection for HMM-based hybrid meta-clustering ensembles</article-title>. <source>IEEE Trans Cybern</source>. (<year>2018</year>) <volume>49</volume>(<issue>5</issue>):<fpage>1657</fpage>&#x2013;<lpage>68</lpage>. <pub-id pub-id-type="doi">10.1109/TCYB.2018.2809562</pub-id><pub-id pub-id-type="pmid">29994293</pub-id></mixed-citation></ref>
<ref id="B19"><label>19.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ishaque</surname> <given-names>S</given-names></name> <name><surname>Khan</surname> <given-names>N</given-names></name> <name><surname>Krishnan</surname> <given-names>S</given-names></name></person-group>. <article-title>Trends in heart-rate variability signal analysis</article-title>. <source>Front Digit Health</source>. (<year>2021</year>) <volume>3</volume>:<fpage>639444</fpage>. <pub-id pub-id-type="doi">10.3389/fdgth.2021.639444</pub-id><pub-id pub-id-type="pmid">34713110</pub-id></mixed-citation></ref>
<ref id="B20"><label>20.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hussain</surname> <given-names>L</given-names></name> <name><surname>Lone</surname> <given-names>KJ</given-names></name> <name><surname>Awan</surname> <given-names>IA</given-names></name> <name><surname>Abbasi</surname> <given-names>AA</given-names></name> <name><surname>Pirzada</surname> <given-names>J-u-R</given-names></name></person-group>. <article-title>Detecting congestive heart failure by extracting multimodal features with synthetic minority oversampling technique (SMOTE) for imbalanced data using robust machine learning techniques</article-title>. <source>Waves Random Complex Media</source>. (<year>2022</year>) <volume>32</volume>(<issue>3</issue>):<fpage>1079</fpage>&#x2013;<lpage>102</lpage>. <pub-id pub-id-type="doi">10.1080/17455030.2020.1810364</pub-id></mixed-citation></ref>
<ref id="B21"><label>21.</label><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Kumar</surname> <given-names>GK</given-names></name> <name><surname>Anila</surname> <given-names>M</given-names></name> <name><surname>Manikyam</surname> <given-names>NRH</given-names></name> <name><surname>Thatha</surname> <given-names>VN</given-names></name> <name><surname>Reddy</surname> <given-names>RVK</given-names></name> <name><surname>Papana</surname> <given-names>KR</given-names></name></person-group>. <article-title>Heart failure detection through SMOTE for augmentation and machine learning approach for classification</article-title>. In: <person-group person-group-type="editor"><name><surname>Nidhya</surname> <given-names>R</given-names></name> <name><surname>Kumar</surname> <given-names>M</given-names></name> <name><surname>Karthik</surname> <given-names>S</given-names></name> <name><surname>Anand</surname> <given-names>R</given-names></name> <name><surname>Balamurugan</surname> <given-names>S</given-names></name></person-group>, editors. <source>Smart Factories for Industry 5.0 Transformation</source>. <publisher-loc>Hoboken, NJ</publisher-loc>: <publisher-name>Wiley Online Library</publisher-name> (<year>2025</year>). p. <fpage>123</fpage>&#x2013;<lpage>34</lpage>.</mixed-citation></ref>
<ref id="B22"><label>22.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Reddy</surname> <given-names>GR</given-names></name> <name><surname>Reddy</surname> <given-names>JJ</given-names></name> <name><surname>Reddy</surname> <given-names>MS</given-names></name> <name><surname>Sravani</surname> <given-names>I</given-names></name> <name><surname>Kumar</surname> <given-names>VKA</given-names></name></person-group>. <article-title>Enhancing stress detection employing physiological signals from the WESAD dataset: a machine learning approach with SMOTE</article-title>. <source>Milestone Trans Med Technometrics</source>. (<year>2024</year>) <volume>2</volume>(<issue>2</issue>):<fpage>79</fpage>&#x2013;<lpage>87</lpage>. <pub-id pub-id-type="doi">10.5281/zenodo.14566207</pub-id></mixed-citation></ref>
<ref id="B23"><label>23.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Camm</surname> <given-names>AJ</given-names></name> <name><surname>Malik</surname> <given-names>M</given-names></name> <name><surname>Bigger</surname> <given-names>JT</given-names></name> <name><surname>Breithardt</surname> <given-names>G</given-names></name> <name><surname>Cerutti</surname> <given-names>S</given-names></name> <name><surname>Cohen</surname> <given-names>RJ</given-names></name><etal/></person-group>. <article-title>Heart rate variability: standards of measurement, physiological interpretation and clinical use. Task force of the European Society of Cardiology and the North American Society of Pacing and Electrophysiology</article-title>. <source>Circulation</source>. (<year>1996</year>) <volume>93</volume>(<issue>5</issue>):<fpage>1043</fpage>&#x2013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1161/01.CIR.93.5.1043</pub-id><pub-id pub-id-type="pmid">8598068</pub-id></mixed-citation></ref>
<ref id="B24"><label>24.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shaffer</surname> <given-names>F</given-names></name> <name><surname>Ginsberg</surname> <given-names>JP</given-names></name></person-group>. <article-title>An overview of heart rate variability metrics and norms</article-title>. <source>Front Public Health</source>. (<year>2017</year>) <volume>5</volume>:<fpage>258</fpage>. <pub-id pub-id-type="doi">10.3389/fpubh.2017.00258</pub-id><pub-id pub-id-type="pmid">29034226</pub-id></mixed-citation></ref>
<ref id="B25"><label>25.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Pham</surname> <given-names>T</given-names></name> <name><surname>Lau</surname> <given-names>ZJ</given-names></name> <name><surname>Chen</surname> <given-names>SA</given-names></name> <name><surname>Makowski</surname> <given-names>D</given-names></name></person-group>. <article-title>Heart rate variability in psychology: a review of HRV indices and an analysis tutorial</article-title>. <source>Sensors</source>. (<year>2021</year>) <volume>21</volume>(<issue>12</issue>):<fpage>3998</fpage>. <pub-id pub-id-type="doi">10.3390/s21123998</pub-id><pub-id pub-id-type="pmid">34207927</pub-id></mixed-citation></ref>
<ref id="B26"><label>26.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mandal</surname> <given-names>S</given-names></name> <name><surname>Sinha</surname> <given-names>N</given-names></name></person-group>. <article-title>Prediction of atrial fibrillation based on nonlinear modeling of heart rate variability signal and SVM classifier</article-title>. <source>Res Biomed Eng</source>. (<year>2021</year>) <volume>37</volume>(<issue>4</issue>):<fpage>725</fpage>&#x2013;<lpage>36</lpage>. <pub-id pub-id-type="doi">10.1007/s42600-021-00175-y</pub-id></mixed-citation></ref>
<ref id="B27"><label>27.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dahal</surname> <given-names>K</given-names></name> <name><surname>Bogue-Jimenez</surname> <given-names>B</given-names></name> <name><surname>Doblas</surname> <given-names>A</given-names></name></person-group>. <article-title>Global stress detection framework combining a reduced set of HRV features and random forest model</article-title>. <source>Sensors</source>. (<year>2023</year>) <volume>23</volume>(<issue>11</issue>):<fpage>5220</fpage>. <pub-id pub-id-type="doi">10.3390/s23115220</pub-id><pub-id pub-id-type="pmid">37299947</pub-id></mixed-citation></ref>
<ref id="B28"><label>28.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Mortensen</surname> <given-names>JA</given-names></name> <name><surname>Mollov</surname> <given-names>ME</given-names></name> <name><surname>Chatterjee</surname> <given-names>A</given-names></name> <name><surname>Ghose</surname> <given-names>D</given-names></name> <name><surname>Li</surname> <given-names>FY</given-names></name></person-group>. <article-title>Multi-class stress detection through heart rate variability: a deep neural network based study</article-title>. <source>IEEE Access</source>. (<year>2023</year>) <volume>11</volume>:<fpage>57470</fpage>&#x2013;<lpage>80</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2023.3274478</pub-id></mixed-citation></ref>
<ref id="B29"><label>29.</label><mixed-citation publication-type="confproc"><person-group person-group-type="author"><name><surname>Purnamasari</surname> <given-names>PD</given-names></name> <name><surname>Martmis</surname> <given-names>R</given-names></name> <name><surname>Wijaya</surname> <given-names>RR</given-names></name></person-group>. <article-title>Stress detection application based on heart rate variability (HRV) and k-nearest neighbor (kNN)</article-title>. <conf-name>2019 International Conference on Electrical Engineering and Computer Science (ICECOS), IEEE</conf-name> (<year>2019</year>). p. <fpage>271</fpage>&#x2013;<lpage>6</lpage></mixed-citation></ref>
<ref id="B30"><label>30.</label><mixed-citation publication-type="confproc"><person-group person-group-type="author"><name><surname>Lewandowicz</surname> <given-names>B</given-names></name> <name><surname>Kisia&#x0142;a</surname> <given-names>K</given-names></name></person-group>. <article-title>Comparison of support vector machine, naive Bayes, and K-nearest neighbors algorithms for classifying heart disease</article-title>. <conf-name>International Conference on Information and Software Technologies</conf-name>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer Nature Switzerland</publisher-name> (<year>2023</year>). p. <fpage>274</fpage>&#x2013;<lpage>285</lpage></mixed-citation></ref></ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by"><p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1165892/overview">Gloria Cosoli</ext-link>, Universit&#x00E0; Telematica eCampus, Italy</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by"><p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2573057/overview">Vittoria Cipollone</ext-link>, Marche Polytechnic University, Italy</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3310874/overview">Febriyanti Panjaitan</ext-link>, Universitas Satu, Indonesia</p></fn>
</fn-group>
</back>
</article>