<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Physiol.</journal-id>
<journal-title>Frontiers in Physiology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Physiol.</abbrev-journal-title>
<issn pub-type="epub">1664-042X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1638788</article-id>
<article-id pub-id-type="doi">10.3389/fphys.2025.1638788</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Physiology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>AI-augmented prenatal care: a dual-modal fetal health assessment system integrating cardiotocography and uterine contraction synergy</article-title>
<alt-title alt-title-type="left-running-head">Qiu et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fphys.2025.1638788">10.3389/fphys.2025.1638788</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Qiu</surname>
<given-names>Tianxin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3085020/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhou</surname>
<given-names>Xinghe</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhou</surname>
<given-names>Jun</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Lin</surname>
<given-names>Chunxia</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Jiang</surname>
<given-names>Shiling</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Cheng</surname>
<given-names>Hui</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Xinhao</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>You</surname>
<given-names>Qingshan</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Obstetrics</institution>, <institution>West China Longquan Hospital Sichuan University</institution>, <institution>The First People&#x2019;s Hospital of Longquanyi District Chengdu</institution>, <addr-line>Chengdu</addr-line>, <addr-line>Sichuan</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Faculty of Science, Civil Aviation Flight University of China</institution>, <addr-line>Chengdu</addr-line>, <addr-line>Sichuan</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/231802/overview">Rinshu Dwivedi</ext-link>, National Institute of Technology, India</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3022736/overview">Yuer Yang</ext-link>, The University of Hong Kong, Hong Kong SAR, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3110295/overview">Lulu Wang</ext-link>, Reykjav&#xed;k University, Iceland</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Chunxia Lin, <email>986138020@qq.com</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>29</day>
<month>09</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1638788</elocation-id>
<history>
<date date-type="received">
<day>05</day>
<month>06</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>03</day>
<month>09</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Qiu, Zhou, Zhou, Lin, Jiang, Cheng, Wang and You.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Qiu, Zhou, Zhou, Lin, Jiang, Cheng, Wang and You</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Fetal heart rate (FHR) monitoring is a critical tool for assessing fetal health, but traditional methods rely on subjective physician interpretation, exhibiting significant variability that can lead to misdiagnosis and overtreatment. Artificial intelligence (AI) technology offers a novel approach to address this issue, yet existing research predominantly utilizes unimodal (FHR-only) data, failing to align with clinical guidelines emphasizing &#x201c;bimodality analysis of fetal heart rate and uterine contractions (UC).&#x201d; This study aims to develop a deep learning-based bimodal intelligent monitoring system to enhance the accuracy and clinical utility of fetal health assessment.</p>
</sec>
<sec>
<title>Methods</title>
<p>The research team constructed the first fetal heart-contraction bimodal clinical dataset for Chinese pregnant women (n &#x3d; 326). Based on the DenseNet121 architecture, a selective attention mechanism (SK module) was introduced, proposing the DenseNet121-SK model. Standardized FHR and UC signals were extracted using image processing techniques. Dense connections and the SK module dynamically fused multi-scale features (e.g., transient fluctuations and contraction cycle associations). The model employed lightweight design during training to enhance physician usability.</p>
</sec>
<sec>
<title>Results</title>
<p>(1) Dual-modality input significantly outperformed single-modality input, achieving a classification AUC of 0.944 (vs. 0.812 for single-modality), validating the clinical value of multi-parameter collaborative interpretation; (2) The SK module simulated obstetricians' multi-scale cognition, achieving 95.88% accuracy with 100% recall for abnormal cases; (3) The system effectively reduced subjective interpretation variability, providing technical support for minimizing overtreatment.</p>
</sec>
<sec>
<title>Discussion</title>
<p>This study achieves a balance between clinical interpretability and high performance through lightweight AI design (only 8.3 million parameters) and dual-modality data fusion, making it particularly suitable for resource-constrained primary care settings. Future work should further optimize generalization capabilities through multicenter validation and explore integration with large language models to generate standardized reports. These findings provide important references for optimizing perinatal healthcare resources and AI-assisted decision-making.</p>
</sec>
</abstract>
<kwd-group>
<kwd>fetal heart rate monitoring</kwd>
<kwd>artificial intelligence</kwd>
<kwd>bimodal analysis</kwd>
<kwd>DenseNet121-SK</kwd>
<kwd>AI-assisted decision-making</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Physiology and Medicine</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>FHR monitoring is a widely used method to assess the condition of the fetus during pregnancy, labor and delivery. In high-income countries, continuous fetal heart rate monitoring with cardiotocography (CTG) is commonly performed for deliveries classified as high-risk. In contrast, in low-income and lower-middle-income countries (LMICs), intermittent measurements are the usual method for all deliveries. Intermittent measurements are usually performed using a Pinard stethoscope or a hand-held Doppler device. Guidelines recommend (<xref ref-type="bibr" rid="B20">Lewis et al., 2015</xref>) that auscultation of the fetal heart rate should be performed every 15&#x2013;30 min during the first stage of labor and every 5&#x2013;15 min during the second stage of labor, and that each auscultation should last at least 1 min. However, due to the complexity of fetal physiologic dynamics (<xref ref-type="bibr" rid="B4">Costa Santos et al., 2005</xref>; <xref ref-type="bibr" rid="B13">Hruban et al., 2015</xref>), common standards for visual interpretation of fetal heart rate signals can lead to significant subjective variability. To minimize diagnostic errors, obstetricians perform multiple subjective assessments. As a result, the incidence of untimely cesarean sections (CS) is increasing, largely due to subjective errors (<xref ref-type="bibr" rid="B35">Steer, 2008</xref>). This is the main motivation for designing an automatic analysis of fetal heart rate signals in this study.</p>
<p>In recent years, with the rapid development of machine learning and deep learning, artificial intelligence (AI)-based fetal heart rate monitoring and analyzing systems have provided new ideas to address untimely cesarean deliveries caused by subjective interpretation bias in traditional monitoring. Traditional fetal heart monitoring relies on physicians&#x2019; experience in interpreting fetal heart rate curves (e.g., baseline variability, deceleration type, etc.), but the consistency of interpretation among different physicians is not high due to individual differences and visual fatigue, and it is prone to triggering over-intervention (<xref ref-type="bibr" rid="B30">Madiraju et al., 2025</xref>). Machine learning approaches usually follow a pipeline of signal processing, feature extraction, salient feature selection, model training, and final classification, and they rely on complex manually introduced features. For example, <xref ref-type="bibr" rid="B5">Czabanski et al. (2012)</xref> used weighted fuzzy scoring (WFS) combined with a support vector machine (SVM) to predict neonatal acidosis and obtained 92% accuracy and an 88% quality index. <xref ref-type="bibr" rid="B32">O&#x2019;sullivan et al. (2021)</xref> proposed a method for detecting fetal distress based on autoregressive moving average (ARMA) modeling and machine learning, achieving an AUC of 0.86. <xref ref-type="bibr" rid="B7">Fanelli et al. (2013)</xref> introduced a phase-rectified signal averaging nonlinearity parameter for the quantitative assessment of fetal anomalies and achieved an AUC of 75%. <xref ref-type="bibr" rid="B3">C&#xf6;mert et al. (2018)</xref> applied a neural network and obtained an accuracy of 92.40%, a sensitivity of 95.89% and a specificity of 74.75%. <xref ref-type="bibr" rid="B17">Karmakar et al. (2025)</xref> recently proposed an automated classification model for fetal health status that integrates machine learning algorithms such as gradient boosting classifiers and random forests and obtained 93.41% accuracy.</p>
<p>In contrast to traditional machine learning methods, more research is currently being conducted based on Convolutional Neural Networks (CNNs) and Long Short-Term Memory Networks (LSTMs) in deep learning. Since fetal heartbeat maps are time-series data, but often presented as two-dimensional images (time on the horizontal axis, fetal heart rate and contractions on the vertical axis), using CNNs to automatically extract spatio-temporal features (e.g., local fluctuations, cyclic patterns) through multilayered convolutional kernels and relying on the sliding-window mechanism to capture local temporal dependencies can dramatically improve the recognition accuracy of fetal heartbeat maps. Due to these advantages, CNNs have been used to design various screening and assistive tools, e.g., <xref ref-type="bibr" rid="B21">Li et al. (2018)</xref> proposed a 1D-CNN that classified FHR signals with 93.24% accuracy. <xref ref-type="bibr" rid="B27">Liu M. et al. (2021)</xref> designed a hybrid CNN-BiLSTM network based on the attention mechanism. <xref ref-type="bibr" rid="B26">Lin et al. (2024)</xref> developed LARA, the first deep learning-based automated long-term prenatal FHR analysis system, which generates risk distribution maps (RDM) and an overall risk index (RI) through a 1D-CNN model combined with a sliding-window information fusion technique and has an AUC of 0.872 on the test set.</p>
<p>Although the above machine learning and deep learning methods have achieved reasonably good results, researchers do not follow a uniform data standard: for example, some manually introduce features for the model to learn, while others use a one-dimensional array of fetal heart rate values as the model input, whereas doctors usually interpret an intuitive graph of the fetal heart rate and uterine contraction curves. Therefore, in this study, in order to minimize the criteria for distinguishing normal and abnormal fetal heart rate, we innovatively use images as the dataset, each of which contains the two curves of fetal heart rate and uterine contraction, in order to be closer to the needs of clinical practice.</p>
</sec>
<sec id="s2">
<title>2 CTG interpretation standard</title>
<p>This section systematically describes the core interpretation criteria of CTG and is divided into three parts: firstly, it clarifies the terms and definitions of CTG (<xref ref-type="sec" rid="s2-1">section 2.1</xref>), which lays the foundation for the subsequent analysis; secondly, it explains in detail the categorization and interpretation of the CTG graphs during labor (<xref ref-type="sec" rid="s2-2">section 2.2</xref>), including the characteristics of the typical waveforms and their clinical significance; and finally, it discusses the key role of UC in the fetal heart rate (<xref ref-type="sec" rid="s2-3">section 2.3</xref>), and analyzes the potential mechanism of its impact on fetal heart rate changes.</p>
<sec id="s2-1">
<title>2.1 CTG terms and definitions</title>
<p>Baseline: the average fetal heart rate that fluctuates within 5 beats/min in 10 min, excluding acceleration, deceleration and significant variability; the normal FHR baseline range is 110&#x2013;160 beats/min; the baseline must be a graph that lasts for more than 2 min in any 10 min, and the graph can be discontinuous; if the baseline is uncertain during the observation stage, the baseline can be determined by referring to the graph of the previous 10 min; of which: (1) fetal tachycardia (tachycardia): refers to a fetal heart baseline &#x3e;160 beats/min lasting &#x2265;10 min; (2) fetal bradycardia (bradycardia): refers to a fetal heart baseline &#x3c;110 beats/min lasting &#x2265;10 min.</p>
<p>Baseline variability: refers to the change in amplitude of the fetal heart rate per minute from the peak to the trough, which can be visualized and quantified, of which: (1) absent variability: refers to the disappearance of amplitude fluctuations, as shown in <xref ref-type="fig" rid="F1">Figure 1c</xref>; (2) minimal variability: refers to amplitude fluctuations of &#x2264;5 times/min, as shown in <xref ref-type="fig" rid="F1">Figure 1b</xref>; (3) normal/moderate variability: refers to amplitude fluctuations of 6&#x2013;25 times/min, as shown in <xref ref-type="fig" rid="F1">Figure 1a</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Baseline variation and sinusoidal pattern. <bold>(a)</bold>. Normal variability; <bold>(b)</bold>. Minimal variability; <bold>(c)</bold>. Absent variability; <bold>(d)</bold>. Sinusoidal pattern.</p>
</caption>
<graphic xlink:href="fphys-16-1638788-g001.tif">
<alt-text content-type="machine-generated">Four graphs display fetal heart rate (FHR) and uterine contraction (UC) patterns. (a) Shows FHR with normal variability and amplitude fluctuations of 6-25 times per minute. (b) Displays minimal variability with amplitude fluctuations of 5 times per minute or less. (c) Illustrates absent variability with disappearance of amplitude fluctuations. (d) Presents a sinusoidal-like pattern in the FHR. Each graph includes time stamps and signal areas for FHR and UC.</alt-text>
</graphic>
</fig>
<p>Acceleration: refers to a sudden and significant increase in baseline fetal heart rate with a start-to-peak time of &#x3c;30 s. The time from the start of the acceleration of the fetal heart rate to its return to the baseline fetal heart rate level is the time of acceleration. (1) Before 32 weeks of gestation, acceleration is &#x2265; 10 beats/min at the baseline level and lasts &#x2265;10 s, but &#x3c;2 min; (2) At 32 weeks of gestation and later, acceleration is &#x2265; 15 beats/min at the baseline level and lasts &#x2265;15 s, but &#x3c;2 min; (3) prolonged acceleration: it refers to an increase in the fetal heart rate that lasts &#x2265; 2 min, but &#x3c;10 min; (4) if acceleration lasts &#x2265;10 min, the baseline change in fetal heart rate is taken into consideration.</p>
<p>Deceleration: (1) early deceleration (ED): deceleration accompanied by contractions, usually symmetrical, slow decline to the nadir and then return to the baseline, the time from the beginning to the nadir &#x2265;30 s, the nadir of deceleration is often coincident with the peak of contractions; in general, the onset, nadir, and recovery of deceleration are synchronized with the onset, peak, and end of contractions, respectively; (2) late deceleration (LD): deceleration accompanied by contractions, usually symmetrically and slowly decreasing to the nadir and then recovering to baseline, with the onset to nadir time &#x2265;30 s, and the nadir of deceleration is usually delayed from the peak of contractions. In general, the onset, nadir, and recovery of deceleration lag behind the onset, peak, and end of contractions, respectively; (3) variable deceleration (VD): refers to a sudden, significant, and rapid decline in fetal heart rate, with an onset-to-nadir time of &#x3c;30 s, a decline of &#x2265;15 beats/min, and a duration of &#x2265;15 s, but &#x3c;2 min. When variable deceleration is accompanied by contractions, the onset of deceleration is usually delayed by the peak of contractions. Deceleration is accompanied by contractions, and there is no fixed pattern between the onset, depth and duration of deceleration and contractions, as shown in <xref ref-type="fig" rid="F2">Figure 2</xref>.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Several complex variable decelerations. <bold>(a)</bold>. No posterior &#x201c;shoulder peak&#x201d;; <bold>(b)</bold>. No front &#x201c;shoulder peak&#x201d;; <bold>(c)</bold>. &#x2018;W&#x2019; shape variable deceleration; <bold>(d)</bold>. Wide variable deceleration.</p>
</caption>
<graphic xlink:href="fphys-16-1638788-g002.tif">
<alt-text content-type="machine-generated">Four cardiotocography tracings labeled a, b, c, and d. Each shows a different pattern of fetal heart rate (blue line) and uterine activity (red line). (a) No posterior &#x22;shoulder peak.&#x22; (b) No front &#x22;shoulder peak.&#x22; (c) 'W' shape variable deceleration. (d) Wide variable deceleration.</alt-text>
</graphic>
</fig>
<p>Uterine contraction: (1) normal uterine contraction (normal uterine activity): &#x2264;5 contractions/10 min, observed for 30 min and averaged; (2) excessively frequent uterine contraction (tachysystole): &#x3e;5 contractions/10 min, observed for 30 min and averaged.</p>
<p>Sinusoidal pattern: clearly visible, smooth, sinusoidal-like pattern, long variant of 3-5 cycles/min, lasting &#x2265;20 min, and no acceleration exists, as shown in <xref ref-type="fig" rid="F1">Figure 1d</xref>.</p>
</sec>
<sec id="s2-2">
<title>2.2 Interpretation and classification of CTG graphics during delivery</title>
<p>Class I graphs: The following conditions must be met: (1) the baseline fetal heart rate is 110&#x2013;160 beats/min; (2) the baseline variability is normal; (3) there is no late deceleration or variable deceleration; (4) early deceleration may be present or absent; and (5) acceleration may be present or absent. Such graphs suggest that fetal acid-base balance is normal, as shown in <xref ref-type="fig" rid="F3">Figures 3a,b</xref>.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Class I graphs (The x-axis represents the time and the y-axis is the corresponding FHR/UC signal). <bold>(a)</bold> Class I graphics with UC without acceleration/deceleration, <bold>(b)</bold> Class I graphics with UC and deceleration.</p>
</caption>
<graphic xlink:href="fphys-16-1638788-g003.tif">
<alt-text content-type="machine-generated">Graphs illustrating fetal heart rate (FHR) and uterine contraction (UC) patterns. Panel (a) shows FHR with no deceleration or acceleration, and UC over time. Panel (b) highlights synchronization of early deceleration in FHR with UC. Text boxes classify both graphs under Class I, listing conditions like fetal heart rate baseline, variability, deceleration, and acceleration presence.</alt-text>
</graphic>
</fig>
<p>Class II graphs: All cases other than Class I and Class III electronic fetal heart rate monitoring graphs are classified as Class II. It is not possible to interpret the presence of fetal acid-base balance disorders, but a combination of the clinical situation, continuous fetal heart rate monitoring, and other methods of assessment should be used to determine the presence or absence of fetal hypoxia, and intrauterine resuscitation may be required to improve the condition of the fetus, as shown in <xref ref-type="fig" rid="F4">Figure 4a</xref>.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Class II/III graphs (The x-axis represents the time and the y-axis is the corresponding FHR/UC signal). <bold>(a)</bold> Class II graphics with UC and late deceleration, <bold>(b)</bold> Class II graphics with minimal variability, <bold>(c)</bold> Class III graphics with UC and absent variability.</p>
</caption>
<graphic xlink:href="fphys-16-1638788-g004.tif">
<alt-text content-type="machine-generated">Diagram showing graphs of fetal heart rate (FHR) and uterine contraction (UC) patterns classifying them into Class II and Class III. Graph (a) depicts late deceleration; graph (b) shows W-shaped variable deceleration with normal FHR fluctuations; graph (c) illustrates absence of variability with no posterior shoulder peak. Annotations explain the observations, such as late deceleration and variable deceleration types.</alt-text>
</graphic>
</fig>
<p>Class III graphs: (1) Fetal heart rate baseline absent variability and any of the following conditions are present: &#x2460; recurrent late decelerations; &#x2461; recurrent variable decelerations; &#x2462; fetal bradycardia (fetal heart rate baseline &#x3c;110 beats/min). (2) Sinusoidal pattern. Class III graphs suggest that there is an acid-base imbalance in the fetus, i.e., fetal hypoxia, and appropriate measures should be taken immediately to correct the fetal hypoxia, including changing the position of the pregnant woman, administering oxygen, discontinuing the use of oxytocin, suppressing contractions, and correcting the hypotension of the pregnant woman, etc. If none of these measures work, the pregnancy should be terminated in an emergency, as shown in <xref ref-type="fig" rid="F4">Figure 4c</xref>.</p>
</sec>
<sec id="s2-3">
<title>2.3 The role of UC in fetal heart rate monitoring interpretation criteria</title>
<p>When interpreting Class I graphs, the FHR must be combined with UC to determine whether a deceleration is ED or LD (<xref ref-type="bibr" rid="B31">Mendis et al., 2025</xref>); the same applies to identifying absent variability plus recurrent late deceleration in Class III graphs. Failure to take contractions into account may lead to many graphs that should be classified as Class I or Class III being misclassified as Class II. For example, <xref ref-type="fig" rid="F3">Figure 3b</xref> is a Class I graph and <xref ref-type="fig" rid="F4">Figure 4a</xref> is a Class II graph, but it is difficult to differentiate between the two by looking only at the fetal heart rate without the contraction curve; a Class I graph may be misclassified as a Class II graph, triggering unnecessary intervention, or, conversely, a Class II graph may be misclassified as a Class I graph, so that timely intervention is missed. Therefore, the temporal relationship between the deceleration pattern and contractions is essential in the interpretation criteria of fetal heart monitoring charts. Using the FHR signal alone increases the rate of misjudgment, because a unimodal model is prone to misjudging physiological fluctuations as pathological decelerations: patterns b and c in <xref ref-type="fig" rid="F4">Figure 4</xref> can be identified from the FHR signal alone, but patterns a and b in <xref ref-type="fig" rid="F3">Figure 3</xref>, pattern a in <xref ref-type="fig" rid="F4">Figure 4</xref>, and the absent variability plus late decelerations of Class III graphs cannot be accurately identified without the UC signal.</p>
<p>In terms of pathophysiological mechanisms, the synergistic changes of FHR and UC directly reflect the compensatory state of the fetal-placental unit. For example, the sudden decline of variable deceleration (VD) is associated with vagal reflexes due to cord compression, but its clinical significance needs to be combined with the timing of the occurrence of the out-of-contraction cycle to differentiate between episodic compression and persistent hypoxia. Bimodal data can capture this dynamic interaction feature through time-domain alignment, whereas the single FHR signal provides only isolated information on heart rate fluctuations.</p>
</sec>
</sec>
<sec sec-type="methods" id="s3">
<title>3 Methods</title>
<p>This section describes the datasets, network models, and attention mechanisms used in the experiment. First, the public dataset (<xref ref-type="sec" rid="s3-1">Section 3.1</xref>) is summarized, the experimental data screening method (<xref ref-type="sec" rid="s3-2">Section 3.2</xref>) is described, and then the network structure (<xref ref-type="sec" rid="s3-3">Section 3.3</xref>) and its core attention mechanism (<xref ref-type="sec" rid="s3-4">Section 3.4</xref>) are described in detail.</p>
<sec id="s3-1">
<title>3.1 Publicly available dataset descriptions</title>
<p>The publicly available dataset CTU-UHB (published by the Czech Technical University and University Hospital Brno) is widely used as a baseline data source in the current field of fetal heart monitoring research. This dataset was created by screening 9,164 original fetal monitoring records collected during 2010&#x2013;2012, and 552 CTG samples with complete clinical annotation were retained (<xref ref-type="bibr" rid="B33">Romagnoli et al., 2020</xref>). Although its data size and openness facilitate algorithm development, the following key shortcomings constrain its clinical value:</p>
<sec id="s3-1-1">
<title>3.1.1 Insufficient racial generalization</title>
<p>The CTU-UHB dataset contains data from only a single population of white European pregnant women, whose FHR and UC signaling characteristics show a high degree of homogeneity. However, the physiologic dynamics of the target clinical scenario (a group of Chinese pregnant women) may have geographic or population-specific patterns (e.g., baseline heart rate offset, differences in contraction pressure response, etc.). This data distribution bias leads to difficulties in generalizing models trained on a single population to heterogeneous populations, which in turn triggers cross-domain decision bias.</p>
</sec>
<sec id="s3-1-2">
<title>3.1.2 Lack of multimodal data integrity</title>
<p>The vast majority of samples in the dataset had incomplete or missing UC signals, forcing studies using this dataset to analyze only a single FHR channel (<xref ref-type="bibr" rid="B8">Francis et al., 2024</xref>). This unimodal modeling approach is a serious deviation from the clinical guideline of &#x201c;two-parameter synergistic assessment&#x201d; (i.e., simultaneous monitoring of the time domain/frequency domain coupling of FHR and UC). For example, early signs of fetal hypoxia are often characterized by a rise in contraction pressure accompanied by an absence of accelerated fetal heart rate, and a single FHR signal cannot capture such dynamic correlations.</p>
</sec>
<sec id="s3-1-3">
<title>3.1.3 Introduction of bias by data preprocessing methods</title>
<p>For the missing values of FHR signals, existing studies commonly use interpolation methods such as linear interpolation or spline interpolation to fill them (<xref ref-type="bibr" rid="B25">Li et al., 2025</xref>). Although such methods can achieve sequence continuity, their mathematical smoothing process destroys the original features of biological signals, and there is a certain degree of subjectivity in the filling of missing values of data, which results in the inability of the model to learn the real physiological response patterns.</p>
</sec>
</sec>
<sec id="s3-2">
<title>3.2 Datasets used</title>
<p>In order to break through the bottleneck of adaptability of public datasets in regional clinical applications, this study, in conjunction with the Obstetrics Center of Chengdu Tertiary Hospital, constructed a dedicated dataset for Chinese pregnant women. This dataset consecutively included 326 singleton pregnant women who underwent routine fetal heart rate monitoring in the obstetrics outpatient clinic of the First People&#x2019;s Hospital of Longquanyi District, Chengdu City, Sichuan Province, China, from April 2019 to November 2023; the information of the pregnant women is shown in <xref ref-type="fig" rid="F5">Figure 5</xref>. Those with severe fetal abnormalities, maternal arrhythmia, or signal loss &#x3e;5 min were excluded. The records were independently labeled by two obstetricians with &#x2265;5 years of experience according to the FIGO 2015 guidelines; disagreements were arbitrated by a third person. Category distribution: 224 normal, 102 pathological. The study was approved by the Ethics Committee of the First People&#x2019;s Hospital of Longquanyi District, Chengdu City, Sichuan Province, China (approval number: AF-KY-2024014), and all subjects signed informed consent.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Information of pregnant women used in this study.</p>
</caption>
<graphic xlink:href="fphys-16-1638788-g005.tif">
<alt-text content-type="machine-generated">Two bar charts compare average, maximum, and minimum data across four categories: age of gravidas, gestational week, number of gravidity, and number of parity. The color key identifies pink for average, blue for maximum, and purple for minimum.</alt-text>
</graphic>
</fig>
<p>Compared with the CTU-CHB dataset, its core advantages are reflected in three aspects: first, the signal acquisition parameters and evaluation thresholds were optimized for the unique physiological characteristics of Chinese pregnant women (e.g., the baseline mean fetal heart rate of 142 &#xb1; 8 bpm is significantly lower than that of 148 &#xb1; 10 bpm in the European population); second, 100% synchronous acquisition and time alignment of the FHR and UC signals (sampling frequency 4 Hz, time stamp error &#x2264;0.25 s) was achieved to support Coupling Oscillation Analysis (COA), which meets the requirements of the clinical guidelines on the joint interpretation of multi-parameters; third, data interpolation and filling techniques were strictly prohibited to maximally retain the original nonlinear characteristics of the biological signals. The dataset thus provides infrastructure support for the subsequent multi-center validation and assessment of model generalization ability.</p>
</sec>
<sec id="s3-3">
<title>3.3 Dataset preprocessing</title>
<p>Aiming at the grid shadow interference problem in CTGs collected from hospitals, this study proposes a noise suppression method based on image processing and template matching, and the complete flow is shown in <xref ref-type="fig" rid="F6">Figure 6</xref>. The algorithm takes the original fetal monitor image set <italic>I</italic> &#x3d; {<italic>I</italic>
<sub>1</sub>,<italic>I</italic>
<sub>2</sub>, &#x2026; ,<italic>I</italic>
<sub>
<italic>N</italic>
</sub>}&#x2208; <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> (resolution m &#x3d; 1,653, n &#x3d; 2,339) as the input, and achieves the accurate extraction and standardization of the signal trajectory through the following steps:</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Flow chart of extracting FHR and UC signals from raw data.</p>
</caption>
<graphic xlink:href="fphys-16-1638788-g006.tif">
<alt-text content-type="machine-generated">Flowchart illustrating fetal heart monitoring records. Out of 326 records, 224 are normal (68.71%) and 102 are abnormal (31.29%). Each category shows a fetal heart rate and contraction curve, both original and redrawn after data extraction, depicting normal and abnormal patterns.</alt-text>
</graphic>
</fig>
<p>Step 1: Image preprocessing and region segmentation: The color image <italic>I</italic>
<sub>
<italic>k</italic>
</sub> is first grayscaled by reading the fetal monitoring record image in grayscale format with a resolution of 1,653 &#xd7; 2,339 pixels and then binarized by setting a fixed threshold <italic>&#x3c4;</italic> &#x3d; 50 to convert the original image into a black and white binary image (<xref ref-type="bibr" rid="B16">Jia et al., 2023</xref>), where the fetal heart rate and contraction curve regions are labeled with a foreground value of 1 and the background region with 0. The grayscale value <italic>G</italic>
<sub>
<italic>k</italic>
</sub> (<italic>x</italic>,<italic>y</italic>) is computed, followed by empirical thresholding for binarized segmentation; <xref ref-type="disp-formula" rid="e1">Equations 1</xref>, <xref ref-type="disp-formula" rid="e2">2</xref> illustrate the computational process of this segmentation:<disp-formula id="e1">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi>G</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="|">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>G</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>B</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mrow>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
<disp-formula id="e2">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi>B</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="" separators="|">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mtext>&#x2009;</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mtext>&#x2003;</mml:mtext>
<mml:mi>G</mml:mi>
</mml:mrow>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mtext>&#x2003;</mml:mtext>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>where, <inline-formula id="inf2">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mi>I</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> denotes the original fetal monitor color image (the k-th), <italic>G</italic>
<sub>
<italic>k</italic>
</sub> (<italic>x</italic>,<italic>y</italic>)&#x2208;[0,255] denotes the pixel values after grayscaling, with coordinates of the x-th row and y-th column in the image, and <italic>B</italic>
<sub>
<italic>k</italic>
</sub>(<italic>x</italic>,<italic>y</italic>)&#x2208;{0,1} denotes the binarized mask, which is used for segmenting the signal track region, and <italic>&#x3c4;</italic> &#x3d; 50 is the empirical threshold; this operation efficiently preserves the FHR and UC signal trajectory regions while filtering out the background grid interference.</p>
<p>Step 2: Physiological signal template modeling: two types of physiologic signal templates are defined based on the mapping relationship between clinical ranges and image scales:<list list-type="simple">
<list-item>
<p>1. The image is scanned vertically in the region of rows 281 to 569, which corresponds to the band of the fetal heart rate curve in the paper record chart. The clinical range of 60&#x2013;210 bpm was simulated by a preset linear template to match the binarized image column by column, the weighted average of the valid signal points in each column was calculated, and the standardized fetal heart rate signal sequence was output. The FHR signal template <italic>T</italic>
<sub>
<italic>FHR</italic>
</sub>(<italic>S</italic>) is shown in <xref ref-type="disp-formula" rid="e3">Equation 3</xref>.</p>
</list-item>
</list>
<disp-formula id="e3">
<mml:math id="m5">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>H</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>H</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi mathvariant="italic">max</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi mathvariant="italic">min</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi mathvariant="italic">min</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>H</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>where <italic>k</italic>
<sub>
<italic>FHR</italic>
</sub> &#x3d; &#x2212;1 denotes signal reflection (image longitudinal coordinates are opposite to the physical range), <italic>C</italic>
<sub>
<italic>FHR</italic>
</sub> &#x3d; <italic>y</italic>
<sub>max</sub> &#x2b; <italic>y</italic>
<sub>min</sub> is a compensation constant used to align the baseline after inverse mapping to the image coordinate system, the pixel range of the scale region for FHR is <italic>S</italic>&#x2208;[<italic>S</italic>
<sub>
<italic>start</italic>
</sub>, <italic>S</italic>
<sub>
<italic>end</italic>
</sub>), and the corresponding clinical range is <italic>y</italic>&#x2208;[<italic>y</italic>
<sub>min</sub>, <italic>y</italic>
<sub>max</sub>].<list list-type="simple">
<list-item>
<p>2. Similarly, the contraction pressure curve bands in the region of rows 628 to 770 of the scanned image were combined with a linear template of 0&#x2013;100 mmHg to extract the contraction signals for each column. The UC signal template <italic>T</italic>
<sub>
<italic>UC</italic>
</sub>(<italic>S</italic>) is shown in <xref ref-type="disp-formula" rid="e4">Equation 4</xref>.</p>
</list-item>
</list>
<disp-formula id="e4">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>U</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>k</mml:mi>
<mml:mrow>
<mml:mi>U</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mi mathvariant="italic">max</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mi mathvariant="italic">min</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mi>U</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>where <italic>k</italic>
<sub>
<italic>UC</italic>
</sub> &#x3d; &#x2212;1 denotes signal reflection, the compensation constant <italic>C</italic>
<sub>
<italic>UC</italic>
</sub> ensures baseline zeroing, the pixel range of the scale region of the UC is <italic>S</italic>&#x2208;[<italic>S</italic>&#x2032;
<sub>
<italic>start</italic>
</sub>, <italic>S</italic>&#x2032;
<sub>
<italic>end</italic>
</sub>), and the corresponding clinical range is <italic>y</italic> <inline-formula id="inf3">
<mml:math id="m7">
<mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mi mathvariant="italic">min</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>y</mml:mi>
<mml:mi mathvariant="italic">max</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>Step 3: Longitudinal Signal Extraction and Noise Suppression: For each time point i, the longitudinal column of pixel data is extracted vertically, the binary signal is multiplied with the physiological template, and the mean value is computed only for the valid data points, i.e., those with <italic>B</italic>
<sub>
<italic>k</italic>
</sub> &#x3d; 1, to suppress the noise; the calculation process is shown in <xref ref-type="disp-formula" rid="e5">Equations 5</xref>, <xref ref-type="disp-formula" rid="e6">6</xref>.<disp-formula id="e5">
<mml:math id="m8">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b5;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mrow>
<mml:msub>
<mml:mi>B</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>H</mml:mi>
<mml:mi>R</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
<disp-formula id="e6">
<mml:math id="m9">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mn>2</mml:mn>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b5;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:mrow>
<mml:msub>
<mml:mi>B</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>U</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>where, <italic>i</italic>&#x2208;[0,n] denotes the timeline pixel position, <inline-formula id="inf4">
<mml:math id="m10">
<mml:mrow>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes the p-th class signal in the k-th image (p &#x3d; 1: FHR, p &#x3d; 2: UC), &#x3a6;<sub>
<italic>p</italic>
</sub> denotes the set of valid pixels for the p-th class signal, and <italic>&#x3b5;</italic> &#x3d; 10<sup>&#x2212;6</sup> avoids division by zero error. This operation generates a normalized time series by suppressing the random noise in the non-track region.</p>
<p>Step 4: Time series matrix construction: character area localization is performed, and the constructed matrix is given by <xref ref-type="disp-formula" rid="e7">Equation 7</xref>.<disp-formula id="e7">
<mml:math id="m11">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mtd>
<mml:mtd>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mn>2</mml:mn>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mn>2</mml:mn>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mo>&#x22ee;</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
<mml:mtd>
<mml:msubsup>
<mml:mi>S</mml:mi>
<mml:mn>2</mml:mn>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>N</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>where <italic>X</italic> denotes the two-channel time series matrix after signal extraction, <italic>N</italic> denotes the total number of images, 2<italic>N</italic> is the number of rows (each recorded image has two signal channels, FHR and UC), and n is the number of columns, i.e., the length of the time series.</p>
</sec>
<sec id="s3-4">
<title>3.4 Network model</title>
<p>Compared with other domains, medical image data usually has a small data volume, so lightweight models should be prioritized to suit the data scarcity (<xref ref-type="bibr" rid="B1">Chen et al., 2024</xref>). Although many image processing models such as Vision Transformer (<xref ref-type="bibr" rid="B18">Khan et al., 2022</xref>) and Swin Transformer (<xref ref-type="bibr" rid="B28">Liu Z. et al., 2021</xref>) have achieved good results in recent years, they require large data volumes to support them. Using such large models on small datasets leads to overfitting and high computational resource requirements, which makes them difficult to deploy efficiently in resource-constrained healthcare scenarios.</p>
<p>DenseNet121 (<xref ref-type="bibr" rid="B15">Huang et al., 2017</xref>) (Densely connected Convolutional Networks) is a deep convolutional neural network whose core idea is to enhance feature propagation by means of dense connections in a Dense Block. First, each layer in the Dense Block is connected to all previous layers, and given an input image <italic>X</italic>
<sub>0</sub>, it is forward propagated through a convolutional neural network (DenseLayer) containing <italic>L</italic> layers. Each layer <inline-formula id="inf5">
<mml:math id="m12">
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (1&#x2264; <inline-formula id="inf6">
<mml:math id="m13">
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> &#x2264;L) performs a nonlinear mapping <inline-formula id="inf7">
<mml:math id="m14">
<mml:mrow>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi mathvariant="script">L</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, which consists of a combination of the basic blocks of batch normalization (BN), ReLU activation function, and convolutional operations. The feature output of the <inline-formula id="inf8">
<mml:math id="m15">
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th layer is denoted as <inline-formula id="inf9">
<mml:math id="m16">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>X</mml:mi>
</mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>This connection makes the features fully reused and fused between different layers, enhances the feature transfer efficiency, and the gradient can be transferred more efficiently in the backpropagation process. <xref ref-type="fig" rid="F7">Figure 7</xref> shows a DenseBlock containing 2 DenseLayers, the <inline-formula id="inf10">
<mml:math id="m17">
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-th layer receives <inline-formula id="inf11">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mtext>&#x2009;</mml:mtext>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> from all previous layers as input as shown in <xref ref-type="disp-formula" rid="e8">Equation 8</xref>: <disp-formula id="e8">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mi mathvariant="script">L</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>H</mml:mi>
<mml:mi mathvariant="script">L</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>0</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi mathvariant="script">L</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>The internal structure of DenseBlock.</p>
</caption>
<graphic xlink:href="fphys-16-1638788-g007.tif">
<alt-text content-type="machine-generated">Diagram showing a neural network architecture with three main parts: Dense Layer 1, Dense Layer 2, and Transition Layer 2. Dense Layer 1 includes Conv(1x1)&#x2b;BN&#x2b;ReLU, Conv(3x3)&#x2b;BN&#x2b;ReLU, and SK. Dense Layer 2 has Conv(3x3)&#x2b;BN&#x2b;ReLU and SK. Transition Layer 2 contains Conv(1x1) and Average pool(2x2). Feature maps are indicated as inputs and outputs, with arrows connecting the components and layers.</alt-text>
</graphic>
</fig>
<p>This structural design allows the network to converge faster during the training process and reduces the occurrence of the gradient vanishing problem. At the same time, it is also characterized by high parameter efficiency; compared with other convolutional neural networks of the same type, DenseNet121 has fewer parameters at the same performance level (<xref ref-type="bibr" rid="B11">He et al., 2016</xref>; <xref ref-type="bibr" rid="B37">Szegedy et al., 2016</xref>), only 7.98M, as shown in <xref ref-type="fig" rid="F8">Figure 8</xref>. In addition, there is a transition layer between every two Dense Blocks, which contains a 1 &#xd7; 1 convolution and average pooling; it actively reduces the feature map dimension through channel compression, and this compression suppresses overfitting and enhances noise robustness. The DenseNet121 network structure is shown in <xref ref-type="fig" rid="F9">Figure 9</xref>.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Parameter comparison of the model.</p>
</caption>
<graphic xlink:href="fphys-16-1638788-g008.tif">
<alt-text content-type="machine-generated">Bar chart comparing model parameters of six networks. DenseNet-121: 7.98M, ResNet-50: 25.6M, ResNet-101: 44.5M, Inception-v3: 23.8M, DenseNet-169: 14.3M, DenseNet-201: 20.1M.</alt-text>
</graphic>
</fig>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>The DenseNet network structure.</p>
</caption>
<graphic xlink:href="fphys-16-1638788-g009.tif">
<alt-text content-type="machine-generated">Diagram of a convolutional neural network architecture for data classification. It shows layers including Convolution, Max Pooling, Dense Blocks, Transition Layers, Global Average Pool, and Fully Connected layers. Input data progresses from left to right through increasing complexity: 224x224x64 to 1x1x2. Final output distinguishes between normal (green) and abnormal (red) classifications. Legend explains symbols for each layer type.</alt-text>
</graphic>
</fig>
<p>In the fetal heart map classification task, DenseNet121 fuses the shallow features of the signal with the deeper features through dense connections, enabling the network to extract rich feature information from different levels. This dense connectivity structure enables the network to better capture subtle features in fetal heart maps, such as short-term details: instantaneous fluctuations in fetal heart rate (e.g., variable deceleration) or impulse noise in contraction signals (maternal motion interference) (<xref ref-type="bibr" rid="B2">Chen et al., 2025</xref>), and long-time trends: contraction cycles (10&#x2013;15 min) with baseline variability in fetal heart rate.</p>
<p>DenseNet121 progressively fuses features at different scales through 3 &#xd7; 3 convolution cascaded in multiple layers within a dense block without relying on complex data enhancement or pre-training strategies, a feature that is crucial for capturing the synchronization of contraction peaks with fetal heart rate deceleration (Deceleration-Contraction Coupling).</p>
<p>This feature fusion approach not only improves the representativeness of the features, but also allows the network to better adapt to the complexity of fetal heart maps and improve the classification accuracy.</p>
<p>In the fetal heart map classification task, this means that DenseNet121 is able to achieve higher classification accuracy without increasing the computational burden. This is especially important for practical clinical applications, as its lightweight design not only reduces the computational resource requirements, but also improves the generalization ability of the model, making it ideal for scenarios with limited fetal heart image data.</p>
</sec>
<sec id="s3-5">
<title>3.5 Attention</title>
<p>Although the dense connectivity of DenseNet can aggregate multi-scale features (e.g., transient fluctuation and baseline drift) across layers, the fixed receptive field of its convolutional kernel makes it difficult to dynamically adapt to pathological patterns with different spatiotemporal characteristics. Fetal heart deceleration during the UC Peak Phase requires a large receptive field to capture cyclic correlations, while Beat-to-beat Variability relies on local detail extraction, but the fixed size of conventional convolution kernels limits the model&#x2019;s ability to capture multi-scale physiological dynamics (<xref ref-type="bibr" rid="B24">Li et al., 2023</xref>; <xref ref-type="bibr" rid="B43">Zhang et al., 2023</xref>). In addition, noise interference in the fetal cardiogram differs significantly from key physiological events (e.g., delayed deceleration) in the channel dimension, but traditional dense connectivity assigns equal weight to all feature channels, resulting in insufficient sensitivity of the model to low signal-to-noise ratio regions.</p>
<p>To address the problem of limited data volume and complex pathology features in fetal heart maps, this study further introduces the Selective Kernel (SK) (<xref ref-type="bibr" rid="B22">Li et al., 2019a</xref>) attention module, which is inserted after each DenseLayer of the DenseBlock. The dense connectivity of DenseNet121 provides an ideal architectural foundation for this purpose. The multi-scale feature maps (abstraction layers of different Dense Blocks) output by the dense connectivity provide rich inputs for the dynamic receptive field selection of the SK module, which enhances the feature response to key phases of contractions (e.g., peak periods) through Channel Recalibration, while the dense connectivity ensures that local details (e.g., small fluctuations in fetal heart rate) are not forgotten by the deep network. SK convolution is implemented by three operations, Split, Fuse and Select, the process of which is depicted in <xref ref-type="fig" rid="F10">Figure 10</xref>.</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Selective kernel convolution.</p>
</caption>
<graphic xlink:href="fphys-16-1638788-g010.tif">
<alt-text content-type="machine-generated">Diagram illustrating a neural network module with convolution layers and operations. Input X undergoes splitting and is processed through 3x3 and 5x5 convolutions to form Y1 and Y2. These are fused using element-wise summation and product operations, leading to feature manipulation with functions \(F_{GAP}\) and \(F_{FC}\). The elements are selected and combined into output Z, maintaining dimensions \(H \times W \times C\). Symbols for element-wise summation and product are noted at the bottom.</alt-text>
</graphic>
</fig>
<p>
<italic>Split</italic>: two convolution kernels of sizes 3 and 5 are used to perform convolution operations on the input features (each convolution operation is a set of GBRs), i.e., <inline-formula id="inf12">
<mml:math id="m20">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf13">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mi>X</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> to obtain two-scale feature representations and, for efficiency, a 3 &#xd7; 3 convolution kernel with a dilation size of 2 is used instead of the 5 &#xd7; 5 conventional convolution kernel. The same input is fed into two &#x201c;stethoscopes&#x201d; simultaneously: the 3 &#xd7; 3 convolution &#x2192; captures the instantaneous variation (beat-to-beat) over 0.2&#x2013;0.4 s; the equivalent 5 &#xd7; 5 (dilation &#x3d; 2) convolution &#x2192; covers the contraction-cycle correlation over 0.8&#x2013;1.2 s. The two branch outputs <italic>Y</italic>
<sub>1</sub> and <italic>Y</italic>
<sub>2</sub> are identical in shape, facilitating subsequent pixel-by-pixel fusion.</p>
<p>
<italic>Fuse</italic>: in order to enable neurons to adaptively adjust the size of their receptive fields according to the content of the stimulus, the results of the two branches are first fused by elemental summation, i.e., the corresponding elements in a tensor of the same shape are summed:<disp-formula id="e9">
<mml:math id="m22">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>Then, the global information is embedded through global average pooling to generate the channel statistics <italic>s</italic> &#x2208; &#x211d;<sup><italic>C</italic></sup>; specifically, the <italic>c</italic>-th element of <italic>s</italic> is computed by shrinking <italic>Y</italic> through the spatial dimensions <italic>H</italic> &#xd7; <italic>W</italic>:<disp-formula id="e10">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>H</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>W</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>This step is equivalent to an obstetrician quickly going through the entire curve and noting which bands are abnormal in energy.</p>
<p>Then, a fully connected operation is performed on the channel statistic <italic>s</italic> containing global information to obtain the low-dimensional feature vector <italic>z</italic> after dimensionality reduction and abstraction, which retains the key information of the input while significantly reducing the dimension, so as to reduce the parameters of the subsequent attention layer and improve the inference speed:<disp-formula id="e11">
<mml:math id="m24">
<mml:mrow>
<mml:mi>z</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>B</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi mathvariant="bold-italic">W</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>where <inline-formula id="inf14">
<mml:math id="m25">
<mml:mrow>
<mml:mi mathvariant="bold">W</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, <italic>B</italic> denotes batch normalization, <italic>&#x3b4;</italic> is the ReLU function, and <italic>z</italic> has dimension <italic>d</italic>, which is determined as shown in <xref ref-type="disp-formula" rid="e12">Equation 12</xref>:<disp-formula id="e12">
<mml:math id="m26">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>max</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>where <italic>r</italic> is the dimensionality reduction ratio and <italic>L</italic> is the lower limit of <italic>d</italic>. A larger <italic>r</italic> sacrifices part of the expressive ability to improve efficiency, which is suitable for small or lightweight models; therefore, in this study, <italic>r</italic> is set to 16, and <italic>L</italic> &#x3d; 32 is used as the lower limit to prevent over-compression of the information and to preserve expressive ability.</p>
<p>
<italic>Select</italic>: the input is the compact feature descriptor <italic>z</italic>. A cross-channel soft attention mechanism uses attention weights to dynamically determine which branch each channel should focus on: through exponential operation and normalization, the scores are converted into the probability values <italic>a</italic>
<sub>
<italic>c</italic>
</sub> and <italic>b</italic>
<sub>
<italic>c</italic>
</sub>, satisfying <italic>a</italic>
<sub>
<italic>c</italic>
</sub> &#x2b; <italic>b</italic>
<sub>
<italic>c</italic>
</sub> &#x3d; 1, and its mathematical expression is:<disp-formula id="e13">
<mml:math id="m27">
<mml:mrow>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>B</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>B</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>B</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mi>z</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>where <inline-formula id="inf15">
<mml:math id="m28">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>B</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are learnable parameter matrices that map <italic>z</italic> into two probabilities <italic>a</italic> and <italic>b</italic>. <italic>a</italic> &#x2248; 1, <italic>b</italic> &#x2248; 0: the model believes that the current channel should be dominated by instantaneous details, such as at the starting point of mutation deceleration; <italic>a</italic> &#x2248; 0, <italic>b</italic> &#x2248; 1: the model pays more attention to long-term trends, such as determining whether the baseline continues to decline at peak contractions; <italic>a</italic> and <italic>b</italic> between 0 and 1: the model blends the two scales to adapt to the transition interval. Each row of each matrix corresponds to the weight calculation of one channel, with <italic>a</italic> and <italic>b</italic> representing the <italic>Y</italic>
<sub>1</sub> and <italic>Y</italic>
<sub>2</sub> soft attention vectors. Note: <inline-formula id="inf16">
<mml:math id="m29">
<mml:mrow>
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>B</mml:mi>
</mml:mrow>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represent the <italic>c</italic>-th rows of the matrices <italic>A</italic> and <italic>B</italic>, corresponding to the weight parameters of the <italic>c</italic>-th channel, and <italic>a</italic>
<sub>
<italic>c</italic>
</sub>, <italic>b</italic>
<sub>
<italic>c</italic>
</sub> represent the <italic>c</italic>-th elements of <italic>a</italic> and <italic>b</italic>. Finally, the features are fused:<disp-formula id="e14">
<mml:math id="m30">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>where <italic>V</italic> &#x3d; [<italic>V</italic>
<sub>1</sub>, <italic>V</italic>
<sub>2</sub>, &#x2026;, <italic>V</italic>
<sub>
<italic>C</italic>
</sub>], <inline-formula id="inf17">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mi>V</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>W</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. The final output <italic>V</italic>
<sub>
<italic>c</italic>
</sub> is equivalent to adjusting the volume of the two stethoscopes in real time according to &#x201c;clinical importance&#x201d;.</p>
<p>The Selective Kernel (SK) attention mechanism used in this study achieves intelligent focusing on key pathological features in fetal heart-contraction bimodal signals through dynamic gating weights, and its core innovation is to mimic the process of diagnostic cognition of irregular physiological events by obstetricians. During contraction stress, the SK module generates feature mappings of differentiated receptive fields through parallel processing of multibranch convolutional kernels (<xref ref-type="disp-formula" rid="e9">Equation 9</xref>): 3 &#xd7; 3 kernels capture transient variability (e.g., subtle fluctuations in beat-to-beat variability), whereas dilated convolution equivalent to 5 &#xd7; 5 kernels captures cyclic associations (e.g., lag phase difference between deceleration and contraction). Global average pooling (<xref ref-type="disp-formula" rid="e10">Equation 10</xref>) compresses the spatiotemporal features into a channel statistic <italic>s</italic>, which essentially quantifies the energy distribution of different frequency components. The fully connected layer (<xref ref-type="disp-formula" rid="e11">Equation 11</xref>) in effect constructs a low-dimensional manifold representation of the dynamic properties of the signal by means of an abstract feature vector <italic>z</italic> extracted from the bottleneck structure (<italic>r</italic> &#x3d; 16), where each dimension corresponds to a typical pathological pattern.</p>
<p>The calculation of gating weights (<xref ref-type="disp-formula" rid="e13">Equation 13</xref>) realizes the embedding of clinical <italic>a priori</italic> knowledge through the learnable parameter matrices <italic>A</italic> and <italic>B</italic>: when the input signal has a contraction-triggered steep drop (e.g., a W-type valley of variability deceleration), the weights of the larger receptive field branches (5 &#xd7; 5 equivalent kernels) <italic>b</italic>
<sub>
<italic>c</italic>
</sub> are automatically augmented by the <italic>Softmax</italic> function (&#x3e;0.7), allowing the model to prioritize the temporal relationship between the overall pattern of deceleration and the contraction cycle; conversely, when subtle fluctuations are detected (e.g., baseline variability decay), the weights <italic>a</italic>
<sub>
<italic>c</italic>
</sub> of the smaller receptive field branches (3 &#xd7; 3 kernels) are elevated, focusing on local slope changes. This adaptive selection mechanism (<xref ref-type="disp-formula" rid="e14">Equation 14</xref>) achieves a triple optimization at the physiological level: 1) in the time domain, the dynamic weight assignment strengthens the characteristic response of the critical phase of contraction (15 s after the peak), and weakens the redundant information of the inter-contraction interval; 2) in the frequency domain, the interference of the maternal motion artifacts (high-frequency noise) is suppressed by the channel re-calibration, and the hypoxia-associated fluctuation in the frequency band of 0.04&#x2013;0.15 Hz is enhanced; 3) Spatially, multi-scale feature fusion ensures that transient but clinically significant signal transitions (e.g., W-shaped double valleys of variable deceleration) are not smoothed by the fixed receptive fields of conventional convolution, and automatically enhances the detection sensitivity of subtle but prognostically critical signal turning points (e.g., deceleration recovery slopes &#x3c;1 bpm/s) during the contraction stress phase to maximally mimic the obstetrician&#x2019;s interpretation process.</p>
</sec>
</sec>
<sec sec-type="results|discussion" id="s4">
<title>4 Experimental results and discussion</title>
<sec id="s4-1">
<title>4.1 Experimental setup</title>
<p>The neural network models in this study were built with PyTorch 2.6.0 and CUDA 12.0 as the deep learning framework. The ratio of the training set to the test set is 7:3, and all experiments were run on a machine equipped with an NVIDIA RTX4060 GPU, an AMD 7735 processor, and 16 GB of DDR5 memory. The hyperparameters are shown in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Hyperparameters for proposed method.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Hyperparameter</th>
<th align="left">Value</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">K</td>
<td align="left">32</td>
</tr>
<tr>
<td align="left">Epochs</td>
<td align="left">100</td>
</tr>
<tr>
<td align="left">Learning rate</td>
<td align="left">0.001</td>
</tr>
<tr>
<td align="left">Batch Size</td>
<td align="left">32</td>
</tr>
<tr>
<td align="left">Loss Function</td>
<td align="left">FocalLoss</td>
</tr>
<tr>
<td align="left">Optimisation</td>
<td align="left">Adam</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s4-2">
<title>4.2 Evaluation indicators</title>
<p>Because of the uneven proportion of data, in order to more fully validate the performance of the model in this study, several metrics, including precision, recall, F1 score, the confusion matrix, the receiver operating characteristic (ROC) curve, and the area under the ROC curve (AUC), were introduced for assessment.</p>
<p>In this assessment framework, a normal CTG plot is defined as the positive category and an abnormal one as the negative category. Based on this setting, the model prediction results are defined as follows: cases in which the model correctly predicts fetal normality are called True Positive (TP); cases in which the model incorrectly predicts fetal abnormality as the positive category are called False Positive (FP); cases in which the model correctly predicts fetal abnormality as the negative category are called True Negative (TN); and cases in which the model incorrectly predicts fetal normality as the negative category are called False Negative (FN).</p>
<p>The precision rate indicates the proportion of samples predicted to be in the normal/abnormal category that are actually in the normal/abnormal category and measures the ability of the model to avoid misdiagnosis. The calculation method for precision is shown in <xref ref-type="disp-formula" rid="e15">Equations 15</xref>, <xref ref-type="disp-formula" rid="e16">16</xref>.<disp-formula id="e15">
<mml:math id="m32">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>
<disp-formula id="e16">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>
</p>
<p>Recall represents the proportion of true normal/abnormal samples correctly identified by the model to the total number of actual normal/abnormal samples, which reflects the model&#x2019;s ability to capture normal/abnormal categories. In the fetal heart rate monitoring scenario, this metric is of key clinical significance: false positives (abnormal cases predicted as normal) will lead to missed high-risk cases and delayed necessary interventions (emergency cesarean section), thus jeopardizing the safety of mother and baby, while false negatives (normal cases predicted as abnormal), although they may lead to excessive medical intervention, carry a significantly lower risk of adverse clinical outcomes than false positives. Therefore, minimizing the proportion of FP by optimizing the recall rate of the abnormal category is a central goal to guarantee the safety of decision-making in high-risk pregnancies and is highly consistent with the clinical priority of reducing the rate of missed diagnoses. The recall rate is calculated as shown in <xref ref-type="disp-formula" rid="e17">Equations 17</xref>, <xref ref-type="disp-formula" rid="e18">18</xref>.<disp-formula id="e17">
<mml:math id="m34">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(17)</label>
</disp-formula>
<disp-formula id="e18">
<mml:math id="m35">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>b</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(18)</label>
</disp-formula>
</p>
<p>The F1 score is the harmonic mean of precision and recall and is used to balance the two. The calculation method for the F1 score is shown in <xref ref-type="disp-formula" rid="e19">Equation 19</xref>.<disp-formula id="e19">
<mml:math id="m36">
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>R</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(19)</label>
</disp-formula>
</p>
<p>Accuracy indicates the number of correctly predicted samples as a proportion of the total number of samples. The calculation method for accuracy is shown in <xref ref-type="disp-formula" rid="e20">Equation 20</xref>.<disp-formula id="e20">
<mml:math id="m37">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(20)</label>
</disp-formula>
</p>
<p>The confusion matrix is a matrix structure for evaluating the performance of classification models (<xref ref-type="bibr" rid="B39">Valero-Carreras et al., 2023</xref>), which quantitatively presents the accuracy and error distribution of classification results by cross-referencing the true categories of the samples with the predicted categories of the model. The confusion matrix is shown in <xref ref-type="disp-formula" rid="e21">Equation 21</xref>.<disp-formula id="e21">
<mml:math id="m38">
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>M</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>x</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mtable columnalign="center">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:mtd>
<mml:mtd>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(21)</label>
</disp-formula>
</p>
<p>ROC is a visualization tool for evaluating the performance of a binary classification model, with the False Positive Rate (FPR) on the horizontal axis and the True Positive Rate (TPR) on the vertical axis. The ROC curve depicts the model&#x2019;s ability to discriminate between positive and negative categories by traversing all classification thresholds: the closer the curve is to the upper left corner (FPR approaches 0, TPR approaches 1), the better the model&#x2019;s classification performance. The AUC is the area enclosed by the ROC curve and the coordinate axes, which is used to quantify the model&#x2019;s overall classification effectiveness. When AUC &#x3d; 0.5, the model is equivalent to a random guess; when AUC &#x3d; 1, the model classifies perfectly. The calculation method for AUC is shown in <xref ref-type="disp-formula" rid="e22">Equation 22</xref>.<disp-formula id="e22">
<mml:math id="m39">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>U</mml:mi>
<mml:mi>C</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x222b;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mn>1</mml:mn>
</mml:msubsup>
</mml:mstyle>
<mml:mi>R</mml:mi>
<mml:mi>O</mml:mi>
<mml:mi>C</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>d</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
<label>(22)</label>
</disp-formula>
</p>
</sec>
<sec id="s4-3">
<title>4.3 Results</title>
<p>
<xref ref-type="table" rid="T2">Table 2</xref> presents a side-by-side comparison of the DenseNet121 backbone network fused with eight mainstream attention mechanisms, in which SK achieves the optimal performance with an accuracy rate of 0.9588, which is tied for first place with classical SE attention, but demonstrates significant advantages in key clinical metrics: the normal samples achieved 100% precision (Precision &#x3d; 1.00), effectively avoiding the risk of misdiagnosing normal fetal heart as abnormal; the abnormal samples achieved 100% recall (Recall &#x3d; 1.00), ensuring that all abnormal cases were effectively detected. In terms of the comprehensive assessment indexes, both the normal category F1-score (0.97) and the abnormal category F1-score (1.00) were significantly better than the comparison scheme, with an improvement of 5.0% and 16.0%, respectively, compared with the baseline method, and the confusion matrices are shown in <xref ref-type="fig" rid="F11">Figure 11</xref>. The experimental results show that the SK module enhances the model&#x2019;s hierarchical characterization of fetal heart fluctuation features through the strategy of dynamically selecting multi-scale convolutional kernels, and its channel attention mechanism precisely focuses on the pathology-related features, which results in a clearer decision boundary for the normal/abnormal category. This performance advantage is valuable in clinical scenarios to eliminate the waste of medical resources caused by false-positive diagnosis and avoid the medical risks caused by false-negative missed diagnosis.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Comparison between using DenseNet alone and integrating other attention.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left">Attention</th>
<th rowspan="2" align="left">Accuracy</th>
<th colspan="3" align="center">Normal</th>
<th colspan="3" align="center">Abnormal</th>
</tr>
<tr>
<th align="left">Precision</th>
<th align="left">Recall</th>
<th align="left">F1-score</th>
<th align="left">Precision</th>
<th align="left">Recall</th>
<th align="left">F1-score</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">DenseNet121 (<xref ref-type="bibr" rid="B15">Huang et al., 2017</xref>)</td>
<td align="left">0.8969</td>
<td align="left">0.92</td>
<td align="left">0.92</td>
<td align="left">0.92</td>
<td align="left">0.84</td>
<td align="left">0.84</td>
<td align="left">0.84</td>
</tr>
<tr>
<td align="left">ECA (<xref ref-type="bibr" rid="B40">Wang et al., 2020</xref>)</td>
<td align="left">0.8660</td>
<td align="left">0.91</td>
<td align="left">0.89</td>
<td align="left">0.90</td>
<td align="left">0.79</td>
<td align="left">0.81</td>
<td align="left">0.80</td>
</tr>
<tr>
<td align="left">SimAM (<xref ref-type="bibr" rid="B42">Yang et al., 2021</xref>)</td>
<td align="left">0.8969</td>
<td align="left">0.90</td>
<td align="left">0.95</td>
<td align="left">0.93</td>
<td align="left">0.89</td>
<td align="left">0.78</td>
<td align="left">0.83</td>
</tr>
<tr>
<td align="left">SGE (<xref ref-type="bibr" rid="B23">Li et al., 2019b</xref>)</td>
<td align="left">0.9175</td>
<td align="left">0.91</td>
<td align="left">0.97</td>
<td align="left">0.94</td>
<td align="left">0.93</td>
<td align="left">0.81</td>
<td align="left">0.87</td>
</tr>
<tr>
<td align="left">CoorAtt (<xref ref-type="bibr" rid="B12">Hou et al., 2021</xref>)</td>
<td align="left">0.9278</td>
<td align="left">0.94</td>
<td align="left">0.95</td>
<td align="left">0.95</td>
<td align="left">0.90</td>
<td align="left">0.88</td>
<td align="left">0.89</td>
</tr>
<tr>
<td align="left">CBAM (<xref ref-type="bibr" rid="B41">Woo et al., 2018</xref>)</td>
<td align="left">0.9381</td>
<td align="left">0.92</td>
<td align="left">1.00</td>
<td align="left">0.96</td>
<td align="left">1.00</td>
<td align="left">0.81</td>
<td align="left">0.90</td>
</tr>
<tr>
<td align="left">DAN (<xref ref-type="bibr" rid="B9">Fu et al., 2019</xref>)</td>
<td align="left">0.9485</td>
<td align="left">0.98</td>
<td align="left">0.94</td>
<td align="left">0.96</td>
<td align="left">0.89</td>
<td align="left">0.97</td>
<td align="left">0.93</td>
</tr>
<tr>
<td align="left">SE (<xref ref-type="bibr" rid="B14">Hu et al., 2018</xref>)</td>
<td align="left">0.9588</td>
<td align="left">0.98</td>
<td align="left">0.95</td>
<td align="left">0.97</td>
<td align="left">0.91</td>
<td align="left">0.97</td>
<td align="left">0.94</td>
</tr>
<tr>
<td align="left">SK (<xref ref-type="bibr" rid="B22">Li et al., 2019a</xref>)</td>
<td align="left">0.9588</td>
<td align="left">1.00</td>
<td align="left">0.94</td>
<td align="left">0.97</td>
<td align="left">0.89</td>
<td align="left">1.00</td>
<td align="left">0.94</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F11" position="float">
<label>FIGURE 11</label>
<caption>
<p>Confusion matrix for different attentions: <bold>(a)</bold> Only DenseNet121 <bold>(b)</bold> ECA <bold>(c)</bold> SimAM <bold>(d)</bold> SGE <bold>(e)</bold> CoorAtt <bold>(f)</bold> CBAM <bold>(g)</bold> DAN <bold>(h)</bold> SE <bold>(i)</bold> SK.</p>
</caption>
<graphic xlink:href="fphys-16-1638788-g011.tif">
<alt-text content-type="machine-generated">Nine confusion matrices labeled (a) to (i) compare different models' predictions with actual data. Each matrix shows true labels on the vertical axis and predicted labels on the horizontal axis, divided into normal and abnormal categories. Percentages and counts indicate accuracy for each classification. Models include DenseNet121, ECA, SimAM, SGE, CoorAtt, CBAM, DAN, SE, and SK. CBAM and SK show highest accuracy for normal and abnormal predictions, respectively. Colors range from dark purple to light pink, representing varying data density.</alt-text>
</graphic>
</fig>
<p>
<xref ref-type="table" rid="T3">Table 3</xref> summarizes the results of the evaluation of existing methods for fetal heartbeat monitoring classification, covering the performance of different machine learning and deep learning models on their respective datasets. It can be seen that this study achieved AUC: 94.4/Acc: 95.88/F1: 97 on the self-constructed bimodal dataset, whereas the performance of the unimodal variant (FHR only) dropped to AUC: 81.2/Acc: 87.69/F1: 79 under the same model structure, a result that validates the value of contraction signals as an auxiliary feature. The comparison results in <xref ref-type="table" rid="T3">Table 3</xref> do not constitute a strict performance ranking, and there are limitations in directly comparing the performance of these methods due to the following reasons: first, the cited studies each used a different private or public dataset, with sample sizes ranging from 83 to 4,473 cases, and the difference in sample sizes may affect the assessment of the model&#x2019;s generalization ability. Second, some of the methods were designed based on 1D fetal heart rate signals, whereas our DenseNet121-SK model deals with 2D images after bimodal signal conversion. Even if the reproduction on the same dataset is forced, the signal needs to be resampled, windowed, or spectrally transformed, which introduces additional preprocessing bias and leads to less rigorous performance comparisons. Therefore, the comparison results in <xref ref-type="table" rid="T3">Table 3</xref> are more of a reference for method trends rather than a strict performance ranking.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Summary of existing methods and of the proposed method, including the performance of the proposed method when only a single modality (FHR) is used.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="left">Reference</th>
<th rowspan="2" align="center">Method</th>
<th colspan="3" align="center">Evaluating indicator</th>
<th rowspan="2" align="left">Dataset used</th>
<th rowspan="2" align="left">Sample size</th>
<th rowspan="2" align="left">Performance (%)</th>
</tr>
<tr>
<th align="left">AUC</th>
<th align="left">Acc</th>
<th align="left">F1</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">
<xref ref-type="bibr" rid="B19">Krupa et al. (2011)</xref>
</td>
<td align="left">SVM</td>
<td align="left"/>
<td align="left">&#x221a;</td>
<td align="left"/>
<td align="left">Private</td>
<td align="left">90</td>
<td align="left">87</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B34">Spilka et al. (2014)</xref>
</td>
<td align="left">NB,SVM,DT</td>
<td align="left"/>
<td align="left"/>
<td align="left">&#x221a;</td>
<td align="left">Private</td>
<td align="left">217</td>
<td align="left">71.5</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B5">Czabanski et al. (2012)</xref>
</td>
<td align="left">WFS &#x2b; LS-SVM</td>
<td align="left"/>
<td align="left">&#x221a;</td>
<td align="left"/>
<td align="left">Private</td>
<td align="left">186</td>
<td align="left">92.0</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B7">Fanelli et al. (2013)</xref>
</td>
<td align="left">ST</td>
<td align="left">&#x221a;</td>
<td align="left"/>
<td align="left"/>
<td align="left">Private</td>
<td align="left">122</td>
<td align="left">75</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B6">Dash et al. (2014)</xref>
</td>
<td align="left">GM,NB</td>
<td align="left"/>
<td align="left"/>
<td align="left">&#x221a;</td>
<td align="left">Private</td>
<td align="left">83</td>
<td align="left">69.9</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B36">Stylios et al. (2016)</xref>
</td>
<td align="left">LS-SVM</td>
<td align="left">&#x221a;</td>
<td align="left"/>
<td align="left"/>
<td align="left">CTU-UHB</td>
<td align="left">552</td>
<td align="left">72.81</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B10">Georgoulas et al. (2017)</xref>
</td>
<td align="left">LS-SVM</td>
<td align="left">&#x221a;</td>
<td align="left"/>
<td align="left"/>
<td align="left">CTU-UHB</td>
<td align="left">552</td>
<td align="left">68.54</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B3">C&#xf6;mert et al. (2018)</xref>
</td>
<td align="left">LS-SVM</td>
<td align="left">&#x221a;</td>
<td align="left"/>
<td align="left"/>
<td align="left">CTU-UHB</td>
<td align="left">552</td>
<td align="left">64.64</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B21">Li et al. (2018)</xref>
</td>
<td align="left">CNN</td>
<td align="left"/>
<td align="left">&#x221a;</td>
<td align="left"/>
<td align="left">Private</td>
<td align="left">4,473</td>
<td align="left">93.24</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B32">O&#x2019;Sullivan et al. (2021)</xref>
</td>
<td align="left">ARMA &#x2b; SVM</td>
<td align="left">&#x221a;</td>
<td align="left"/>
<td align="left"/>
<td align="left">CTU-UHB</td>
<td align="left">552</td>
<td align="left">86</td>
</tr>
<tr>
<td align="left">
<xref ref-type="bibr" rid="B26">Lin et al. (2024)</xref>
</td>
<td align="left">LARA</td>
<td align="left">&#x221a;</td>
<td align="left"/>
<td align="left"/>
<td align="left">Private</td>
<td align="left">114</td>
<td align="left">87.2</td>
</tr>
<tr>
<td align="left">Ours (Only FHR)</td>
<td align="left">DenseNet121&#x2b;SK</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
<td align="left">Private</td>
<td align="left">326</td>
<td align="left">81.2/87.69/79</td>
</tr>
<tr>
<td align="left">Ours (FHR &#x2b; UC)</td>
<td align="left">DenseNet121&#x2b;SK</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
<td align="left">&#x221a;</td>
<td align="left">Private</td>
<td align="left">326</td>
<td align="left">94.4/95.88/97</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s4-4">
<title>4.4 Discussion</title>
<p>The following limitations of this study need to be accounted for: first, the limitations of the dataset size and geographic origin (single center in Southwest China) may lead to the model&#x2019;s insufficient ability to generalize to specific populations (e.g., obese pregnant women); second, although the SK Attention module significantly improves the model&#x2019;s performance (3.2% improvement in accuracy), its computational complexity is increased by approximately 15% compared to the base DenseNet121 (<xref ref-type="bibr" rid="B29">Liu et al., 2023</xref>; <xref ref-type="bibr" rid="B38">Tang et al., 2023</xref>), and optimization measures such as quantization compression may be required in extremely resource-constrained environments; third, due to the lack of publicly available bimodal fetal heart monitoring benchmark datasets, existing comparison experiments can only be compared with unimodal methods and traditional machine learning baselines, and this benchmark discrepancy may affect the objectivity of the performance evaluation; lastly, there is a lack of standardized signal preprocessing and annotation specifications in the current field of fetal heart monitoring, which makes it difficult to directly compare the results of different studies with each other. These limitations suggest the need for further research in multi-center large sample validation, computational efficiency optimization, and standardized baseline establishment.</p>
</sec>
</sec>
<sec id="s5">
<title>5 Future plans</title>
<sec id="s5-1">
<title>5.1 Multi-center retrospective validation</title>
<p>To assess the generalization ability of the model, this study plans to conduct multi-center validation jointly with several tertiary hospitals in the future. Each center will independently collect 150 fetal cardiac monitoring samples (including 10% extreme pathology cases) to ensure no overlap with the training set. The following metric will be used to quantify model performance decay; the calculation of GDR is shown in <xref ref-type="disp-formula" rid="e23">Equation 23</xref>.<disp-formula id="e23">
<mml:math id="m40">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>D</mml:mi>
<mml:mi>R</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>U</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>g</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>U</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>U</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>g</mml:mi>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mfrac>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>100</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
<label>(23)</label>
</disp-formula>where GDR (Generalization Decay Rate) reflects the cross-center performance decay rate of the model. When GDR &#x3e;15%, a domain adaptation fine-tuning strategy based on Maximum Mean Discrepancy (MMD) is initiated: the Adam optimizer (learning rate 1 &#xd7; 10<sup>&#x2212;4</sup>, batch size 16) is used to minimize the difference between the feature distributions of the source domain and the target domain in the RKHS space.</p>
</sec>
<sec id="s5-2">
<title>5.2 Special testing for high-risk groups</title>
<p>In order to verify the applicability of the model in high-risk pregnancy populations, special validation is planned to be carried out in the future for three high-risk subpopulations: gestational diabetes, preeclampsia and fetal growth restriction. A stratified sampling strategy will be used to ensure that the samples of each subset are representative, and the ability of the model to identify pathology-specific patterns (such as loss of acceleration in the gestational diabetes group and variable deceleration in the preeclampsia group) will be emphasized. When the F1 value of a specific subset is found to be less than 0.80, the Focal Loss function (&#x3b3; &#x3d; 2) will be used to retrain on the subset samples to alleviate the problem of class imbalance. At the same time, an adversarial discriminator (gradient penalty coefficient &#x3bb; &#x3d; 0.3) will be introduced to minimize the distribution differences between the source domain and the target domain in the feature space, and to improve the generalization ability of the model to the characteristics of high-risk groups.</p>
</sec>
<sec id="s5-3">
<title>5.3 Future research directions</title>
<p>In the future, we will also study image segmentation models in the field of fetal heart rate monitoring, in order to use image segmentation technology to identify important parts such as acceleration, deceleration, and baseline in the fetal heart rate monitoring chart. We will also explore the application of large language models, which have recently become the most popular models in the field of AI.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s6">
<title>6 Conclusion</title>
<p>The model proposed in this study uses a bimodal (FHR &#x2b; UC) signal fusion design to mirror the clinical habit of jointly interpreting the two dynamically related signals (such as the timing coupling of peak contractions and fetal heart rate deceleration), and its input form (including dual-curve images) is highly consistent with the clinical interpretation scenario. It provides obstetricians with reliable objective opinions during CTG monitoring during childbirth and reduces misinterpretation caused by subjective differences. The introduction of the SK attention module dynamically adjusts the receptive field: small-scale convolution captures details for transient fetal heart rate fluctuations (e.g., beat-to-beat variations) and large-scale convolution captures trends for contraction cycle associations (e.g., the lag relationship between late deceleration and contractions), which is consistent with the clinical focus on different pathological patterns and compensates for the shortcomings of DenseNet121&#x2019;s fixed receptive fields. The DenseNet121-SK architecture has only 8.3M parameters (7.98M for the base DenseNet121 and 0.32M for the SK module), which guarantees 95.88% accuracy and 100% abnormal sample recall while maintaining low computational cost, allowing it to run on mid-range GPUs or high-performance CPUs without relying on high-end computing clusters. For primary medical institutions lacking GPUs, model quantization (such as INT8 precision) can further compress the computational requirements while reducing memory footprint. In addition, the dataset is not filled with interpolation, which retains the original signal characteristics, reduces the dependence on complex preprocessing processes, and facilitates reuse in scenarios with simple data acquisition conditions (such as the original image output by fetal heart rate monitors in primary hospitals). 
These characteristics enable the system to effectively assist in clinical decision-making and provide a reliable basis for timely intervention in high-risk pregnancies, and <xref ref-type="fig" rid="F12">Figure 12</xref> shows the process of the proposed model assisting in the diagnosis of intrapartum CTG maps. However, it should be emphasized that the system output should always be used in conjunction with clinical evaluation to form a complete diagnosis and treatment decision-making chain.</p>
<fig id="F12" position="float">
<label>FIGURE 12</label>
<caption>
<p>Flow chart of using the proposed method to assist diagnosis.</p>
</caption>
<graphic xlink:href="fphys-16-1638788-g012.tif">
<alt-text content-type="machine-generated">Diagram showing a three-stage process for fetal heart rate (FHR) and uterine contraction (UC) monitoring. Stage 1: Gravidas undergo fetal heart monitoring, producing FHR and UC images for signal preprocessing via an extraction algorithm. Stage 2: Experts input domain knowledge to assess labeled images marked 'A' and 'N'. Stage 3: The processed signals pass through convolution, dense blocks, transition layers, and pooling, leading to binary output classifications.</alt-text>
</graphic>
</fig>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>The data analyzed in this study is subject to the following licenses/restrictions: The data involves patient privacy, and a data confidentiality agreement has been signed with the hospital. Requests to access these datasets should be directed to Tianxin Qiu, 307397327@qq.com.</p>
</sec>
<sec sec-type="ethics-statement" id="s8">
<title>Ethics statement</title>
<p>Written informed consent was obtained from the subject and/or guardian. This research project has been approved by the Ethics Committee of Longquanyi District People&#x2019;s Hospital in Chengdu, Sichuan, China (AF-KY-2024014).</p>
</sec>
<sec sec-type="author-contributions" id="s9">
<title>Author contributions</title>
<p>TQ: Conceptualization, Data curation, Formal Analysis, Investigation, Methodology, Software, Visualization, Writing &#x2013; original draft, Writing &#x2013; review and editing. XZ: Writing &#x2013; review and editing, Methodology. JZ: Data curation, Resources, Writing &#x2013; review and editing. CL: Data curation, Writing &#x2013; review and editing. SJ: Data curation, Writing &#x2013; review and editing. HC: Writing &#x2013; review and editing. XW: Methodology, Writing &#x2013; review and editing. QY: Supervision, Validation, Writing &#x2013; review and editing, Methodology.</p>
</sec>
<sec sec-type="funding-information" id="s10">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. Sichuan Provincial Administration of Traditional Chinese Medicine Scientific Research Special Grant: Sichuan Provincial Administration of Traditional Chinese Medicine 2024 Chinese Medicine Scientific Research Special Project (Project No.: 2024MS306).</p>
</sec>
<sec sec-type="COI-statement" id="s11">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s12">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s13">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>F.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Review of lightweight deep convolutional neural networks</article-title>. <source>Archives Comput. Methods Eng.</source> <volume>31</volume> (<issue>4</issue>), <fpage>1915</fpage>&#x2013;<lpage>1937</lpage>. <pub-id pub-id-type="doi">10.1007/s11831-023-10032-z</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>DIFF-FECG: a conditional diffusion-based method for fetal ECG extraction from abdominal ECG</article-title>. <source>IEEE Trans. Artif. Intell.</source>, <fpage>1</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1109/tai.2025.3578007</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>C&#xf6;mert</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Kocamaz</surname>
<given-names>A. F.</given-names>
</name>
<name>
<surname>Subha</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Prognostic model based on image-based time-frequency features and genetic algorithm for fetal hypoxia assessment</article-title>. <source>Comput. Biol. Med.</source> <volume>99</volume>, <fpage>85</fpage>&#x2013;<lpage>97</lpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2018.06.003</pub-id>
<pub-id pub-id-type="pmid">29894897</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Costa Santos</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Costa Pereira</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bernardes</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2005</year>). <article-title>Agreement studies in obstetrics and gynaecology: inappropriateness, controversies and consequences</article-title>. <source>BJOG Int. J. Obstetrics and Gynaecol.</source> <volume>112</volume> (<issue>5</issue>), <fpage>667</fpage>&#x2013;<lpage>669</lpage>. <pub-id pub-id-type="doi">10.1111/j.1471-0528.2004.00505.x</pub-id>
<pub-id pub-id-type="pmid">15842294</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Czabanski</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Jezewski</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Matonia</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Jezewski</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Computerized analysis of fetal heart rate signals as the predictor of neonatal acidemia</article-title>. <source>Expert Syst. Appl.</source> <volume>39</volume> (<issue>15</issue>), <fpage>11846</fpage>&#x2013;<lpage>11860</lpage>. <pub-id pub-id-type="doi">10.1016/j.eswa.2012.01.196</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dash</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Quirk</surname>
<given-names>J. G.</given-names>
</name>
<name>
<surname>Djuri&#x107;</surname>
<given-names>P. M.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Fetal heart rate classification using generative models</article-title>. <source>IEEE Trans. Biomed. Eng.</source> <volume>61</volume> (<issue>11</issue>), <fpage>2796</fpage>&#x2013;<lpage>2805</lpage>. <pub-id pub-id-type="doi">10.1109/TBME.2014.2330556</pub-id>
<pub-id pub-id-type="pmid">24951678</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fanelli</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Magenes</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Campanile</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Signorini</surname>
<given-names>M. G.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>Quantitative assessment of fetal well-being through CTG recordings: a new parameter based on phase-rectified signal average</article-title>. <source>IEEE J. Biomed. Health Inf.</source> <volume>17</volume> (<issue>5</issue>), <fpage>959</fpage>&#x2013;<lpage>966</lpage>. <pub-id pub-id-type="doi">10.1109/JBHI.2013.2268423</pub-id>
<pub-id pub-id-type="pmid">25055375</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Francis</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Luz</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Stock</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>Townsend</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Machine learning on cardiotocography data to classify fetal outcomes: a scoping review</article-title>. <source>Comput. Biol. Med.</source> <volume>172</volume>, <fpage>108220</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2024.108220</pub-id>
<pub-id pub-id-type="pmid">38489990</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Fu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2019</year>). &#x201c;<article-title>Dual attention network for scene segmentation</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>, <fpage>3146</fpage>&#x2013;<lpage>3154</lpage>.</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Georgoulas</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Karvelis</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Spilka</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chud&#xe1;&#x10d;ek</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Stylios</surname>
<given-names>C. D.</given-names>
</name>
<name>
<surname>Lhotska</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Investigating pH based evaluation of fetal heart rate (FHR) recordings</article-title>. <source>Health Technol.</source> <volume>7</volume>, <fpage>241</fpage>&#x2013;<lpage>254</lpage>. <pub-id pub-id-type="doi">10.1007/s12553-017-0201-7</pub-id>
<pub-id pub-id-type="pmid">29201590</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>He</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Deep residual learning for image recognition</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>, <conf-loc>Las Vegas, NV, USA</conf-loc>, <conf-date>27-30 June 2016</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>770</fpage>&#x2013;<lpage>778</lpage>.</citation>
</ref>
<ref id="B12">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Hou</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Feng</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Coordinate attention for efficient mobile network design</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>, <fpage>13713</fpage>&#x2013;<lpage>13722</lpage>.</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hruban</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Spilka</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chud&#xe1;&#x10d;ek</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Jank&#x16f;</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Huptych</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bur&#x161;a</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2015</year>). <article-title>Agreement on intrapartum cardiotocogram recordings between expert obstetricians</article-title>. <source>J. Eval. Clin. Pract.</source> <volume>21</volume> (<issue>4</issue>), <fpage>694</fpage>&#x2013;<lpage>702</lpage>. <pub-id pub-id-type="doi">10.1111/jep.12368</pub-id>
<pub-id pub-id-type="pmid">26011725</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Hu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>Squeeze-and-excitation networks</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>, <fpage>7132</fpage>&#x2013;<lpage>7141</lpage>.</citation>
</ref>
<ref id="B15">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Van Der Maaten</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Weinberger</surname>
<given-names>K. Q.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Densely connected convolutional networks</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition</conf-name>, <conf-loc>Honolulu, HI, USA</conf-loc>, <conf-date>21-26 July 2017</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>4700</fpage>&#x2013;<lpage>4708</lpage>.</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jia</surname>
<given-names>Y. J.</given-names>
</name>
<name>
<surname>Ghi</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Pereira</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Perez-Bonfils</surname>
<given-names>A. G.</given-names>
</name>
<name>
<surname>Chandraharan</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Pathophysiological interpretation of fetal heart rate tracings in clinical practice</article-title>. <source>Am. J. Obstetrics Gynecol.</source> <volume>228</volume> (<issue>6</issue>), <fpage>622</fpage>&#x2013;<lpage>644</lpage>. <pub-id pub-id-type="doi">10.1016/j.ajog.2022.05.023</pub-id>
<pub-id pub-id-type="pmid">37270259</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Karmakar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ray</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Basak</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Chatterjee</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kundu</surname>
<given-names>S. K.</given-names>
</name>
</person-group> (<year>2025</year>). &#x201c;<article-title>Fetal health classification using machine learning on cardiotocography data</article-title>,&#x201d; in <conf-name>2025 8th International Conference on Electronics, Materials Engineering and Nano-Technology (IEMENTech)</conf-name>, <conf-loc>Kolkata, India</conf-loc>, <conf-date>31 January 2025 - 02 February 2025</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Khan</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Naseer</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hayat</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Zamir</surname>
<given-names>S. W.</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>F. S.</given-names>
</name>
<name>
<surname>Shah</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Transformers in vision: a survey</article-title>. <source>ACM Comput. Surv. (CSUR)</source> <volume>54</volume> (<issue>10s</issue>), <fpage>1</fpage>&#x2013;<lpage>41</lpage>. <pub-id pub-id-type="doi">10.1145/3505244</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Krupa</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Zahedi</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Ahmed</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hassan</surname>
<given-names>F. M.</given-names>
</name>
</person-group> (<year>2011</year>). <article-title>Antepartum fetal heart rate feature extraction and classification using empirical mode decomposition and support vector machine</article-title>. <source>Biomed. Eng. Online</source> <volume>10</volume>, <fpage>6</fpage>&#x2013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1186/1475-925x-10-6</pub-id>
<pub-id pub-id-type="pmid">21244712</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lewis</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Downe</surname>
<given-names>S.</given-names>
</name>
</person-group>
<collab>FIGO Intrapartum Fetal Monitoring Expert Consensus Panel</collab> (<year>2015</year>). <article-title>FIGO consensus guidelines on intrapartum fetal monitoring: intermittent auscultation</article-title>. <source>Int. J. Gynecol. and Obstetrics</source> <volume>131</volume> (<issue>1</issue>), <fpage>9</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1016/j.ijgo.2015.06.019</pub-id>
<pub-id pub-id-type="pmid">26433400</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z. Z.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Automatic classification of fetal heart rate based on convolutional neural network</article-title>. <source>IEEE Internet Things J.</source> <volume>6</volume> (<issue>2</issue>), <fpage>1394</fpage>&#x2013;<lpage>1401</lpage>. <pub-id pub-id-type="doi">10.1109/jiot.2018.2845128</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019a</year>). &#x201c;<article-title>Selective kernel networks</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>, <fpage>510</fpage>&#x2013;<lpage>519</lpage>.</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019b</year>). <article-title>Spatial group-wise enhance: improving semantic feature learning in convolutional networks</article-title>. <pub-id pub-id-type="doi">10.48550/arXiv.1905.09646</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>H.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>Deep learning attention mechanism in medical image analysis: basics and beyonds</article-title>. <source>Int. J. Netw. Dyn. Intell.</source>, <fpage>93</fpage>&#x2013;<lpage>116</lpage>. <pub-id pub-id-type="doi">10.53941/ijndi0201006</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2025</year>). &#x201c;<article-title>CTGDiff: a conditional diffusion model for cardiotocography signal synthesis</article-title>,&#x201d; in <conf-name>ICASSP 2025-2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</conf-name>, <conf-loc>Hyderabad, India</conf-loc>, <conf-date>06-11 April 2025</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>5</lpage>.</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>J.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Deep learning with information fusion and model interpretation for health monitoring of fetus based on long-term prenatal electronic fetal heart rate monitoring data</article-title>. <pub-id pub-id-type="doi">10.48550/arXiv.2401.15337</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Long</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bai</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lian</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>An attention-based CNN-BiLSTM hybrid neural network enhanced with features of discrete wavelet transformation for fetal acidosis classification</article-title>. <source>Expert Syst. Appl.</source> <volume>186</volume>, <fpage>115714</fpage>. <pub-id pub-id-type="doi">10.1016/j.eswa.2021.115714</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). &#x201c;<article-title>Swin transformer: hierarchical vision transformer using shifted windows</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF international conference on computer vision</conf-name>, <conf-loc>Montreal, QC, Canada</conf-loc>, <conf-date>10-17 October 2021</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>10012</fpage>&#x2013;<lpage>10022</lpage>.</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Deep residual convolutional neural network based on hybrid attention mechanism for ecological monitoring of marine fishery</article-title>. <source>Ecol. Inf.</source> <volume>77</volume>, <fpage>102204</fpage>. <pub-id pub-id-type="doi">10.1016/j.ecoinf.2023.102204</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Madiraju</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Upadhyay</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>C</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Bharati</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2025</year>). &#x201c;<article-title>Fetal health analysis based on CTG</article-title>,&#x201d; in <conf-name>2025 11th International Conference on Communication and Signal Processing (ICCSP)</conf-name> (<publisher-name>IEEE</publisher-name>), <fpage>1706</fpage>&#x2013;<lpage>1711</lpage>.</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mendis</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Karmakar</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Palaniswami</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Brownfoot</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Keenan</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Cross-database evaluation of deep learning methods for intrapartum cardiotocography classification</article-title>. <source>IEEE J. Transl. Eng. Health Med.</source> <volume>13</volume>, <fpage>123</fpage>&#x2013;<lpage>135</lpage>. <pub-id pub-id-type="doi">10.1109/JTEHM.2025.3548401</pub-id>
<pub-id pub-id-type="pmid">40657532</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>O&#x27;Sullivan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Gabruseva</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Boylan</surname>
<given-names>G. B.</given-names>
</name>
<name>
<surname>O&#x27;Riordan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lightbody</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Marnane</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Classification of fetal compromise during labour: signal processing and feature engineering of the cardiotocograph</article-title>,&#x201d; in <conf-name>2021 29th European signal processing conference (EUSIPCO)</conf-name>, <conf-loc>Dublin, Ireland</conf-loc>, <conf-date>23-27 August 2021</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>1331</fpage>&#x2013;<lpage>1335</lpage>.</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Romagnoli</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Sbrollini</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Burattini</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Marcantoni</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Morettini</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Burattini</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Annotation dataset of the cardiotocographic recordings constituting the &#x201c;CTU-CHB intra-partum CTG database&#x201d;</article-title>. <source>Data Brief</source> <volume>31</volume>, <fpage>105690</fpage>. <pub-id pub-id-type="doi">10.1016/j.dib.2020.105690</pub-id>
<pub-id pub-id-type="pmid">32490069</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Spilka</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chud&#xe1;&#x10d;ek</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Jank&#x16f;</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Hruban</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Bur&#x161;a</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Huptych</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2014</year>). <article-title>Analysis of obstetricians&#x2019; decision making on CTG recordings</article-title>. <source>J. Biomed. Inf.</source> <volume>51</volume>, <fpage>72</fpage>&#x2013;<lpage>79</lpage>. <pub-id pub-id-type="doi">10.1016/j.jbi.2014.04.010</pub-id>
<pub-id pub-id-type="pmid">24747355</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Steer</surname>
<given-names>P. J.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Has electronic fetal heart rate monitoring made a difference?</article-title> <source>Seminars Fetal Neonatal Med.</source> <volume>13</volume> (<issue>1</issue>), <fpage>2</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1016/j.siny.2007.09.005</pub-id>
<pub-id pub-id-type="pmid">18271079</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Stylios</surname>
<given-names>C. D.</given-names>
</name>
<name>
<surname>Georgoulas</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Karvelis</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Spilka</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Chud&#xe1;&#x10d;ek</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Lhotska</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Least squares support vector machines for FHR classification and assessing the pH based categorization</article-title>,&#x201d; in <conf-name>XIV Mediterranean Conference on Medical and Biological Engineering and Computing 2016</conf-name>, <conf-loc>Paphos, Cyprus</conf-loc>, <conf-date>March 31st-April 2nd 2016</conf-date> (<publisher-name>Springer International Publishing</publisher-name>), <fpage>1211</fpage>&#x2013;<lpage>1215</lpage>.</citation>
</ref>
<ref id="B37">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Szegedy</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Vanhoucke</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Ioffe</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Shlens</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wojna</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Rethinking the inception architecture for computer vision</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)</conf-name>, <conf-loc>Las Vegas, NV, USA</conf-loc>, <conf-date>27-30 June 2016</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>2818</fpage>&#x2013;<lpage>2826</lpage>.</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Jiang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>A hybrid neural network model with attention mechanism for state of health estimation of lithium-ion batteries</article-title>. <source>J. Energy Storage</source> <volume>68</volume>, <fpage>107734</fpage>. <pub-id pub-id-type="doi">10.1016/j.est.2023.107734</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Valero-Carreras</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Alcaraz</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Landete</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Comparing two SVM models through different metrics based on the confusion matrix</article-title>. <source>Comput. and Operations Res.</source> <volume>152</volume>, <fpage>106131</fpage>. <pub-id pub-id-type="doi">10.1016/j.cor.2022.106131</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zuo</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>Q.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>ECA-Net: efficient channel attention for deep convolutional neural networks</article-title>,&#x201d; in <conf-name>Proceedings of the IEEE/CVF conference on computer vision and pattern recognition</conf-name>, <fpage>11534</fpage>&#x2013;<lpage>11542</lpage>.</citation>
</ref>
<ref id="B41">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Woo</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>J. Y.</given-names>
</name>
<name>
<surname>Kweon</surname>
<given-names>I. S.</given-names>
</name>
</person-group> (<year>2018</year>). &#x201c;<article-title>CBAM: convolutional block attention module</article-title>,&#x201d; in <conf-name>Proceedings of the European conference on computer vision (ECCV)</conf-name>, <fpage>3</fpage>&#x2013;<lpage>19</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-01234-2_1</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>R. Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>L. S.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>SimAM: a simple, parameter-free attention module for convolutional neural networks</article-title>,&#x201d; in <conf-name>Proceedings of the International Conference on Machine Learning</conf-name>, <fpage>11863</fpage>&#x2013;<lpage>11874</lpage>.</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Bai</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Selective kernel convolution deep residual network based on channel-spatial attention mechanism and feature fusion for mechanical fault diagnosis</article-title>. <source>ISA Trans.</source> <volume>133</volume>, <fpage>369</fpage>&#x2013;<lpage>383</lpage>. <pub-id pub-id-type="doi">10.1016/j.isatra.2022.06.035</pub-id>
<pub-id pub-id-type="pmid">35798589</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>