<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="brief-report" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Energy Res.</journal-id>
<journal-title>Frontiers in Energy Research</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Energy Res.</abbrev-journal-title>
<issn pub-type="epub">2296-598X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1647197</article-id>
<article-id pub-id-type="doi">10.3389/fenrg.2025.1647197</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Energy Research</subject>
<subj-group>
<subject>Brief Research Report</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Robust fault detection in electrochemical energy storage systems under label noise: applications to lithium-ion batteries and transformer windings</article-title>
<alt-title alt-title-type="left-running-head">He et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fenrg.2025.1647197">10.3389/fenrg.2025.1647197</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>He</surname>
<given-names>Tao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3067783/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Liu</surname>
<given-names>Wei</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wu</surname>
<given-names>Xin</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wei</surname>
<given-names>Yu</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>State Grid Anhui Electric Power Co., Ltd.</institution>, <institution>Ma&#x2019;anshan Power Supply Company</institution>, <addr-line>Anhui</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>State Grid Anhui Electric Power Research Institute</institution>, <addr-line>Anhui</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1612273/overview">Shuang Zhao</ext-link>, Hefei University of Technology, China</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3104504/overview">Jin Zhang</ext-link>, Anhui University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3104824/overview">Longlei Bai</ext-link>, Harbin Engineering University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3106840/overview">Junfei Jiang</ext-link>, Guangdong Electric Power Design and Research Institute, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Tao He, <email>1650578210@qq.com</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>22</day>
<month>08</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>13</volume>
<elocation-id>1647197</elocation-id>
<history>
<date date-type="received">
<day>15</day>
<month>06</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>16</day>
<month>07</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 He, Liu, Wu and Wei.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>He, Liu, Wu and Wei</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Reliable fault detection is essential for ensuring the safe and efficient operation of electrochemical energy storage systems, including lithium-ion batteries and transformer windings. However, the performance of machine learning-based fault diagnosis models is often degraded in practice due to label noise in training data, caused by sensor inaccuracies, ambiguous fault transitions, and imperfect labeling processes. This paper proposes a lightweight and effective kernel-based data rectification framework to improve the robustness of fault detection under noisy label conditions. The method identifies and discards low-density data points that are statistically more likely to be mislabeled, using kernel density estimation and a tunable data discarding strategy. The approach is computationally efficient, classifier-agnostic, and easily applicable to existing fault diagnosis pipelines. We evaluate the proposed method on two datasets: simulated lithium-ion battery voltage data under various fault scenarios, and transformer winding oscillation wave data under multiple winding fault conditions. The results demonstrate that the rectification framework significantly improves classification accuracy across both Support Vector Machine (SVM) and Extreme Learning Machine (ELM) classifiers. Furthermore, the choice of discarding ratio is shown to be critical, with optimal performance achieved when the ratio is tuned close to the underlying noise level. These results highlight the potential of the proposed method to enhance the reliability of fault diagnosis in electrochemical energy storage systems. Future work will explore adaptive strategies to automatically optimize the rectification strength without requiring prior knowledge of the noise rate, and extend the framework to multi-sensor and multi-modal monitoring scenarios.</p>
</abstract>
<kwd-group>
<kwd>fault diagnosis</kwd>
<kwd>robust classification</kwd>
<kwd>kernel density estimation</kwd>
<kwd>label noise</kwd>
<kwd>lithium-ion batteries</kwd>
<kwd>transformer windings</kwd>
</kwd-group>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Electrochemical Energy Storage</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>Ensuring the safe and reliable operation of electrochemical energy storage systems is of critical importance across a wide range of industrial, transportation, and grid applications. Among these systems, lithium-ion batteries and transformer windings represent two key components with extensive deployments. Lithium-ion batteries are widely used in electric vehicles (<xref ref-type="bibr" rid="B14">&#x15e;en et al., 2024</xref>), renewable energy storage systems (<xref ref-type="bibr" rid="B17">Wali et al., 2024</xref>; <xref ref-type="bibr" rid="B9">Hasan et al., 2025</xref>), and portable electronics (<xref ref-type="bibr" rid="B23">Zubi et al., 2018</xref>), while power transformers are essential assets for stable and efficient electric power transmission and distribution (??). Faults in lithium-ion batteries, such as short circuits, overcharging, and over-discharging, can cause severe performance degradation, accelerate aging, and in extreme cases, trigger thermal runaway and fire hazards (<xref ref-type="bibr" rid="B18">Wang et al., 2024</xref>; <xref ref-type="bibr" rid="B16">Tahir and Tenbohlen, 2023</xref>). Likewise, transformer winding faults, including axial displacement, local buckling, inter-disc short circuits, and inter-turn short circuits, can compromise insulation integrity and lead to catastrophic transformer failures (<xref ref-type="bibr" rid="B13">Pei et al., 2023</xref>). Therefore, timely and accurate fault detection is a crucial function to ensure the safety, reliability, and longevity of these electrochemical energy storage systems in practical applications.</p>
<p>Recent advances in data-driven fault diagnosis leverage sensor measurements and machine learning techniques to automatically classify the states of electrochemical energy storage systems, including lithium-ion batteries and transformer windings (<xref ref-type="bibr" rid="B11">Kouhestani et al., 2023</xref>; <xref ref-type="bibr" rid="B1">Abdolrasol et al., 2024</xref>; <xref ref-type="bibr" rid="B18">Wang et al., 2024</xref>; <xref ref-type="bibr" rid="B16">Tahir and Tenbohlen, 2023</xref>; <xref ref-type="bibr" rid="B13">Pei et al., 2023</xref>; <xref ref-type="bibr" rid="B3">Deng et al., 2023</xref>; <xref ref-type="bibr" rid="B10">Hong et al., 2021</xref>). However, in practical applications, the quality of labeled training data is often compromised. Sensor noise, ambiguous fault transitions, and manual or heuristic labeling processes introduce <italic>label noise</italic>, where a significant fraction of training labels may be incorrect or inconsistent (<xref ref-type="bibr" rid="B4">Fan et al., 2025</xref>). Such label noise severely degrades the performance and reliability of supervised learning models (<xref ref-type="bibr" rid="B6">Goodfellow et al., 2016</xref>), posing a major obstacle to deploying robust fault detection frameworks in real-world energy storage systems. In the case of lithium-ion batteries, mislabeling may arise from overlapping voltage patterns during early-stage faults or human annotation errors. Likewise, for transformer windings, data-driven classifiers trained on frequency response analysis (FRA) or vibration signals are also vulnerable to labeling errors, given the subtle and complex nature of winding deformation and short-circuit phenomena. These challenges motivate the development of robust fault detection methods that can tolerate mislabeled data and preserve high diagnostic accuracy.</p>
<p>Although various robust learning techniques have been developed in the machine learning literature to address label noise, many of these approaches suffer from high computational complexity or require prior knowledge of the noise rate (<xref ref-type="bibr" rid="B22">Zhang et al., 2021</xref>; <xref ref-type="bibr" rid="B7">Han et al., 2018a</xref>; <xref ref-type="bibr" rid="B5">Goldberger and Ben-Reuven, 2017</xref>; <xref ref-type="bibr" rid="B21">Yao et al., 2020</xref>; <xref ref-type="bibr" rid="B15">Shen et al., 2024</xref>), which is typically unknown in practice. Moreover, these methods are often difficult to tune and deploy in resource-constrained hardware for battery or transformer winding fault detection (<xref ref-type="bibr" rid="B20">Wu et al., 2025</xref>).</p>
<p>In this paper, we propose a simple and efficient kernel-based data rectification framework for robust battery fault detection under noisy label conditions. Our method leverages kernel density estimation (KDE) to identify and discard data points located in low-density regions of the feature space, where noisy labels are statistically more likely to occur. The approach is computationally lightweight and classifier-agnostic.</p>
<p>We conduct comprehensive experiments on both simulated lithium-ion battery voltage data and transformer winding fault data, covering normal and various fault scenarios, with different synthetic label noise patterns. Our results demonstrate that the proposed rectification method consistently improves classification accuracy across both Support Vector Machine (SVM) and Extreme Learning Machine (ELM) classifiers. Furthermore, we analyze the sensitivity of the method to the rectification strength (controlled by a discarding ratio <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>), and provide practical insights on its application to lithium-ion batteries and transformer windings. The main contributions of this paper are summarized as follows:<list list-type="simple">
<list-item>
<p>
<inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> We propose a lightweight kernel-based data rectification method to enhance the robustness of fault detection under label noise.</p>
</list-item>
<list-item>
<p>
<inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> We demonstrate the effectiveness of the method across different classifiers and noise scenarios, without requiring knowledge of the true noise rate.</p>
</list-item>
<list-item>
<p>
<inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> We provide practical guidance on tuning the rectification process, and discuss its applicability to real-world fault detection problems in electrochemical energy storage systems.</p>
</list-item>
</list>
</p>
</sec>
<sec sec-type="methods" id="s2">
<title>2 Methods</title>
<sec id="s2-1">
<title>2.1 Challenging issue of fault diagnosis with noisy labels</title>
<p>Formally, let<disp-formula id="e1">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">norm</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2254;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>denote a dataset comprising sensor readings <inline-formula id="inf5">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">X</mml:mi>
<mml:mo>&#x2282;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and their corresponding ground-truth labels <inline-formula id="inf6">
<mml:math id="m7">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
<mml:mo>&#x2282;</mml:mo>
<mml:mi mathvariant="double-struck">N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, which indicate whether the system is in a normal, minor fault, severe fault, or another state. This dataset <inline-formula id="inf7">
<mml:math id="m8">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">norm</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is typically used to train a parameterized classification model <inline-formula id="inf8">
<mml:math id="m9">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>
<xref ref-type="fn" rid="fn1">
<sup>1</sup>
</xref> by solving the following optimization problem:<disp-formula id="e2">
<mml:math id="m10">
<mml:mrow>
<mml:mtable class="cases">
<mml:mtr>
<mml:mtd columnalign="left">
<mml:msub>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="normal">&#x398;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:mi>&#x2113;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">est</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mi mathvariant="sans-serif">s</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi mathvariant="sans-serif">t</mml:mi>
<mml:mo>.</mml:mo>
<mml:mo>&#x2009;</mml:mo>
<mml:mo>&#x2009;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">est</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>Here, <inline-formula id="inf9">
<mml:math id="m11">
<mml:mrow>
<mml:mi>&#x2113;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes a loss function, such as the squared error or cross-entropy. Let <inline-formula id="inf10">
<mml:math id="m12">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">norm</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x22c6;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> be the solution of the optimization problem in <xref ref-type="disp-formula" rid="e2">Equation 2</xref>. Note that <inline-formula id="inf11">
<mml:math id="m13">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">norm</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x22c6;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> will vary depending on the dataset used. Therefore, the training process can be viewed as a mapping from a dataset family <inline-formula id="inf12">
<mml:math id="m14">
<mml:mrow>
<mml:mi mathvariant="script">F</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to the optimal parameter <inline-formula id="inf13">
<mml:math id="m15">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">norm</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x22c6;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, denoted by <inline-formula id="inf14">
<mml:math id="m16">
<mml:mrow>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="script">F</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="normal">&#x398;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>However, in practical deployments, the fault labels <inline-formula id="inf15">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are often corrupted due to the following reasons:<list list-type="simple">
<list-item>
<p>
<inline-formula id="inf16">
<mml:math id="m18">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> Ambiguity in defining fault boundaries (e.g., gradual degradation processes).</p>
</list-item>
<list-item>
<p>
<inline-formula id="inf17">
<mml:math id="m19">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> Sensor noise and latency, which can lead to a mismatch between the actual fault occurrence and its recorded label.</p>
</list-item>
<list-item>
<p>
<inline-formula id="inf18">
<mml:math id="m20">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> Manual or heuristic-based labeling procedures, which may introduce bias or inconsistencies.</p>
</list-item>
</list>
</p>
<p>As a result, the dataset may contain <italic>noisy labels</italic>, i.e., <inline-formula id="inf19">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2260;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> with non-negligible probability. That is, the available dataset corresponds to a noisy-labeled version, defined as<disp-formula id="e3">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2254;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
<p>Using <inline-formula id="inf20">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> for training yields a different model parameter, given by<disp-formula id="e4">
<mml:math id="m24">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x22c6;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>which may result in poor predictive accuracy and weak generalization due to overfitting to the noisy labels. Consequently, the resulting classification model is unreliable in safety-critical applications such as system fault detection. The problem described above is illustrated intuitively in <xref ref-type="fig" rid="F1">Figure 1</xref>. To address this challenge, it is necessary to propose a robust classification framework that aims to learn accurate decision boundaries despite the presence of label noise.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Intuitive explanation of the challenge posed by label noise.</p>
</caption>
<graphic xlink:href="fenrg-13-1647197-g001.tif">
<alt-text content-type="machine-generated">Diagram illustrating the impact of sensor noise and labeling bias on classifier training. On the left, clean data \( D_{norm} \) leads to high accuracy and good generalization with the model \( \theta^&#x2a;_{norm} \). On the right, noisy data \( D_{noisy} \) results in low accuracy, overfitting, and bad generalization with the model \( \tilde{\theta}^&#x2a;_{noisy} \).</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2-2">
<title>2.2 Framework of the proposed robust fault diagnosis</title>
<p>As shown in <xref ref-type="fig" rid="F2">Figure 2</xref>, instead of directly using the noisy-labeled dataset <inline-formula id="inf21">
<mml:math id="m25">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> for classifier training, this paper introduces a dataset rectification process to filter or clean the data prior to training. This rectification is defined as a mapping <inline-formula id="inf22">
<mml:math id="m26">
<mml:mrow>
<mml:mi mathvariant="script">R</mml:mi>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="script">F</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="script">F</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, which outputs a rectified dataset <inline-formula id="inf23">
<mml:math id="m27">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> consisting of estimated clean data points. Let <inline-formula id="inf24">
<mml:math id="m28">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denote the number of samples in <inline-formula id="inf25">
<mml:math id="m29">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Importantly, the proposed robust fault diagnosis framework aims not only to optimize the parameter vector <inline-formula id="inf26">
<mml:math id="m30">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> but also to design the rectification algorithm <inline-formula id="inf27">
<mml:math id="m31">
<mml:mrow>
<mml:mi mathvariant="script">R</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, thereby enhancing the robustness of the diagnostic model. The classification (or regression) problem incorporating the rectification algorithm <inline-formula id="inf28">
<mml:math id="m32">
<mml:mrow>
<mml:mi mathvariant="script">R</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is formulated as follows:<disp-formula id="e5">
<mml:math id="m33">
<mml:mrow>
<mml:mtable class="cases">
<mml:mtr>
<mml:mtd columnalign="left">
<mml:msub>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="normal">&#x398;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:mi>&#x2113;</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">est</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mi mathvariant="sans-serif">s</mml:mi>
<mml:mo>.</mml:mo>
<mml:mi mathvariant="sans-serif">t</mml:mi>
<mml:mo>.</mml:mo>
<mml:mspace width="2em"/>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">est</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mspace width="3em"/>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:mo>&#x2200;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mspace width="3em"/>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="script">R</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Framework of the proposed method.</p>
</caption>
<graphic xlink:href="fenrg-13-1647197-g002.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a data processing pipeline. A noisy dataset is input into a dataset rectification process, producing a rectified dataset. This dataset is passed to a classifier trainer, resulting in a model parameter that is both accurate and robust.</alt-text>
</graphic>
</fig>
<p>The following subsections provide detailed explanations of the key components of our proposed framework:<list list-type="bullet">
<list-item>
<p>The construction of the rectification algorithm <inline-formula id="inf30">
<mml:math id="m35">
<mml:mrow>
<mml:mi mathvariant="script">R</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> using a kernel-based approach, along with a theoretical justification of how this rectification improves the robustness of fault diagnosis;</p>
</list-item>
<list-item>
<p>A comparative analysis between the proposed kernel-based rectification method and several existing approaches, highlighting the practical advantages of our method for real-world deployment;</p>
</list-item>
<list-item>
<p>A complete description of the robust fault detection algorithm that integrates the rectification process into the training pipeline.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s2-3">
<title>2.3 Kernel-based rectification</title>
<sec id="s2-3-1">
<title>2.3.1 Preliminary assumption</title>
<p>Let <inline-formula id="inf33">
<mml:math id="m38">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">real</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="script">X</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="script">Y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denote the function that represents the true underlying relationship between a sensor reading <inline-formula id="inf34">
<mml:math id="m39">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and its corresponding fault-level label <inline-formula id="inf35">
<mml:math id="m40">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> in a system. That is, for every <inline-formula id="inf36">
<mml:math id="m41">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf37">
<mml:math id="m42">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">real</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> holds. This paper refers to <inline-formula id="inf38">
<mml:math id="m43">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">real</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> as the <italic>real classifier</italic>. For <inline-formula id="inf39">
<mml:math id="m44">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, define the input set <inline-formula id="inf40">
<mml:math id="m45">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> by<disp-formula id="e6">
<mml:math id="m46">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2254;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">X</mml:mi>
<mml:mo>:</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">real</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<p>Note that <inline-formula id="inf41">
<mml:math id="m47">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mo>&#x22c3;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> holds. Following the setup in <xref ref-type="bibr" rid="B15">Shen et al. (2024)</xref>, this study assumes that noisy labels <inline-formula id="inf42">
<mml:math id="m48">
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> are randomly assigned to samples <inline-formula id="inf43">
<mml:math id="m49">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> drawn from an independent and identically distributed (i.i.d.) process, which is a reasonable assumption in practical data collection settings. For any class <inline-formula id="inf44">
<mml:math id="m50">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, let <inline-formula id="inf45">
<mml:math id="m51">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denote the conditional probability density function of <inline-formula id="inf46">
<mml:math id="m52">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> given the noisy label <inline-formula id="inf47">
<mml:math id="m53">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. This study makes the following assumption:<disp-formula id="e7">
<mml:math id="m54">
<mml:mrow>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mtext>&#x2009;</mml:mtext>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#x2209;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
</p>
<p>This assumption states that, within the noisy dataset, the density of inputs <inline-formula id="inf48">
<mml:math id="m55">
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> that are correctly labeled is greater than that of inputs that are incorrectly labeled. Such an assumption is practically reasonable, as label noise in real-world datasets typically arises from measurement inaccuracies or labeling errors, yet correctly labeled data should still form the majority. Moreover, this condition is rather weak, as it merely requires that the correct-label density be marginally greater than the incorrect-label density.</p>
</sec>
<sec id="s2-3-2">
<title>2.3.2 Kernel-based data cleaning</title>
<p>Note that the normal dataset <inline-formula id="inf49">
<mml:math id="m56">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">norm</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the noisy dataset <inline-formula id="inf50">
<mml:math id="m57">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> share a common component, namely, the set of sensor readings <inline-formula id="inf51">
<mml:math id="m58">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">data</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2254;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>. Furthermore, <inline-formula id="inf52">
<mml:math id="m59">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">data</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> can be partitioned into <inline-formula id="inf53">
<mml:math id="m60">
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> disjoint subsets as follows:<disp-formula id="e8">
<mml:math id="m61">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">data</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2254;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">data</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
</p>
<p>Consequently, the dataset <inline-formula id="inf54">
<mml:math id="m62">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> can also be partitioned into <inline-formula id="inf55">
<mml:math id="m63">
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> disjoint subsets as follows:<disp-formula id="e9">
<mml:math id="m64">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2254;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:mspace width="1em"/>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
</p>
<p>Specifically, <inline-formula id="inf56">
<mml:math id="m65">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is used to represent a point in <inline-formula id="inf57">
<mml:math id="m66">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">data</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> with <inline-formula id="inf58">
<mml:math id="m67">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denoting the number of data points in <inline-formula id="inf59">
<mml:math id="m68">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">data</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Each set <inline-formula id="inf60">
<mml:math id="m69">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">data</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, for <inline-formula id="inf61">
<mml:math id="m70">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, is assumed to consist of samples drawn independently and identically from a probability distribution with an unknown density function <inline-formula id="inf62">
<mml:math id="m71">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. Kernel density estimation (KDE) is employed to estimate this density <inline-formula id="inf63">
<mml:math id="m72">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> based on the dataset <inline-formula id="inf64">
<mml:math id="m73">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">data</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Let <inline-formula id="inf65">
<mml:math id="m74">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">kde</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denote the kernel density estimator computed from <inline-formula id="inf66">
<mml:math id="m75">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">data</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, defined by<disp-formula id="e10">
<mml:math id="m76">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">kde</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:mi mathvariant="sans-serif">K</mml:mi>
<mml:mi mathvariant="sans-serif">e</mml:mi>
<mml:mi mathvariant="sans-serif">r</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:mo>&#x2200;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">data</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>where <inline-formula id="inf67">
<mml:math id="m77">
<mml:mrow>
<mml:mi mathvariant="sans-serif">K</mml:mi>
<mml:mi mathvariant="sans-serif">e</mml:mi>
<mml:mi mathvariant="sans-serif">r</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is a kernel function and <inline-formula id="inf68">
<mml:math id="m78">
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a smoothing parameter known as the bandwidth.</p>
<p>The bandwidth parameter <inline-formula id="inf69">
<mml:math id="m79">
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> in the kernel density estimator is selected using Silverman&#x2019;s rule of thumb, a widely adopted, data-driven method for kernel bandwidth selection. Specifically, the bandwidth is set to<disp-formula id="e11">
<mml:math id="m80">
<mml:mrow>
<mml:mi>h</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1.06</mml:mn>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:mn>5</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>where <inline-formula id="inf70">
<mml:math id="m81">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the standard deviation of the observed feature samples. Various kernel functions are commonly used, including uniform, triangular, biweight, triweight, Epanechnikov (parabolic), normal, and others. Owing to its desirable mathematical properties, the normal kernel is frequently adopted, with the kernel function given by the normal (Gaussian) density:<disp-formula id="e12">
<mml:math id="m82">
<mml:mrow>
<mml:mi mathvariant="sans-serif">K</mml:mi>
<mml:mi mathvariant="sans-serif">e</mml:mi>
<mml:mi mathvariant="sans-serif">r</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
<mml:msqrt>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>&#x3c0;</mml:mi>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:mfrac>
<mml:mi>exp</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:msup>
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>where <inline-formula id="inf71">
<mml:math id="m83">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
<mml:mo>&#x3e;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> denotes the standard deviation of the normal kernel. In kernel-based data cleaning, the kernel density estimate <inline-formula id="inf72">
<mml:math id="m84">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">kde</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is used to determine whether each data point should be retained in or discarded from the training set. Let <inline-formula id="inf73">
<mml:math id="m85">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denote a density threshold for <inline-formula id="inf74">
<mml:math id="m86">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">kde</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. Then, the rectified dataset <inline-formula id="inf75">
<mml:math id="m87">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is defined as follows:<disp-formula id="e13">
<mml:math id="m88">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2254;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">kde</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>
</p>
<p>For any given density threshold <inline-formula id="inf76">
<mml:math id="m89">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the corresponding empirical outlier ratio is defined as<disp-formula id="e14">
<mml:math id="m90">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">out</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2254;</mml:mo>
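<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">out</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>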
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>where <inline-formula id="inf77">
<mml:math id="m91">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">out</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the number of samples in <inline-formula id="inf78">
<mml:math id="m92">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">data</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> whose estimated density is below <inline-formula id="inf79">
<mml:math id="m93">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Note that <inline-formula id="inf80">
<mml:math id="m94">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> satisfies the property <inline-formula id="inf81">
<mml:math id="m95">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2229;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x2205;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> if <inline-formula id="inf82">
<mml:math id="m96">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2260;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, since the dataset is partitioned according to <inline-formula id="inf83">
<mml:math id="m97">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The complete rectified dataset is then defined by<disp-formula id="e15">
<mml:math id="m98">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x22c3;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>
</p>
<p>This paper adopts the following binary search procedure to determine the density threshold <inline-formula id="inf84">
<mml:math id="m99">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>:<list list-type="simple">
<list-item>
<p>
<inline-formula id="inf85">
<mml:math id="m100">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> Set a discarding ratio <inline-formula id="inf86">
<mml:math id="m101">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. Specifically, a proportion <inline-formula id="inf87">
<mml:math id="m102">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> of the samples in each <inline-formula id="inf88">
<mml:math id="m103">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, for <inline-formula id="inf89">
<mml:math id="m104">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, should be discarded.</p>
</list-item>
<list-item>
<p>
<inline-formula id="inf90">
<mml:math id="m105">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> Initialize <inline-formula id="inf91">
<mml:math id="m106">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">min</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf92">
<mml:math id="m107">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">max</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> such that</p>
</list-item>
</list>
<disp-formula id="e16">
<mml:math id="m108">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">out</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">min</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">out</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">max</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>
<list list-type="simple">
<list-item>
<p>
<inline-formula id="inf93">
<mml:math id="m109">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> Iteratively update the midpoint</p>
</list-item>
</list>
<disp-formula id="e17">
<mml:math id="m110">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">mid</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2254;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">min</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">max</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>/</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
<label>(17)</label>
</disp-formula>and evaluate <inline-formula id="inf94">
<mml:math id="m111">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">out</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">mid</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>;<list list-type="simple">
<list-item>
<p>
<inline-formula id="inf95">
<mml:math id="m112">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> If <inline-formula id="inf96">
<mml:math id="m113">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">out</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">mid</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, update <inline-formula id="inf97">
<mml:math id="m114">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">max</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2254;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">mid</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>; otherwise, set <inline-formula id="inf98">
<mml:math id="m115">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">min</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2254;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">mid</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
</list>
</p>
<p>After a fixed number of iterations, the binary search converges to a threshold <inline-formula id="inf99">
<mml:math id="m116">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> such that <inline-formula id="inf100">
<mml:math id="m117">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">out</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2248;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. It is worth noting that the above algorithm achieves effective data cleaning performance comparable to the method presented in <xref ref-type="bibr" rid="B15">Shen et al. (2024)</xref>, while offering significantly greater computational efficiency. The key mechanism by which the proposed method enhances robustness against label noise lies in its use of kernel density estimation to identify and retain data points located in regions of high data density. Intuitively, in high-density regions of the feature space, the probability of encountering incorrectly labeled samples is relatively low, as these regions are well-supported by the true data distribution corresponding to each class. Conversely, mislabeled or noisy samples are more likely to appear in low-density regions, where the overlap between classes or inconsistencies in the labeling process are more prevalent. By explicitly discarding samples whose estimated density falls below a carefully selected threshold, the proposed method effectively filters out a significant proportion of potential label noise, while preserving the core structure of each class in the training data. This selective data retention substantially reduces the risk of overfitting to noisy labels and improves the generalization ability of the resulting classifier&#x2014;an important property for safety-critical applications such as fault detection.</p>
</sec>
</sec>
<sec id="s2-4">
<title>2.4 Robust fault detection algorithm</title>
<p>In this subsection, we summarize the proposed method in algorithmic form. To mitigate the adverse impact of label noise and enhance the reliability of fault diagnosis, we propose a robust classification framework that incorporates a data rectification step prior to model training. The core idea is to leverage kernel density estimation (KDE) (<xref ref-type="bibr" rid="B2">Botev et al., 2010</xref>) to identify and discard samples likely to be mislabeled, based on the observation that correctly labeled data tend to concentrate in high-density regions of the feature space. The procedure of the robust fault detection algorithm is summarized in <xref ref-type="statement" rid="Algorithm_1">Algorithm 1</xref>.</p>
<p>
<statement content-type="algorithm" id="Algorithm_1">
<label>Algorithm 1</label>
<p>Robust Fault Detection Algorithm.<list list-type="simple">
<list-item>
<p>
<bold>Require:</bold> Noisy labeled dataset <inline-formula id="inf101">
<mml:math id="m118">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>; &#x2003;&#x2003;desired discarding ratio <inline-formula id="inf102">
<mml:math id="m119">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; kernel function &#x2003;&#x2003;<inline-formula id="inf103">
<mml:math id="m120">
<mml:mrow>
<mml:mi mathvariant="sans-serif">K</mml:mi>
<mml:mi mathvariant="sans-serif">e</mml:mi>
<mml:mi mathvariant="sans-serif">r</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>; bandwidth <inline-formula id="inf104">
<mml:math id="m121">
<mml:mrow>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>
<bold>Ensure:</bold> Trained robust classifier <inline-formula id="inf105">
<mml:math id="m122">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>1:&#x2003;Partition <inline-formula id="inf106">
<mml:math id="m123">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> into class-wise subsets: <inline-formula id="inf107">
<mml:math id="m124">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">data</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;<inline-formula id="inf1107">
<mml:math id="m1124">
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> for <inline-formula id="inf108">
<mml:math id="m125">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>2:&#x2003;<bold>for</bold> each class <inline-formula id="inf109">
<mml:math id="m126">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula id="inf110">
<mml:math id="m127">
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> <bold>do</bold>
</p>
</list-item>
<list-item>
<p>3:&#x2003;&#x2003;Estimate density <inline-formula id="inf111">
<mml:math id="m128">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">kde</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> via KDE on &#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;<inline-formula id="inf112">
<mml:math id="m129">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">data</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> using <xref ref-type="disp-formula" rid="e10">Equation 10</xref>.</p>
</list-item>
<list-item>
<p>4:&#x2003;&#x2003;Initialize <inline-formula id="inf113">
<mml:math id="m130">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">min</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf114">
<mml:math id="m131">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">max</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> such that <inline-formula id="inf115">
<mml:math id="m132">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">out</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">min</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
<mml:mo>&#x3c;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;<inline-formula id="inf1115">
<mml:math id="m1132">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">out</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">max</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>5:&#x2003;&#x2003;<bold>while</bold> stopping criterion not met <bold>do</bold>
</p>
</list-item>
<list-item>
<p>6:&#x2003;&#x2003;&#x2003;Compute midpoint: <inline-formula id="inf116">
<mml:math id="m133">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">mid</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2254;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">min</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">max</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>/</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>7:&#x2003;&#x2003;&#x2003;Evaluate <inline-formula id="inf117">
<mml:math id="m134">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">out</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">mid</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>8:&#x2003;&#x2003;&#x2003;<bold>if</bold> <inline-formula id="inf118">
<mml:math id="m135">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">out</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">mid</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> <bold>then</bold>
</p>
</list-item>
<list-item>
<p>9:&#x2003;&#x2003;&#x2003;&#x2003;Update <inline-formula id="inf119">
<mml:math id="m136">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">max</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2254;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">mid</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>10:&#x2003;&#x2003;&#x2003;<bold>else</bold>
</p>
</list-item>
<list-item>
<p>11:&#x2003;&#x2003;&#x2003;&#x2003;Update <inline-formula id="inf120">
<mml:math id="m137">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">min</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2254;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">mid</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
<list-item>
<p>12:&#x2003;&#x2003;&#x2003;<bold>end if</bold>
</p>
</list-item>
<list-item>
<p>13:&#x2003;&#x2003;<bold>end while</bold>
</p>
</list-item>
<list-item>
<p>14:&#x2003;&#x2003;Construct rectified dataset for class <inline-formula id="inf121">
<mml:math id="m138">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>: &#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;<inline-formula id="inf122">
<mml:math id="m139">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2254;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">&#x2223;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mo>&#x303;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">&#x302;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">kde</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x3e;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mover accent="true">
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mo>&#x304;</mml:mo>
</mml:mover>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">th</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>15:&#x2003;<bold>end for</bold>
</p>
</list-item>
<list-item>
<p>16:&#x2003;Aggregate rectified dataset: &#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;<inline-formula id="inf123">
<mml:math id="m140">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x22c3;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>.</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>17:&#x2003;Train classifier <inline-formula id="inf124">
<mml:math id="m141">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> on <inline-formula id="inf125">
<mml:math id="m142">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">rect</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> by solving &#x2003;&#x2003;&#x2003;&#x2003;&#x2003;&#x2003;optimization problem <xref ref-type="disp-formula" rid="e5">Equation 5</xref>.</p>
</list-item>
<list-item>
<p>18:&#x2003;<bold>return</bold> Trained robust classifier <inline-formula id="inf126">
<mml:math id="m143">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
</list>
</p>
</statement>
</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>3 Results</title>
<sec id="s3-1">
<title>3.1 Validation scenario and data acquisition</title>
<sec id="s3-1-1">
<title>3.1.1 Dataset for lithium-ion batteries</title>
<p>Voltage data for both healthy and faulty conditions were collected from simulation models developed in the MATLAB/Simulink environment. The simulation utilizes the LIB pack system designed for two-wheel electric vehicles, operating under both normal and fault-induced driving conditions. Data were acquired from voltage sensors installed within the battery pack, capturing system behavior under various scenarios. Faulty conditions were simulated by introducing short-circuit, overcharge, and over-discharge faults using the thermal resistive fault block. These faults were triggered at 0.2 s under different resistive load settings. The collected dataset includes multiple parameters such as state of charge (SOC), temperature, voltage, and current. In this study, only voltage data from both normal and faulty conditions were used, with the objective of contributing to the prevention of fire hazards in lithium-ion battery systems.</p>
</sec>
<sec id="s3-1-2">
<title>3.1.2 Fault detection in transformer windings</title>
<p>In addition to the lithium-ion battery dataset, a second dataset was considered for evaluating fault detection in transformer windings. This dataset was acquired via Oscillating Wave Testing (OWT), a non-invasive diagnostic technique that captures high-voltage oscillation signals to characterize winding deformations (<xref ref-type="bibr" rid="B19">Wu et al., 2020</xref>). This dataset originates from a 10 kV transformer winding fault simulation platform, where four types of winding faults&#x2014;axial displacement, local buckling, inter-disc short circuit, and inter-turn short circuit&#x2014;were systematically considered. Each fault scenario was labeled based on the known fault type and its severity, as defined during the experimental setup, and repeated under controlled conditions to ensure labeling consistency. The resulting classification dataset includes labeled oscillation wave measurements for these four fault types as well as healthy conditions, enabling evaluation of the proposed robust classification framework in a broader range of energy storage system applications.</p>
</sec>
<sec id="s3-1-3">
<title>3.1.3 Methods for label noise</title>
<p>To evaluate the robustness of the proposed method under realistic noise conditions, we consider the following label noise generation strategies:<list list-type="simple">
<list-item>
<p>
<inline-formula id="inf127">
<mml:math id="m144">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> (Symm.) Symmetric noise: Label noise is generated according to the symmetric noise model described in <xref ref-type="bibr" rid="B12">Patrini et al. (2017)</xref>, where each label is flipped uniformly at random to any other class with a specified noise rate.</p>
</list-item>
<list-item>
<p>
<inline-formula id="inf128">
<mml:math id="m145">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> (Pair.) Pair flipping noise: Label noise is generated according to the pair flipping model described in <xref ref-type="bibr" rid="B8">Han et al. (2018b)</xref>, where labels are flipped to a single specific incorrect class (typically the next class) with a given noise probability.</p>
</list-item>
<list-item>
<p>
<inline-formula id="inf129">
<mml:math id="m146">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> (Rand.) Random noise: Label noise is generated by sampling from a Dirichlet distribution and combining the resulting label confusion matrix with the identity matrix to achieve a target noise rate. This allows for flexible and realistic noise patterns.</p>
</list-item>
</list>
</p>
<p>The above three types comprehensively represent a range of practically relevant label noise patterns in electrochemical energy storage systems&#x2019; fault diagnosis. In this study, we consider noise rates of <inline-formula id="inf130">
<mml:math id="m147">
<mml:mrow>
<mml:mn>15</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf131">
<mml:math id="m148">
<mml:mrow>
<mml:mn>45</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to examine the performance of the proposed method under both moderate and severe label noise scenarios.</p>
</sec>
</sec>
<sec id="s3-2">
<title>3.2 Benchmark algorithms</title>
<p>To evaluate the effectiveness of the proposed robust battery fault detection algorithm, we compare its performance with several baseline and benchmark methods. In particular, we systematically examine how different levels of kernel-based data cleaning affect the performance of two representative classifiers: Support Vector Machine (SVM) and Extreme Learning Machine (ELM).</p>
<p>The following benchmark algorithms are considered:<list list-type="simple">
<list-item>
<p>
<inline-formula id="inf132">
<mml:math id="m149">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> ELM-D: Extreme Learning Machine (ELM) classifier trained directly on the noisy dataset <inline-formula id="inf133">
<mml:math id="m150">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> without data cleaning.</p>
</list-item>
<list-item>
<p>
<inline-formula id="inf134">
<mml:math id="m151">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> ELM-C-10%, ELM-C-20%, ELM-C-30%, ELM-C-40%, ELM-C-50%, ELM-C-60%: ELM classifiers trained on the rectified datasets in which 10%, 20%, 30%, 40%, 50%, and 60% of low-density data points are discarded using the proposed kernel-based rectification method.</p>
</list-item>
<list-item>
<p>
<inline-formula id="inf135">
<mml:math id="m152">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> SVM-D: Support Vector Machine (SVM) classifier trained directly on the noisy dataset <inline-formula id="inf136">
<mml:math id="m153">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">noisy</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> without data cleaning.</p>
</list-item>
<list-item>
<p>
<inline-formula id="inf137">
<mml:math id="m154">
<mml:mrow>
<mml:mo>&#x2022;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> SVM-C-10%, SVM-C-20%, SVM-C-30%, SVM-C-40%, SVM-C-50%, SVM-C-60%: SVM classifiers trained on the rectified datasets in which 10%, 20%, 30%, 40%, 50%, and 60% of low-density data points are discarded using the proposed kernel-based rectification method.</p>
</list-item>
</list>
</p>
<p>This experimental design enables a comprehensive analysis of the robustness and accuracy gains provided by the proposed data rectification framework across different classification models and varying levels of data cleaning. By comparing the <italic>Direct</italic> and <italic>Clean</italic> variants of both ELM and SVM, we can clearly assess the practical benefits of incorporating the rectification step into the battery fault diagnosis pipeline.</p>
</sec>
<sec id="s3-3">
<title>3.3 Performance metric</title>
<p>This section provides an overview of the performance metric used to evaluate the effectiveness of the proposed model. The selected evaluation metric is the classification accuracy, defined as:<disp-formula id="e18">
<mml:math id="m155">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x2254;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">acc</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>/</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">all</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
<label>(18)</label>
</disp-formula>where <inline-formula id="inf138">
<mml:math id="m156">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">acc</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the number of correctly classified test samples in class <inline-formula id="inf139">
<mml:math id="m157">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf140">
<mml:math id="m158">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="sans-serif">all</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the total number of test samples in class <inline-formula id="inf141">
<mml:math id="m159">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</sec>
<sec id="s3-4">
<title>3.4 Validation results of LIB fault detection</title>
<p>
<xref ref-type="fig" rid="F3">Figures 3</xref>, <xref ref-type="fig" rid="F4">4</xref> present the classification accuracy results of ELM and SVM models trained either directly on the noisy dataset or on the rectified dataset obtained using different discarding ratios <inline-formula id="inf142">
<mml:math id="m160">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. As shown in <xref ref-type="fig" rid="F3">Figures 3</xref>, <xref ref-type="fig" rid="F4">4</xref>, both ELM and SVM classifiers exhibit significantly degraded accuracy when trained directly on the noisy dataset, highlighting the detrimental impact of label noise on model performance. In contrast, the proposed kernel-based rectification method substantially improves classification accuracy across both models and under various noise scenarios, demonstrating its effectiveness in mitigating the influence of noisy labels. An important observation is that the choice of discarding ratio <inline-formula id="inf143">
<mml:math id="m161">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> plays a critical role in achieving optimal performance. In particular, when <inline-formula id="inf144">
<mml:math id="m162">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is set close to the true underlying noise rate (e.g., <inline-formula id="inf145">
<mml:math id="m163">
<mml:mrow>
<mml:mn>15</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> or <inline-formula id="inf146">
<mml:math id="m164">
<mml:mrow>
<mml:mn>45</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> in our experiments), the rectification process is able to remove a majority of mislabeled samples while preserving the informative structure of the clean data, thereby leading to superior classification results. It is important to note that in practical applications, the true noise rate is typically unknown. Therefore, developing adaptive strategies to optimize <inline-formula id="inf147">
<mml:math id="m165">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> without requiring prior knowledge of the noise level represents an important direction for future research on robust battery fault detection.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Classification accuracy of ELM. Mean value of 1,000 trials is reported. <bold>(A)</bold> Accuracy at <inline-formula id="inf148">
<mml:math id="m166">
<mml:mrow>
<mml:mn>15</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> noise rate; <bold>(B)</bold> Accuracy at <inline-formula id="inf149">
<mml:math id="m167">
<mml:mrow>
<mml:mn>45</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> noise rate; <bold>(C)</bold> Accuracy improvement after data cleaning at <inline-formula id="inf150">
<mml:math id="m168">
<mml:mrow>
<mml:mn>15</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> noise rate; <bold>(D)</bold> Accuracy improvement after data cleaning at <inline-formula id="inf151">
<mml:math id="m169">
<mml:mrow>
<mml:mn>45</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> noise rate.</p>
</caption>
<graphic xlink:href="fenrg-13-1647197-g003.tif">
<alt-text content-type="machine-generated">Four graphs comparing accuracy and improvement of different noise types over &#x3B4; values. Graphs (a) and (b) display accuracy for 15% and 45% noise, respectively. Graphs (c) and (d) show improvement at 15% and 45% noise. Three methods, labeled Symm, Pair, and Rand, are plotted with distinct markers and colors. The y-axis represents accuracy or improvement percentage, while the x-axis represents &#x3B4; values.</alt-text>
</graphic>
</fig>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Classification accuracy of SVM. Mean value of 1,000 trials is reported. <bold>(A)</bold> Accuracy at <inline-formula id="inf152">
<mml:math id="m170">
<mml:mrow>
<mml:mn>15</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> noise rate; <bold>(B)</bold> Accuracy at <inline-formula id="inf153">
<mml:math id="m171">
<mml:mrow>
<mml:mn>45</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> noise rate; <bold>(C)</bold> Accuracy improvement after data cleaning at <inline-formula id="inf154">
<mml:math id="m172">
<mml:mrow>
<mml:mn>15</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> noise rate; <bold>(D)</bold> Accuracy improvement after data cleaning at <inline-formula id="inf155">
<mml:math id="m173">
<mml:mrow>
<mml:mn>45</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> noise rate.</p>
</caption>
<graphic xlink:href="fenrg-13-1647197-g004.tif">
<alt-text content-type="machine-generated">Four graphs depicting accuracy and improvement percentages related to noise levels. Graphs (a) and (b) show accuracy versus parameter delta at 15% and 45% noise levels, respectively, with &#x22;Symm.&#x22;, &#x22;Pair&#x22;, and &#x22;Rand&#x22; series. Graphs (c) and (d) illustrate corresponding improvements for 15% and 45% noise. The &#x22;Rand&#x22; series shows significant improvement compared to &#x22;Symm.&#x22; and &#x22;Pair.&#x22;</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3-5">
<title>3.5 Validation results of transformer winding fault detection</title>
<p>In addition to the battery fault detection experiments, the proposed kernel-based rectification method was validated on transformer winding fault detection tasks, with four representative fault types: axial displacement (AD), local buckling (LB), inter-disc short circuit (IDSC), and inter-turn short circuit (ITSC). <xref ref-type="fig" rid="F5">Figure 5</xref> provides visual evidence of the discriminative oscillating wave signatures used in our fault diagnosis framework. The high-voltage oscillating wave test (OWT) captures these transient responses by applying a damped AC voltage pulse to the transformer winding and recording the resulting oscillation decay profile. These physically interpretable patterns form the basis of the feature vectors processed by our kernel-based rectification framework. The signal preprocessing pipeline, including noise suppression via wavelet thresholding and feature extraction through resonance frequency analysis, follows the methodology established in <xref ref-type="bibr" rid="B20">Wu et al. (2025)</xref>. Consistent with the battery case study, setting the discarding ratio <inline-formula id="inf156">
<mml:math id="m174">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> slightly larger than the true label noise rate led to robust performance improvements, which is practically feasible because conservative estimates of labeling quality are usually available. Here, a severe label noise scenario with a 45% noise rate was evaluated to rigorously test the method. As shown in <xref ref-type="fig" rid="F6">Figure 6</xref>, both ELM and SVM classifiers without rectification (ELM-D, SVM-D) suffered major accuracy drops across all fault types under this high noise condition. In contrast, applying the kernel-based rectification with <inline-formula id="inf157">
<mml:math id="m175">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>50</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (ELM-C-50%, SVM-C-50%) recovered high classification accuracy, exceeding 80% in all cases. These results confirm that the rectification approach effectively filters out noisy samples while preserving the core structure of each class, maintaining reliable classification performance consistent with the battery case study. This demonstrates the framework&#x2019;s applicability across electrochemical energy storage systems in severe label noise scenarios.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Representative oscillating wave signals under four fault conditions (axial displacement, local buckling, inter-disc short circuit, inter-turn short circuit).</p>
</caption>
<graphic xlink:href="fenrg-13-1647197-g005.tif">
<alt-text content-type="machine-generated">Line graph showing amplitude in kilovolts (kV) over time in seconds (s), with four fault conditions: AD, LB, IDSC, and ITSC. The AD and LB signals follow similar patterns, while IDSC shows greater fluctuation. ITSC stabilizes more quickly.</alt-text>
</graphic>
</fig>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Classification accuracy for transformer winding fault detection. Mean value of 1,000 trials is reported.</p>
</caption>
<graphic xlink:href="fenrg-13-1647197-g006.tif">
<alt-text content-type="machine-generated">Bar chart comparing accuracy percentages across different methods and conditions: ELM-D, ELM-C-20%, ELM-C-50%, SVM-D, SVM-C-20%, and SVM-C-50% for AD, LB, IDSC, and ITSC. ELM-C-50% generally shows the highest accuracy.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3-6">
<title>3.6 Computational considerations</title>
<p>In terms of computational cost, the proposed kernel-based rectification framework was implemented in MATLAB on a standard laptop (Intel Core i7 processor), where the average runtime of the KDE-based data cleaning step was measured at approximately 15 ms per dataset partition containing 1,000 samples. Although we have not yet ported the algorithm to an embedded hardware platform, this processing time is well within the capabilities of modern embedded processors, especially considering that fault diagnosis generally operates on time scales of seconds to minutes. This supports our description of the method as computationally lightweight, while acknowledging that future work will further validate its runtime characteristics in actual embedded environments.</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>4 Discussion</title>
<p>This study presents a robust fault detection framework for electrochemical energy storage systems, integrating a kernel-based data rectification process into the standard classifier training pipeline. The motivation stems from the observation that real-world fault diagnosis systems often face label noise due to measurement errors, labeling inconsistencies, and the gradual nature of certain fault phenomena. Our method systematically addresses this challenge by discarding data points located in low-density regions of the feature space, where mislabeled samples are more likely to occur. Through comprehensive experiments on simulated lithium-ion battery voltage data as well as transformer winding fault data with synthetic label noise, we demonstrate that both ELM and SVM classifiers trained directly on noisy data suffer from substantial accuracy degradation. In contrast, applying the proposed kernel-based rectification step prior to training significantly improves classification performance across various noise scenarios and classifier types. Our results further indicate that tuning the discarding ratio <inline-formula id="inf158">
<mml:math id="m176">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to be close to the true underlying noise rate yields the best performance, as it effectively balances noise removal with the preservation of useful information. From an application perspective, this finding is particularly relevant for electrochemical energy storage systems, where ensuring reliable and robust fault diagnosis is critical for operational safety. By improving the generalization capability of classifiers in the presence of label noise, the proposed framework can enhance the reliability of real-time fault monitoring and help mitigate risks such as catastrophic failures or safety hazards. One limitation of the current approach is that selecting an optimal <inline-formula id="inf159">
<mml:math id="m177">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> requires knowledge of the noise rate, which is typically unknown in practical settings. Developing adaptive mechanisms to automatically estimate or tune <inline-formula id="inf160">
<mml:math id="m178">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> during training is an important direction for future work. Moreover, extending the framework to incorporate additional sensor modalities (e.g., temperature, current, vibration signals) and to support online learning scenarios will further broaden its applicability across advanced energy storage systems. Overall, the proposed method provides a computationally efficient, easy-to-integrate, and practically effective solution for enhancing fault diagnosis in noisy real-world environments.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s6">
<title>Author contributions</title>
<p>TH: Conceptualization, Formal Analysis, Investigation, Methodology, Validation, Writing &#x2013; original draft, Writing &#x2013; review and editing. WL: Formal Analysis, Methodology, Validation, Visualization, Writing &#x2013; original draft, Writing &#x2013; review and editing. XW: Formal Analysis, Validation, Writing &#x2013; original draft, Writing &#x2013; review and editing. YW: Validation, Writing &#x2013; original draft, Writing &#x2013; review and editing.</p>
</sec>
<sec sec-type="funding-information" id="s7">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. This work was supported by the State Grid Company Ltd. Science and Technology Program under Grant SGAHMA00YJJS2400635.</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p> Authors TH, XW, and YW were employed by State Grid Anhui Electric Power Co., Ltd., Ma&#x2019;anshan Power Supply Company.</p>
<p>The remaining author declares that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The authors declare that this study received funding from State Grid Company Ltd. The funder had the following involvement in the study: data collection and analysis.</p>
</sec>
<sec sec-type="ai-statement" id="s9">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn id="fn1">
<label>1</label>
<p>
<inline-formula id="inf161">
<mml:math id="m179">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> may represent a support vector machine, polynomial function, deep neural network, or another model class.</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abdolrasol</surname>
<given-names>M. G. M.</given-names>
</name>
<name>
<surname>Ayob</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Lipu</surname>
<given-names>M. S. H.</given-names>
</name>
<name>
<surname>Ansari</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Kiong</surname>
<given-names>T. S.</given-names>
</name>
<name>
<surname>Saad</surname>
<given-names>M. H. M.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Advanced data-driven fault diagnosis in lithium-ion battery management systems for electric vehicles: progress, challenges, and future perspectives</article-title>. <source>eTransportation</source> <volume>22</volume>, <fpage>100374</fpage>. <pub-id pub-id-type="doi">10.1016/j.etran.2024.100374</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Botev</surname>
<given-names>Z. I.</given-names>
</name>
<name>
<surname>Grotowski</surname>
<given-names>J. F.</given-names>
</name>
<name>
<surname>Kroese</surname>
<given-names>D. P.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Kernel density estimation via diffusion</article-title>. <source>Ann. Statistics</source> <volume>38</volume>, <fpage>2916</fpage>&#x2013;<lpage>2957</lpage>. <pub-id pub-id-type="doi">10.1214/10-AOS799</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Deng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Early fault diagnosis of transformer winding based on leakage magnetic field and DSAN learning method</article-title>. <source>Front. Energy Res.</source> <volume>10</volume>, <fpage>1058378</fpage>. <pub-id pub-id-type="doi">10.3389/fenrg.2022.1058378</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Fault detection for Li-ion batteries of electric vehicles with feature-augmented attentional autoencoder</article-title>. <source>Sci. Rep.</source> <volume>15</volume>, <fpage>18534</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-025-03227-w</pub-id>
<pub-id pub-id-type="pmid">40425701</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Goldberger</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ben-Reuven</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Training deep neural networks using a noise adaption layer</article-title>,&#x201d; in <conf-name>Proceedings International Conference on Learning Representations</conf-name>.</citation>
</ref>
<ref id="B6">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Goodfellow</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Bengio</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Courville</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). <source>Deep learning</source>. <publisher-loc>Cambridge, Massachusetts</publisher-loc>: <publisher-name>MIT Press</publisher-name>.</citation>
</ref>
<ref id="B7">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Han</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Niu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Tsang</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2018a</year>). &#x201c;<article-title>Masking: a new perspective of noisy supervision</article-title>,&#x201d; in <source>Advances in neural information processing systems</source>, <fpage>5835</fpage>&#x2013;<lpage>5846</lpage>.</citation>
</ref>
<ref id="B8">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Han</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Yao</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Niu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>W.</given-names>
</name>
<etal/>
</person-group> (<year>2018b</year>). &#x201c;<article-title>Co-teaching: robust training of deep neural networks with extremely noisy labels</article-title>,&#x201d; in <source>Advances in neural information processing systems</source>, <fpage>8527</fpage>&#x2013;<lpage>8537</lpage>.</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hasan</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>Haque</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Jahirul</surname>
<given-names>M. I.</given-names>
</name>
<name>
<surname>Rasul</surname>
<given-names>M. G.</given-names>
</name>
<name>
<surname>Fattah</surname>
<given-names>I. M. R.</given-names>
</name>
<name>
<surname>Hassan</surname>
<given-names>N. M. S.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Advancing energy storage: the future trajectory of lithium-ion battery technologies</article-title>. <source>J. Energy Storage</source> <volume>120</volume>, <fpage>116511</fpage>. <pub-id pub-id-type="doi">10.1016/j.est.2025.116511</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hong</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Transformer winding fault diagnosis using vibration image and deep learning</article-title>. <source>IEEE Trans. Power Deliv.</source> <volume>36</volume>, <fpage>676</fpage>&#x2013;<lpage>685</lpage>. <pub-id pub-id-type="doi">10.1109/TPWRD.2020.2988820</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kouhestani</surname>
<given-names>H. S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Chandra</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Data-driven prognosis of failure detection and prediction of lithium-ion batteries</article-title>. <source>J. Energy Storage</source> <volume>70</volume>, <fpage>108045</fpage>. <pub-id pub-id-type="doi">10.1016/j.est.2023.108045</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Patrini</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Rozza</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Menon</surname>
<given-names>A. K.</given-names>
</name>
<name>
<surname>Nock</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Qu</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<article-title>Making deep neural networks robust to label noise: a loss correction approach</article-title>,&#x201d; in <conf-name>Proceedings IEEE Conference on Computer Vision and Pattern Recognition</conf-name>, <fpage>1944</fpage>&#x2013;<lpage>1952</lpage>.</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pei</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Fault diagnosis of transformer winding short circuit based on WKPCA-WM and IPOA-CNN</article-title>. <source>Front. Energy Res.</source> <volume>11</volume>, <fpage>1151612</fpage>. <pub-id pub-id-type="doi">10.3389/fenrg.2023.1151612</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>&#x15e;en</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>&#xd6;zcan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Eker</surname>
<given-names>Y. R.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>A review on the lithium-ion battery problems used in electric vehicles</article-title>. <source>Next Sustain.</source> <volume>3</volume>, <fpage>100036</fpage>. <pub-id pub-id-type="doi">10.1016/j.nxsust.2024.100036</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Ouyang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Chance-constrained abnormal data cleaning for robust classification with noisy labels</article-title>. <source>IEEE Trans. Emerg. Top. Comput. Intell.</source>, <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1109/tetci.2024.3375518</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tahir</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Tenbohlen</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Transformer winding fault classification and condition assessment based on random forest using FRA</article-title>. <source>Energies</source> <volume>16</volume>, <fpage>3714</fpage>. <pub-id pub-id-type="doi">10.3390/en16093714</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wali</surname>
<given-names>S. B.</given-names>
</name>
<name>
<surname>Hannan</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Ker</surname>
<given-names>P. J.</given-names>
</name>
<name>
<surname>Rahman</surname>
<given-names>S. A.</given-names>
</name>
<name>
<surname>Le</surname>
<given-names>K. N.</given-names>
</name>
<name>
<surname>Begum</surname>
<given-names>R. A.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>Grid-connected lithium-ion battery energy storage system towards sustainable energy: a patent landscape analysis and technology updates</article-title>. <source>J. Energy Storage</source> <volume>77</volume>, <fpage>109986</fpage>. <pub-id pub-id-type="doi">10.1016/j.est.2023.109986</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Qiu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Diagnosing fault types and degrees of transformer winding combining FRA method with SOA-KELM</article-title>. <source>IEEE Access</source> <volume>12</volume>, <fpage>50287</fpage>&#x2013;<lpage>50299</lpage>. <pub-id pub-id-type="doi">10.1109/access.2024.3385229</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>A new testing method for the diagnosis of winding faults in transformer</article-title>. <source>IEEE Trans. Instrum. Meas.</source> <volume>69</volume>, <fpage>9203</fpage>&#x2013;<lpage>9214</lpage>. <pub-id pub-id-type="doi">10.1109/tim.2020.2998877</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Tao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Detection of structure deformation and insulation condition for transformer windings based on high-voltage oscillating wave</article-title>. <source>IEEE Trans. Instrum. Meas.</source> <volume>74</volume>, <fpage>1</fpage>&#x2013;<lpage>12</lpage>. <pub-id pub-id-type="doi">10.1109/tim.2025.3545720</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Yao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Gong</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Niu</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). &#x201c;<article-title>Dual T: reducing estimation error for transition matrix in label-noise learning</article-title>,&#x201d; in <source>Advances in neural information processing systems</source>.</citation>
</ref>
<ref id="B22">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Niu</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Sugiyama</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Learning noise transition matrix from only noisy labels via total variation regularization</article-title>,&#x201d; in <conf-name>Proceedings International Conference on Machine Learning</conf-name>.</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zubi</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Dufo-L&#xf3;pez</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Carvalho</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Pasaoglu</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>The lithium-ion battery: state of the art and future perspectives</article-title>. <source>Renew. Sustain. Energy Rev.</source> <volume>89</volume>, <fpage>292</fpage>&#x2013;<lpage>308</lpage>. <pub-id pub-id-type="doi">10.1016/j.rser.2018.03.002</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>