<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" dtd-version="1.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Energy Res.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Energy Research</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Energy Res.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2296-598X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1635112</article-id>
<article-id pub-id-type="doi">10.3389/fenrg.2026.1635112</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Identification of unknown crack defects in wind turbine main shafts based on acoustic signature and multi-scale convolutional neural networks</article-title>
<alt-title alt-title-type="left-running-head">Zheng et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fenrg.2026.1635112">10.3389/fenrg.2026.1635112</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Zheng</surname>
<given-names>Liuyu</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Liu</surname>
<given-names>Fabing</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3079318"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zuo</surname>
<given-names>Shihai</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhu</surname>
<given-names>Xuefeng</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal Analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Huang</surname>
<given-names>Guoyong</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
</contrib>
</contrib-group>
<aff id="aff1">
<label>1</label>
<institution>CGN New Energy Investment (Shenzhen) Co., Ltd., Yunnan Branch</institution>, <city>Kunming</city>, <country country="CN">China</country>
</aff>
<aff id="aff2">
<label>2</label>
<institution>Faculty of Civil Aviation and Aeronautics, Kunming University of Science and Technology</institution>, <city>Kunming</city>, <country country="CN">China</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Fabing Liu, <email xlink:href="mailto:1934760845@qq.com">1934760845@qq.com</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-04">
<day>04</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>14</volume>
<elocation-id>1635112</elocation-id>
<history>
<date date-type="received">
<day>26</day>
<month>05</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>28</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>09</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Zheng, Liu, Zuo, Zhu and Huang.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Zheng, Liu, Zuo, Zhu and Huang</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-04">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Wind turbine main shaft crack detection is crucial for operational safety and maintenance planning. Conventional feature-based diagnosis generalizes poorly to complex or unseen cracks, and deep learning is constrained by scarce and imbalanced defect data. This study proposes an acoustic-signature-driven multi-scale CNN (MSCNN) framework for identifying unknown main shaft crack defects.</p>
</sec>
<sec>
<title>Methods</title>
<p>A double-threshold energy to zero-crossing (EZR) segmentation method is introduced to construct acoustic feature maps that capture both transient and steady-state crack characteristics, enhancing detection sensitivity and specificity. The MSCNN architecture automatically extracts multi-scale temporal features without manual feature engineering, while a novel segmentation strategy decomposes complex or unknown cracks into identifiable components for quantitative assessment.</p>
</sec>
<sec>
<title>Results</title>
<p>The proposed EZR-driven MSCNN framework achieves an average recognition accuracy of 90%, representing a 6.73% improvement over extreme learning machine (ELM) and a 3.36% improvement over single-scale CNNs. Cross-platform testing confirms robust adaptability, with accuracy ranging from 83.9% to 87.2% across different turbine models. Visualization analysis demonstrates improved separability of crack-related acoustic features compared to conventional single-scale or handcrafted feature baselines.</p>
</sec>
<sec>
<title>Discussion</title>
<p>This work provides a practical and effective solution for wind turbine crack detection with enhanced capability for detecting diverse and previously unseen crack types in data-scarce scenarios. The proposed framework demonstrates superior recognition stability and supports practical condition monitoring and early warning systems for wind turbine maintenance.</p>
</sec>
</abstract>
<kwd-group>
<kwd>acoustic signature</kwd>
<kwd>crack detection</kwd>
<kwd>multi-scale convolutional neural network (MSCNN)</kwd>
<kwd>unknown defect identification</kwd>
<kwd>wind turbine main shaft</kwd>
</kwd-group>
<funding-group>
<award-group id="gs1">
<funding-source id="sp1">
<institution-wrap>
<institution>Kunming University of Science and Technology</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100007301</institution-id>
</institution-wrap>
</funding-source>
</award-group>
<award-group id="gs2">
<funding-source id="sp2">
<institution-wrap>
<institution>Natural Science Foundation of Yunnan Province</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100005273</institution-id>
</institution-wrap>
</funding-source>
</award-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by the Yunnan Provincial Natural Science Foundation (Grant No. 202401AU070158), the Research Fund of Kunming University of Science and Technology (Grant No. KKZ3202465076), and an Industry&#x2013;University Collaborative Research Project (Contract No. HZ2024K0212A).</funding-statement>
</funding-group>
<counts>
<fig-count count="10"/>
<table-count count="6"/>
<equation-count count="10"/>
<ref-count count="27"/>
<page-count count="00"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Wind Energy</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>The global demand for safe, economical, and renewable energy has driven the rapid growth of wind energy (<xref ref-type="bibr" rid="B10">Hassan et al., 2024</xref>). By the end of 2025, global installed wind power capacity exceeded 700 GW (<xref ref-type="bibr" rid="B13">Kumar Dora et al., 2025</xref>). To meet rising energy demands, modern turbines are designed for higher power output, which subsequently requires higher reliability. Ensuring operational safety while controlling maintenance costs has become a critical focus (<xref ref-type="bibr" rid="B8">Gbashi et al., 2024</xref>). A robust condition monitoring and fault diagnosis system is essential to safeguard turbine reliability. As the core mechanical component, the main shaft endures complex forces, making it highly susceptible to fatigue cracks. A sudden fracture can cause severe equipment damage and economic losses (<xref ref-type="bibr" rid="B17">Santelo et al., 2022</xref>). Reliable crack detection is thus essential for safe operation, but it remains challenging. Cracks propagate unpredictably, interact with other defects, and often present as diverse or previously unclassified types, complicating the diagnostic process (<xref ref-type="bibr" rid="B15">Nejad et al., 2022</xref>).</p>
<p>Conventional crack detection methods typically involve signal acquisition, data analysis, and state classification. Ultrasonic sensors, for example, capture acoustic signals whose features are altered by defects (<xref ref-type="bibr" rid="B4">Cheng et al., 2020</xref>; <xref ref-type="bibr" rid="B20">Wang and Chen, 2023</xref>). Data analysis is critical for isolating these features under noise. Classical feature extraction methods include Wavelet Transform and Empirical Mode Decomposition (EMD) (<xref ref-type="bibr" rid="B5">Ding et al., 2019</xref>). More advanced approaches have focused on enhancing signals in noisy environments (<xref ref-type="bibr" rid="B24">Xia et al., 2020</xref>; <xref ref-type="bibr" rid="B9">Guo et al., 2019</xref>) or developing hybrid frameworks for complex patterns, such as extended cepstrum analysis or joint amplitude-frequency demodulation (<xref ref-type="bibr" rid="B18">Teng et al., 2019</xref>; <xref ref-type="bibr" rid="B6">Feng et al., 2019</xref>; <xref ref-type="bibr" rid="B21">Wang et al., 2019</xref>). Despite these advances, traditional methods often fail when confronting complex or unseen crack types. Their reliance on manually engineered features and predefined fault models limits their adaptability to the diverse defects found in real-world operations.</p>
<p>Deep learning (DL) has shown strong hierarchical feature learning capabilities, achieving success in complex recognition tasks (<xref ref-type="bibr" rid="B11">Hinton et al., 2006</xref>). Recent studies demonstrate that multi-scale convolutional neural networks (MSCNNs) are particularly effective at learning discriminative fault patterns from sensor data. Research shows MSCNNs can effectively capture features at different scales, achieving high accuracy and robustness against noise and varying loads (<xref ref-type="bibr" rid="B16">Peng et al., 2025</xref>; <xref ref-type="bibr" rid="B2">Chen et al., 2021</xref>; <xref ref-type="bibr" rid="B25">Zhao et al., 2025</xref>). However, applying DL to main shaft crack detection poses significant challenges. The operating environment of wind turbines yields datasets with limited defect diversity. Deep learning models, known for their high parameter counts, require large, balanced, and well-labeled datasets (<xref ref-type="bibr" rid="B26">Zhou and Wu, 2022</xref>). In practice: (1) Crack defects vary widely, making it impractical to collect all defect types; (2) Data acquisition and labeling are costly; and (3) Defect samples are far fewer than normal samples, leading to pronounced class imbalance. These factors hinder the development of generalizable models, especially for recognizing unknown crack types in data-scarce scenarios.</p>
<p>To address these challenges, this study proposes an acoustic-signature-driven multi-scale convolutional neural network (MSCNN) for identifying unknown crack defects in wind turbine main shafts. The key innovations are: (1) Acoustic Signature Construction: A double-threshold energy to zero-crossing segmentation method is introduced to construct acoustic feature maps that capture both transient and steady-state characteristics of cracks, enhancing sensitivity and specificity. (2) MSCNN-Based Feature Learning: A multi-scale CNN architecture automatically extracts features from acoustic maps at different temporal resolutions, capturing both fine-grained details and global patterns without manual feature engineering. (3) Recognition of Unknown Cracks: A segmentation strategy decomposes complex or unknown cracks into smaller components, enabling quantitative assessment and improving recognition accuracy for previously unseen defect types.</p>
<p>The remainder of this paper is organized as follows: <xref ref-type="sec" rid="s2">Section 2</xref> presents the proposed methodology, <xref ref-type="sec" rid="s3">Section 3</xref> describes the experimental setup, <xref ref-type="sec" rid="s4">Section 4</xref> reports and discusses the results, and <xref ref-type="sec" rid="s5">Section 5</xref> concludes with a summary and future research directions.</p>
</sec>
<sec sec-type="methods" id="s2">
<label>2</label>
<title>Methods</title>
<sec id="s2-1">
<label>2.1</label>
<title>Individual acoustic signature extraction via Energy to Zero Ratio (EZR)</title>
<p>The acoustic signature of a wind turbine main shaft is characterized using the Energy to Zero Ratio (EZR) (<xref ref-type="bibr" rid="B26">Zhou and Wu, 2022</xref>; <xref ref-type="bibr" rid="B23">Wang et al., 2021</xref>), which effectively combines frame-level energy and zero-crossing information to highlight defect-related patterns. A dual-threshold segmentation algorithm is applied to localize high-EZR regions, which are then normalized to form sample matrices for subsequent input into the MSCNN. The complete procedure is as follows.</p>
<p>Let <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denote the acquired acoustic waveform. After windowing and framing with frame length <italic>N</italic>, the <italic>i</italic>th frame signal <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is obtained. The frame energy is calculated as in <xref ref-type="disp-formula" rid="e1">Equation 1</xref>:<disp-formula id="e1">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>To mitigate the misinterpretation of transient noise as defect onset or offset, an enhanced energy measure introduces a robustness constant (<xref ref-type="disp-formula" rid="e2">Equation 2</xref>):<disp-formula id="e2">
<mml:math id="m4">
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>log</mml:mi>
<mml:mn>10</mml:mn>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>The robustness constant <inline-formula id="inf3">
<mml:math id="m5">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.003</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> (range <inline-formula id="inf4">
<mml:math id="m6">
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mn>0.001</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.005</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>) was determined through pilot experiments on 100 training samples, providing an optimal balance between noise suppression and feature preservation. Values below 0.001 exhibited excessive sensitivity to noise, while values above 0.005 tended to over-smooth critical transient features.</p>
<p>To stabilize the calculation of the zero-crossing rate (ZCR) and eliminate minor zero-drift artifacts, a central clipping operation is applied to the framed signal <inline-formula id="inf5">
<mml:math id="m7">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, as in <xref ref-type="disp-formula" rid="e3">Equation 3</xref>:<disp-formula id="e3">
<mml:math id="m8">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="" separators="&#x7c;">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2265;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
<p>The central-clipping threshold <inline-formula id="inf6">
<mml:math id="m9">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a dimensionless scaling parameter. The value <inline-formula id="inf7">
<mml:math id="m10">
<mml:mrow>
<mml:mi>&#x3b4;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.7</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> was selected from the range [0.5, 1.0] to maximize drift-artifact removal (91% effectiveness) while minimizing signal distortion (&#x3c;3.5% relative error).</p>
<p>Following central clipping, the ZCR of each frame is computed as in <xref ref-type="disp-formula" rid="e4">Equation 4</xref>, where the sign function is defined in <xref ref-type="disp-formula" rid="e5">Equation 5</xref>:<disp-formula id="e4">
<mml:math id="m11">
<mml:mrow>
<mml:mi>Z</mml:mi>
<mml:mi>C</mml:mi>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="&#x7c;">
<mml:mrow>
<mml:mtext>sign</mml:mtext>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mtext>sign</mml:mtext>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
<disp-formula id="e5">
<mml:math id="m12">
<mml:mrow>
<mml:mtext>sign</mml:mtext>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="" separators="&#x7c;">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>x</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
<p>The Energy to Zero Ratio (EZR) is then defined as in <xref ref-type="disp-formula" rid="e6">Equation 6</xref>:<disp-formula id="e6">
<mml:math id="m13">
<mml:mrow>
<mml:mi>E</mml:mi>
<mml:mi>Z</mml:mi>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>Z</mml:mi>
<mml:mi>C</mml:mi>
<mml:msub>
<mml:mi>R</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<p>The regularization parameter <inline-formula id="inf8">
<mml:math id="m14">
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.03</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> (range [0.01, 0.05]) ensures numerical stability when the zero-crossing rate (ZCR) approaches zero (condition number &#x3c;500), avoiding the overflow errors observed with <inline-formula id="inf9">
<mml:math id="m15">
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mn>0.02</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> while preventing the systematic bias (&#x3c;0.5%) associated with <inline-formula id="inf10">
<mml:math id="m16">
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0.04</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>The dual-threshold segmentation mechanism adopts a two-level decision process to reliably identify defect-related acoustic events. The high threshold <inline-formula id="inf11">
<mml:math id="m17">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is set to the 95th percentile of the EZR distribution across all training samples, yielding <inline-formula id="inf12">
<mml:math id="m18">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.035</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. This percentile-based rule robustly triggers candidate defect waves while limiting false positives from stochastic noise spikes. The low threshold <inline-formula id="inf13">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is fixed at 0.015 (<inline-formula id="inf14">
<mml:math id="m20">
<mml:mrow>
<mml:mo>&#x2248;</mml:mo>
<mml:mn>43</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> of <inline-formula id="inf15">
<mml:math id="m21">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>), determined via receiver operating characteristic (ROC) analysis to balance detection sensitivity and false-alarm rate. Operationally, when an EZR excursion exceeds <inline-formula id="inf16">
<mml:math id="m22">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the algorithm searches bidirectionally (backward and forward in time) for the nearest intersections with <inline-formula id="inf17">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, which define the temporal boundaries of the acoustic signature. This hysteresis strategy captures complete defect signatures despite transient EZR fluctuations. Thresholds were validated by a grid search over <inline-formula id="inf18">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mn>0.025</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.045</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf19">
<mml:math id="m25">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mn>0.3</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>0.5</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> on the validation set; the selected pair (<inline-formula id="inf20">
<mml:math id="m26">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.035</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.015</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>) achieved the highest F1-score (0.92), confirming their effectiveness for wind-turbine main-shaft defect detection.</p>
<p>For individual acoustic signature extraction, the ultrasonic signal is processed frame-by-frame to compute energy and EZR, from which a smoothed EZR sequence is obtained. The dual-threshold scheme then delineates segment boundaries: when EZR first exceeds <inline-formula id="inf21">
<mml:math id="m27">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, a significant event is flagged; the start point is set by backtracking to where EZR falls below <inline-formula id="inf22">
<mml:math id="m28">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and the end point is set when EZR again drops below <inline-formula id="inf23">
<mml:math id="m29">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. This procedure yields stable boundaries and shows strong robustness to noise. To meet the MSCNN input requirements, each extracted segment is resampled to a fixed duration and amplitude-normalized to ensure consistency in temporal length and signal magnitude. In this design, <inline-formula id="inf24">
<mml:math id="m30">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> provides precise event triggering, whereas <inline-formula id="inf25">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> supplies hysteresis, suppressing noise while preserving the completeness of defect information.</p>
<p>This procedure, illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>, exemplifies the extraction of individual acoustic features from a wind turbine main shaft using the EZR method. The left panel shows the raw ultrasonic echo signal, which includes the transmitted wave, intrinsic structural wave, and backwall reflection from the shaft. By applying the dual threshold EZR segmentation method, high EZR segments correlated with potential defects are isolated (as highlighted by the middle arrow). The right panel displays the resulting acoustic signature, comprising both the intrinsic structural response and the moderate crack reflection, which serves as the standardized input for subsequent MSCNN based feature learning.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Example of EZR-based segmentation for a main shaft signal.</p>
</caption>
<graphic xlink:href="fenrg-14-1635112-g001.tif">
<alt-text content-type="machine-generated">Ultrasonic inspection signal graph on the left shows amplitude versus distance in millimeters, identifying transmitted wave, inherent structure, and reflection from shaft backwall. Acoustical signature graph on the right displays amplitude versus distance, highlighting inherent structure and moderate crack. Both graphs illustrate energy-to-zero ratio and driven acoustic feature isolation.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>MSCNN feature learning</title>
<p>To address the limitations of conventional CNN-based crack detection models, such as restricted feature expressiveness from fixed-size convolution kernels and insufficient multi-dimensional feature extraction, this study proposes an MSCNN tailored to the characteristics of acoustic wave signals (<xref ref-type="bibr" rid="B12">Huang and Wang, 2019</xref>; <xref ref-type="bibr" rid="B3">Chen et al., 2024</xref>; <xref ref-type="bibr" rid="B7">Fu et al., 2020</xref>). The MSCNN employs parallel convolutional branches with kernels of varying sizes to extract features at different temporal scales. Small kernels capture fine-grained, short-term details, whereas large kernels extract broader, long-term structural patterns, enabling the network to model the diverse morphology and topology of crack-related signals. The framework diagram of the multi-scale convolutional neural network is shown in <xref ref-type="fig" rid="F2">Figure 2</xref>.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Multi-scale convolution kernels for acoustic feature extraction.</p>
</caption>
<graphic xlink:href="fenrg-14-1635112-g002.tif">
<alt-text content-type="machine-generated">Diagram of a multi-scale convolutional neural network architecture. It shows an input layer processed through three different convolution paths with kernel sizes one by seven, one by five, and one by three. Each path includes convolution and pooling layers, followed by concatenation. A fully connected (FC) layer processes the concatenated output, leading to the final output layer. Arrows indicate the flow of data through the network.</alt-text>
</graphic>
</fig>
<p>A CNN is a deep, feed-forward network characterized by convolutional operations, sparse connections, and weight sharing. A one-dimensional architecture is adopted because: (i) the EZR feature already encodes time-frequency information; (ii) 1D inputs better preserve the temporal structure of transient crack signatures; and (iii) computational efficiency is critical for real-time monitoring. In the MSCNN, convolution kernels of different sizes operate in parallel, each followed by a pooling layer, allowing simultaneous extraction of multi-scale temporal features. Formally, the convolution operation is expressed as in <xref ref-type="disp-formula" rid="e7">Equation 7</xref>:<disp-formula id="e7">
<mml:math id="m32">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>e</mml:mi>
<mml:msubsup>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
<mml:mi>l</mml:mi>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msubsup>
<mml:mi>b</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>d</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="&#x2329;" close="&#x232a;" separators="&#x7c;">
<mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>where <inline-formula id="inf26">
<mml:math id="m33">
<mml:mrow>
<mml:msubsup>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>d</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the input 1D sequence, <inline-formula id="inf27">
<mml:math id="m34">
<mml:mrow>
<mml:msubsup>
<mml:mi>W</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the weight tensor with <inline-formula id="inf28">
<mml:math id="m35">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> output channels and <inline-formula id="inf29">
<mml:math id="m36">
<mml:mrow>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> input channels, <inline-formula id="inf30">
<mml:math id="m37">
<mml:mrow>
<mml:msup>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mi>l</mml:mi>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the bias term, <inline-formula id="inf31">
<mml:math id="m38">
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="&#x7c;">
<mml:mrow>
<mml:mn>3</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>5</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>7</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the kernel size for capturing multi-scale patterns, and different strides match the temporal scale of interest. The activation function <inline-formula id="inf32">
<mml:math id="m39">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is ReLU, chosen because: (i) its sparsity aligns with the clustered sparse energy distribution of acoustic signals; (ii) it suppresses low amplitude noise leakage compared to Leaky ReLU; and (iii) combined with batch normalization, it achieves faster convergence and smaller generalization gaps on our dataset without observable dying neuron effects.</p>
<p>Following convolution, the max-pooling operation is defined in <xref ref-type="disp-formula" rid="e8">Equation 8</xref>:<disp-formula id="e8">
<mml:math id="m40">
<mml:mrow>
<mml:msubsup>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>h</mml:mi>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:munder>
<mml:mi>max</mml:mi>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>g</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="&#x7c;">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>e</mml:mi>
<mml:msubsup>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>h</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>where <inline-formula id="inf33">
<mml:math id="m41">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>e</mml:mi>
<mml:msubsup>
<mml:mi>a</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>h</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> denotes the activation of the <italic>t</italic>th neuron in the <italic>m</italic>th feature map of layer <italic>h</italic>, <italic>g</italic> is the pooling kernel width, and <inline-formula id="inf34">
<mml:math id="m42">
<mml:mrow>
<mml:msubsup>
<mml:mi>p</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>h</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> is the output of the pooling operation. Max-pooling serves multiple purposes: (i) reducing temporal dimensions by half while retaining salient features, (ii) providing translational invariance to minor temporal shifts in crack signatures, and (iii) acting as an implicit regularizer by reducing feature map complexity. Batch normalization, applied before each activation, stabilizes training by normalizing layer inputs, which is particularly beneficial in data-scarce scenarios where gradient estimates from small batches can be unstable. Together, these techniques reduce overfitting risk and accelerate convergence, as evidenced by our ablation studies. This multi-scale framework enhances temporal feature diversity, enabling the model to robustly identify both common and complex crack patterns in wind turbine main shafts.</p>
<p>Outputs from all branches are concatenated into a unified feature vector, followed by batch normalization (BN) to stabilize training and accelerate convergence. The final representation is expressed as in <xref ref-type="disp-formula" rid="e9">Equation 9</xref>:<disp-formula id="e9">
<mml:math id="m43">
<mml:mrow>
<mml:mi>Y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mtext>short</mml:mtext>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mtext>medium</mml:mtext>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mtext>long</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>where <inline-formula id="inf35">
<mml:math id="m44">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mtext>short</mml:mtext>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mtext>medium</mml:mtext>
</mml:msub>
<mml:mo>,</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf36">
<mml:math id="m45">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mtext>long</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the short-, medium-, and long-term features, respectively, and <italic>c</italic>(&#xb7;) denotes the feature concatenation operation.</p>
</sec>
<sec id="s2-3">
<label>2.3</label>
<title>Proposed framework for unknown crack identification</title>
<p>To address the limited feature representation capability of conventional fixed-kernel CNNs in capturing multi-scale acoustic characteristics of crack defects, we propose an integrated framework that combines ultrasonic guided wave acquisition, EZR-based signature extraction, and MSCNN classification. The MSCNN employs parallel convolutional branches with varying kernel sizes to extract features across multiple temporal scales, enabling robust identification of diverse crack types in wind turbine main shafts. Notably, the framework can recognize previously unseen compound crack patterns through similarity based decomposition, eliminating the need for exhaustive training data covering all possible defect combinations. The proposed framework comprises four main stages, progressing from raw ultrasonic signal acquisition to classification of previously unseen crack patterns, as illustrated in <xref ref-type="fig" rid="F3">Figure 3</xref>.<list list-type="simple">
<list-item>
<p>Step 1: Extraction of individual acoustic signatures based on the EZR</p>
</list-item>
</list>
</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Separation and identification of multiple crack defects in wind turbine main shafts using the MSCNN model.</p>
</caption>
<graphic xlink:href="fenrg-14-1635112-g003.tif">
<alt-text content-type="machine-generated">Flowchart depicting a process for crack recognition and classification. It includes three main steps: 1) Extracting acoustic signatures using ultrasonic inspection, showing wave amplitude over distance. 2) Feature learning with a multi-scale convolutional neural network (MSCNN), illustrating layers like Conv1D and Maxpooling1D. 3) Crack recognition involving signature extraction, matching, and similarity scoring. Images illustrate detected conditions: inherent structure, the absence of cracks, and presence of cracks with corresponding checkmarks and crosses.</alt-text>
</graphic>
</fig>
<p>Raw acoustic signals are segmented through windowing and framing operations to produce temporal frames of specified length with partial overlap. For each frame, the energy and zero-crossing count are computed to derive the EZR feature, which effectively captures localized acoustic variations under different structural states. A dual-threshold segmentation algorithm is then applied to isolate salient acoustic signatures, followed by z-score normalization and length standardization. This preprocessing stage enhances crack induced acoustic anomalies while suppressing ambient noise, producing standardized feature matrices suitable for subsequent neural network processing.<list list-type="simple">
<list-item>
<p>Step 2: Multi-scale feature construction using MSCNN</p>
</list-item>
</list>
</p>
<p>The normalized EZR matrices are fed into the MSCNN as one-dimensional time-series inputs, preserving temporal dependencies crucial for detecting transient defect signatures. The MSCNN employs three parallel convolutional branches with different kernel sizes to capture features across multiple temporal scales. Each branch consists of successive Conv1D&#x2013;MaxPool1D blocks with progressively increasing channel dimensions. Features extracted from all branches are concatenated into a unified high-dimensional vector, which is further refined by fully connected layers to enhance discriminative capability for crack identification.<list list-type="simple">
<list-item>
<p>Step 3: Detection and classification of unknown conditions</p>
</list-item>
</list>
</p>
<p>For unknown shaft conditions, the EZR based acoustic signature is extracted following the Step-1 procedure. The signal is subsequently segmented to isolate individual acoustic components, because complex or compound cracks typically manifest as multiple high-amplitude regions in the EZR sequence, each corresponding to a distinct structural anomaly. Empirically, different crack configurations exhibit characteristic EZR patterns: a healthy shaft presents a single peak reflecting the inherent structural response; single-crack states exhibit two peaks; and multi-crack states present three or more peaks. Each segmented component is independently input to the trained MSCNN to obtain a deep feature vector, which is then matched against the reference acoustic-signature library using cosine similarity as defined in <xref ref-type="disp-formula" rid="e10">Equation 10</xref>:<disp-formula id="e10">
<mml:math id="m46">
<mml:mrow>
<mml:mtext>sim</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">v</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">v</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">v</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:msub>
<mml:mi mathvariant="bold">v</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">v</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2062;</mml:mo>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">v</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>where <inline-formula id="inf37">
<mml:math id="m47">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">v</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mi>d</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the query vector, the deep feature of a segmented acoustic component from an unknown sample, with <inline-formula id="inf38">
<mml:math id="m48">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">v</mml:mi>
<mml:mi>q</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf39">
<mml:math id="m49">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes the trained MSCNN embedding function. <inline-formula id="inf40">
<mml:math id="m50">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="bold">v</mml:mi>
<mml:mi>r</mml:mi>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mi>d</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is the reference vector, i.e., the prototype vector of a class in the reference library, computed from training embeddings.</p>
<p>The reference library comprises prototype feature vectors for the elementary crack categories: inherent structure, minor crack, moderate crack, and major defect. If the maximum similarity falls below a predefined threshold (as specified in the experimental section), the component is classified as healthy; otherwise, it is assigned to the crack category with the highest similarity. For compound cracks, all constituent components are identified and their similarity scores are reported, yielding a comprehensive diagnosis.<list list-type="simple">
<list-item>
<p>Step 4: Classification output and validation</p>
</list-item>
</list>
</p>
<p>This decomposition based strategy enables the framework to identify previously unseen compound crack patterns by representing them as combinations of known elementary crack types. Specifically, the method can recognize novel multi-crack states using models trained only on basic single crack categories, thereby significantly improving data efficiency and scalability. The validation design for both known and unknown crack states is detailed in <xref ref-type="table" rid="T3">Tables 3</xref>, <xref ref-type="table" rid="T4">4</xref>, demonstrating the framework&#x2019;s strong generalization capability to unseen defect patterns.</p>
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Experimental setup</title>
<sec id="s3-1">
<label>3.1</label>
<title>Physical characteristics of wind turbine main shaft</title>
<p>The experimental investigation was carried out on a megawatt class wind turbine main shaft located at a wind farm in Yunnan Province, China. The physical characteristics of the tested main shaft are summarized in <xref ref-type="table" rid="T1">Table 1</xref>. The shaft is installed within a semi enclosed compartment characterized by limited space and densely arranged equipment. Due to structural constraints, inspection devices can only access the exposed end face near the blade side, while the remaining sections are enclosed within a protective casing. To enable testing without dismantling the shaft, the exposed end face was selected as the inspection location, and the pulse echo method was adopted for ultrasonic signal acquisition.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Physical characteristics of the wind turbine main shaft.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Parameter category</th>
<th align="left">Specific parameter</th>
<th align="left">Value/Description</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="5" align="left">Geometric dimensions</td>
<td align="left">Total length</td>
<td align="left">2,690 mm</td>
</tr>
<tr>
<td align="left">Maximum diameter</td>
<td align="left">560 mm</td>
</tr>
<tr>
<td align="left">Minimum diameter</td>
<td align="left">420 mm</td>
</tr>
<tr>
<td align="left">Central bore diameter</td>
<td align="left">140 mm</td>
</tr>
<tr>
<td align="left">Cross sectional features</td>
<td align="left">Flange arcs, chamfers, fillets, varying cross-sections</td>
</tr>
<tr>
<td rowspan="3" align="left">Material properties</td>
<td align="left">Material type</td>
<td align="left">42CrMo alloy steel</td>
</tr>
<tr>
<td align="left">Chemical composition</td>
<td align="left">C: 0.38%&#x2013;0.45%, Cr: 0.90%&#x2013;1.20%, Mo: 0.15%&#x2013;0.25%, Mn: 0.40%&#x2013;0.70%, Si: 0.17%&#x2013;0.37%</td>
</tr>
<tr>
<td align="left">Ultrasonic wave propagation velocity</td>
<td align="left">5,930 m/s</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Considering the inspection depth, an excitation frequency of 4 MHz and a 34 mm diameter straight probe were selected to achieve an optimal balance between penetration depth and defect resolution, in line with industry standards for wind turbine component inspection. <xref ref-type="fig" rid="F4">Figure 4</xref> illustrates the experimental setup and ultrasonic signal acquisition process. The data acquisition system comprised high precision ultrasonic sensors, signal conditioners, and a data acquisition card with a 100 MHz sampling frequency to ensure capture of fine signal details.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Experimental setup and ultrasonic signal acquisition from the wind turbine main shaft.</p>
</caption>
<graphic xlink:href="fenrg-14-1635112-g004.tif">
<alt-text content-type="machine-generated">Diagram illustrating the process of ultrasonic testing on a wind turbine's main shaft. It includes labeled components: hub, impeller, tower, nacelle, gearbox, and generator. The setup involves a pulse generator and power amplifier connected to a computer for signal data storage. Ultrasonic waves detect crack defects, with detailed views of the shaft's circumferential surface, flange arc, and transition chamfer. Additional images show a person inspecting equipment and a close-up of a turbine component.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Artificial crack creation and validation</title>
<p>To simulate realistic and variable crack conditions, artificial cracks of varying sizes (minor, moderate, and major) were introduced across the shaft, with their strategic distribution designed to replicate overlapping and heterogeneous defect conditions.</p>
<p>Artificial cracks with precisely controlled dimensions were fabricated using three techniques, enabling the acquisition of acoustic signature signals representing both isolated and composite crack states. Electrical discharge machining was employed for small- and medium-sized cracks, utilizing a 0.2 mm wire to produce defects with high dimensional accuracy and well-defined boundaries, while minimizing alterations to the surrounding material properties. Ultrasonic impact treatment was applied at selected locations to induce microcracks resembling fatigue-induced natural cracks through high-frequency impact loading. Mechanical notching, performed with specialized 0.5 mm tungsten carbide tools, was used to create large cracks with controlled depth and geometry.</p>
<p>Based on statistical data from actual failure cases in the wind energy industry and the structural characteristics of the main shaft, three representative crack sizes were designed: (1) Minor cracks: length 5&#x2013;10 mm, width 0.2&#x2013;0.3 mm, depth 2&#x2013;3 mm; (2) Moderate cracks: length 15&#x2013;25 mm, width 0.3&#x2013;0.4 mm, depth 4&#x2013;6 mm; (3) Major defects: length 30&#x2013;50 mm, width 0.5&#x2013;0.7 mm, depth 8&#x2013;12 mm. To ensure the representativeness of the artificial cracks, their ultrasonic reflection characteristics were compared with those of naturally occurring cracks in defective main shafts retrieved from in service wind turbines. A high degree of similarity (&#x3e;90%) in waveform features, reflection amplitude, and spectral distribution confirmed that the fabricated cracks effectively replicated natural defect conditions.</p>
</sec>
<sec id="s3-3">
<label>3.3</label>
<title>Data acquisition and sample design</title>
<p>A total of eight operating states of the wind turbine main shaft were recorded, covering the healthy condition, single crack states, and multiple crack states. For each state, 400 acoustic signature samples were collected. The acoustic signature data were labeled according to structural components and crack sizes. <xref ref-type="table" rid="T2">Table 2</xref> summarizes the operating states, their identifiers, detailed descriptions, and the corresponding extracted acoustic signatures.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Operational states and acoustic signatures of wind turbine main shaft.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Operating state</th>
<th align="left">State ID</th>
<th align="left">Detailed description</th>
<th align="left">Extracted acoustic signature</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Healthy</td>
<td align="left">C1</td>
<td align="left">Normal operation</td>
<td align="left">Inherent structure</td>
</tr>
<tr>
<td rowspan="3" align="left">Single crack</td>
<td align="left">C2</td>
<td align="left">Inherent structure &#x2b; minor crack</td>
<td align="left">Inherent structure; minor crack</td>
</tr>
<tr>
<td align="left">C3</td>
<td align="left">Inherent structure &#x2b; moderate crack</td>
<td align="left">Inherent structure; moderate crack</td>
</tr>
<tr>
<td align="left">C4</td>
<td align="left">Inherent structure &#x2b; major defect</td>
<td align="left">Inherent structure; major defect</td>
</tr>
<tr>
<td rowspan="4" align="left">Multiple cracks</td>
<td align="left">C5</td>
<td align="left">Inherent structure &#x2b; minor crack &#x2b; moderate crack</td>
<td align="left">Inherent structure; minor crack; moderate crack</td>
</tr>
<tr>
<td align="left">C6</td>
<td align="left">Inherent structure &#x2b; minor crack &#x2b; major defect</td>
<td align="left">Inherent structure; minor crack; major defect</td>
</tr>
<tr>
<td align="left">C7</td>
<td align="left">Inherent structure &#x2b; moderate crack &#x2b; major defect</td>
<td align="left">Inherent structure; moderate crack; major defect</td>
</tr>
<tr>
<td align="left">C8</td>
<td align="left">Inherent structure &#x2b; minor crack &#x2b; moderate crack &#x2b; major defect</td>
<td align="left">Inherent structure; minor crack; moderate crack; major defect</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="T2">Table 2</xref>, the inherent structure component is present in all eight states, yielding 3,200 samples in total (400 per state). Minor cracks appear in C2, C5, C6, and C8, providing 1,600 samples. Moderate cracks are present in C3, C5, C7, and C8, also totaling 1,600 samples. Major defects occur in C4, C6, C7, and C8, likewise totaling 1,600 samples.</p>
<p>To evaluate the model&#x2019;s capability in identifying unknown crack types, the eight operating states were divided into two distinct datasets as shown in <xref ref-type="table" rid="T3">Table 3</xref>.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>Summary of conditions and extracted signatures for multi-defect effect experiments.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Dataset</th>
<th align="left">Operating states</th>
<th align="left">Number of samples</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="4" align="left">Dataset A</td>
<td rowspan="4" align="left">C1; C2; C3; C4</td>
<td align="left">Inherent structure: 400</td>
</tr>
<tr>
<td align="left">Minor crack: 400</td>
</tr>
<tr>
<td align="left">Moderate crack: 400</td>
</tr>
<tr>
<td align="left">Major defect: 400</td>
</tr>
<tr>
<td rowspan="4" align="left">Dataset B</td>
<td rowspan="4" align="left">C5; C6; C7; C8</td>
<td align="left">Inherent structure: 400</td>
</tr>
<tr>
<td align="left">Minor crack: 400</td>
</tr>
<tr>
<td align="left">Moderate crack: 400</td>
</tr>
<tr>
<td align="left">Major defect: 400</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="T3">Table 3</xref>, Dataset A (C1&#x2013;C4) contains healthy and single-crack states and serves as the known-state dataset. Dataset B (C5&#x2013;C8) contains multiple-crack states and serves as the unknown-state dataset.</p>
<p>Two validation experiments were designed: (1) recognition of known crack defect states, to verify model accuracy for known defects; and (2) recognition of unknown crack defect states, to evaluate generalization to unseen multiple-crack states.</p>
<p>As shown in <xref ref-type="table" rid="T4">Table 4</xref>, in Experiment 1, 70% of Dataset A was used for training and 30% for testing, resulting in 1,120 and 480 samples, respectively. In Experiment 2, all samples from Dataset A were used for training, while Dataset B served as the test set, each containing 1,600 samples. This rigorous experimental design ensured that the model was never exposed to composite crack data during training, thereby enabling an objective evaluation of its capability to learn from fundamental crack features and generalize to more complex scenarios. Such a design is crucial for validating the practical applicability of the proposed method, as it directly addresses the challenges encountered in real-world engineering applications.</p>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>Training and testing sample sets for validation experiments.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Validation purpose</th>
<th align="left">Training set</th>
<th align="left">Testing set</th>
<th align="left">Samples (training: testing)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Known crack defect recognition</td>
<td align="left">Dataset A</td>
<td align="left">Dataset A</td>
<td align="left">1120: 480</td>
</tr>
<tr>
<td align="left">Unknown crack defect recognition</td>
<td align="left">Dataset A</td>
<td align="left">Dataset B</td>
<td align="left">1600:1600</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3-4">
<label>3.4</label>
<title>MSCNN network architecture parameters</title>
<p>
<xref ref-type="table" rid="T5">Table 5</xref> presents the detailed network architecture parameters of the MSCNN, including the kernel sizes, strides, padding, channel configurations for each branch, and the output dimensions at each stage.</p>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>MSCNN network architecture parameters.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Layer type</th>
<th align="left">Branch</th>
<th align="left">Kernel size</th>
<th align="left">Stride</th>
<th align="left">Padding</th>
<th align="left">Channels (per block)</th>
<th align="left">Output shape (per stage)</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Input</td>
<td align="left">&#x2013;</td>
<td align="left">&#x2013;</td>
<td align="left">&#x2013;</td>
<td align="left">&#x2013;</td>
<td align="left">1</td>
<td align="left">1 &#xd7; 1024</td>
</tr>
<tr>
<td align="left">Conv1D &#x2b; max Pool1D &#xd7;3</td>
<td align="left">A</td>
<td align="left">7</td>
<td align="left">1</td>
<td align="left">3</td>
<td align="left">32 &#x2192; 64 &#x2192; 128</td>
<td align="left">32 &#xd7; 1024 &#x2192; 64 &#xd7; 512 &#x2192; 128 &#xd7; 256 &#x2192; 128 &#xd7; 128</td>
</tr>
<tr>
<td align="left">Conv1D &#x2b; max Pool1D &#xd7;3</td>
<td align="left">B</td>
<td align="left">5</td>
<td align="left">1</td>
<td align="left">2</td>
<td align="left">32 &#x2192; 64 &#x2192; 128</td>
<td align="left">32 &#xd7; 1024 &#x2192; 64 &#xd7; 512 &#x2192; 128 &#xd7; 256 &#x2192; 128 &#xd7; 128</td>
</tr>
<tr>
<td align="left">Conv1D &#x2b; max Pool1D &#xd7;3</td>
<td align="left">C</td>
<td align="left">3</td>
<td align="left">1</td>
<td align="left">1</td>
<td align="left">32 &#x2192; 64 &#x2192; 128</td>
<td align="left">32 &#xd7; 1024 &#x2192; 64 &#xd7; 512 &#x2192; 128 &#xd7; 256 &#x2192; 128 &#xd7; 128</td>
</tr>
<tr>
<td align="left">Concat</td>
<td align="left">All</td>
<td align="left">&#x2013;</td>
<td align="left">&#x2013;</td>
<td align="left">&#x2013;</td>
<td align="left">384</td>
<td align="left">384 &#xd7; 128</td>
</tr>
<tr>
<td align="left">Global avg Pool1D</td>
<td align="left">&#x2013;</td>
<td align="left">&#x2013;</td>
<td align="left">&#x2013;</td>
<td align="left">&#x2013;</td>
<td align="left">384</td>
<td align="left">384 &#xd7; 1</td>
</tr>
<tr>
<td align="left">FC1</td>
<td align="left">&#x2013;</td>
<td align="left">&#x2013;</td>
<td align="left">&#x2013;</td>
<td align="left">&#x2013;</td>
<td align="left">256</td>
<td align="left">256 &#xd7; 1</td>
</tr>
<tr>
<td align="left">FC2</td>
<td align="left">&#x2013;</td>
<td align="left">&#x2013;</td>
<td align="left">&#x2013;</td>
<td align="left">&#x2013;</td>
<td align="left">4</td>
<td align="left">4 &#xd7; 1</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>
<xref ref-type="table" rid="T5">Table 5</xref> presents the detailed parameters of the MSCNN architecture. The network employs a three-branch design with kernel sizes of 7, 5, and 3 to extract multi-scale temporal features from the 1 &#xd7; 1024 EZR acoustic signatures. Each branch contains three consecutive Conv1D&#x2013;MaxPool1D blocks with progressively increasing channels (32, 64, and 128). The outputs from the three branches are concatenated, followed by a global average pooling layer and a two-layer fully connected (FC) classifier for four-class defect recognition.</p>
</sec>
</sec>
<sec id="s4">
<label>4</label>
<title>Experimental results</title>
<sec id="s4-1">
<label>4.1</label>
<title>Individual acoustic signature extraction via EZR</title>
<p>The collected acoustic signals are first subjected to noise reduction processing, followed by adaptive extraction of acoustic signature features.</p>
<p>As shown in <xref ref-type="fig" rid="F5">Figure 5</xref>, the horizontal axis represents the ultrasonic propagation distance along the wind turbine main shaft. In <xref ref-type="fig" rid="F5">Figure 5a</xref>, the raw ultrasonic signal exhibits substantial random fluctuations and high-frequency noise, which obscure meaningful features and make the waveform appear chaotic. In contrast, <xref ref-type="fig" rid="F5">Figure 5b</xref> presents the denoised signal, where the waveform is noticeably smoother, the main peaks are more distinct, and high-frequency noise is effectively suppressed. This enhancement improves both feature clarity and signal interpretability. Notably, pronounced peaks and discontinuities particularly near 1500 mm and 2000 mm are clearly visible after denoising, indicating potential defect-related features. Overall, the denoising process effectively removes high-frequency interference while preserving critical structural information, thereby producing a cleaner, more analytically useful signal suitable for feature extraction and defect localization.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Comparison of collected and denoised acoustic signals. <bold>(a)</bold> Collected signals. <bold>(b)</bold> Signals after noise reduction.</p>
</caption>
<graphic xlink:href="fenrg-14-1635112-g005.tif">
<alt-text content-type="machine-generated">Two line graphs comparing amplitude and distance in millimeters. Graph (a) shows fluctuating amplitude around 0.5, with a peak near 1500 mm. Graph (b) shows low amplitude with spikes near 1500 mm and 2700 mm.</alt-text>
</graphic>
</fig>
<p>To identify the onset and endpoint of high-threshold intervals corresponding to potential defects, a double-threshold segmentation method is applied to the EZR processed acoustic signal. Each individual acoustic signature is defined as a fixed-length segment starting from the onset point determined by the double-threshold method, enabling targeted feature extraction. Following segmentation, each signature is normalized, and a sample matrix is constructed for input into the MSCNN.</p>
<p>As illustrated in <xref ref-type="fig" rid="F6">Figure 6</xref>, the proposed algorithm incorporates a two-level decision mechanism based on EZR. In the first stage, a high threshold T2 is applied to the short-time EZR curve to preliminarily identify candidate structural or defect waves, i.e., the segment between points A and B. In the second stage, a lower threshold T1 is introduced. From point A, the algorithm searches leftward, and from point B, rightward, to locate points C and D where the signal intersects T1. These points define the start and end of the acoustic signature. To ensure uniform segment lengths, the waveform beyond T1 is extended to a fixed duration. The segmented acoustic signal is thus divided into fixed-length individual signatures, with their start and end positions marked by solid and dashed black lines in the figure. Experimental results demonstrate that this method effectively distinguishes between defect-free and defective signals, providing a robust dataset for subsequent defect identification and classification. The resulting segmented acoustic signatures are shown in <xref ref-type="fig" rid="F7">Figure 7</xref>.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Segmentation of acoustic signature using EZR.</p>
</caption>
<graphic xlink:href="fenrg-14-1635112-g006.tif">
<alt-text content-type="machine-generated">Graph showing acoustic signals and short-term energy-zero ratios for a fan spindle. The top graph displays amplitude over time with marked starting and ending points. The bottom graph shows energy-zero ratios with thresholds T1 and T2, and points A, B, C, and D labeled along the timeline.</alt-text>
</graphic>
</fig>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Acoustic signature after segmentation. <bold>(a)</bold> Inherent structure. <bold>(b)</bold> Inherent structure &#x2b; minor crack. <bold>(c)</bold> Inherent structure &#x2b; moderate crack. <bold>(d)</bold> Inherent structure &#x2b; major defect.</p>
</caption>
<graphic xlink:href="fenrg-14-1635112-g007.tif">
<alt-text content-type="machine-generated">Four graphs labeled (a) to (d) depict amplitude against distance in millimeters. Graph (a) shows a sharp peak labeled &#x22;inherent structure.&#x22; Graph (b) includes a smaller peak labeled &#x22;minor crack.&#x22; Graph (c) features a slightly larger peak for &#x22;moderate crack.&#x22; Graph (d) shows a significant peak labeled &#x22;major crack.&#x22; All graphs display data from 1670 to 1730 mm on the x-axis.</alt-text>
</graphic>
</fig>
<p>As shown in <xref ref-type="fig" rid="F7">Figure 7</xref>, the four types of acoustic signatures correspond to the inherent structure, minor crack, moderate crack, and major defect. In <xref ref-type="fig" rid="F7">Figure 7a</xref>, a single prominent peak represents the inherent structure, i.e., the normal ultrasonic propagation path in a structurally intact shaft, indicating no interference from cracks or defects. In <xref ref-type="fig" rid="F7">Figure 7b</xref>, a minor secondary peak near 1700 mm suggests a small defect, such as a shallow crack, with limited structural impact. <xref ref-type="fig" rid="F7">Figure 7c</xref> reveals a more pronounced secondary peak in the same region, indicating a medium-sized crack with higher energy reflection. In <xref ref-type="fig" rid="F7">Figure 7d</xref>, the secondary peak near 1700 mm exhibits significantly greater amplitude than in the minor and moderate crack cases, indicating a major defect or severe material flaw that may require urgent intervention. Across all cases, the inherent structure is consistently observed, underscoring its role as a fundamental ultrasonic feature of the main shaft. Furthermore, the amplitude of the reflected peak increases with defect severity, from minor cracks to major defects, demonstrating that peak amplitude in ultrasonic signals serves as a reliable indicator for defect sizing and severity assessment. This finding provides a valuable basis for non-destructive evaluation of main shaft integrity in wind turbines.</p>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Model performance evaluation</title>
<sec id="s4-2-1">
<label>4.2.1</label>
<title>Comparison of experimental results for multi-scale feature extraction</title>
<p>To assess the effectiveness of the improved MSCNN in extracting features from various crack defects, as well as in defect separation and identification, comparative experiments were conducted between a standard CNN with a fixed size convolution kernel and the proposed MSCNN model. The experimental analysis focuses on the performance differences between the two architectures. The respective loss functions and recognition accuracies are presented in <xref ref-type="fig" rid="F8">Figure 8</xref>.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>Loss function and accuracy of CNN and MSCNN. <bold>(a)</bold> Loss function. <bold>(b)</bold> Accuracy graph.</p>
</caption>
<graphic xlink:href="fenrg-14-1635112-g008.tif">
<alt-text content-type="machine-generated">Two-line graphs comparing MSCNN and CNN in terms of loss and accuracy over epochs. (a) The loss graph shows MSCNN (solid blue) and CNN (solid red) decreasing rapidly before plateauing. (b) The accuracy graph depicts MSCNN (solid blue) achieving higher accuracy than CNN (solid red) throughout the epochs. Both graphs include training (solid) and testing (dotted/dashed) lines.</alt-text>
</graphic>
</fig>
<p>As illustrated in <xref ref-type="fig" rid="F8">Figure 8</xref>, the loss function and recognition accuracy curves for both MSCNN and CNN models are plotted over 200 training iterations for both training and testing datasets. The MSCNN achieves recognition accuracies exceeding 90% for both datasets after only 20 iterations, with the corresponding loss function dropping below 0.1. The performance curves then stabilize, indicating convergence without overfitting. In contrast, the CNN requires more than 40 iterations to reach stability, demonstrating that MSCNN converges faster and requires less training time in practical applications. The MSCNN achieves a peak accuracy of 90.40%, outperforming the CNN&#x2019;s maximum accuracy of 85.26%, indicating the CNN&#x2019;s limitations in correctly classifying certain samples. This performance gain confirms MSCNN&#x2019;s superior ability to extract parallel multi-scale features, which is critical for accurate multi crack detection in real world diagnostic scenarios. The results further indicate that crack defect features exhibit distinct multi-scale temporal characteristics, often masked by noise in the raw signal. A single scale CNN struggles to capture such complex patterns, whereas the MSCNN, by leveraging multi-scale convolution, effectively captures these variations, thereby improving detection and classification performance.</p>
</sec>
<sec id="s4-2-2">
<label>4.2.2</label>
<title>Comparison between traditional machine learning algorithms and MSCNN</title>
<p>To further evaluate the proposed deep learning approach for identifying crack defects in wind turbine spindles, a comparison was conducted with traditional machine learning methods. Previous studies (<xref ref-type="bibr" rid="B1">Chen et al., 2017</xref>; <xref ref-type="bibr" rid="B22">Wang et al., 2020</xref>; <xref ref-type="bibr" rid="B27">Zhou et al., 2016</xref>) have commonly applied discrete wavelet transform (DWT) to preprocess acoustic or vibration signals, followed by extracting features such as Mel frequency cepstral coefficients (MFCCs) or energy based descriptors. These were then classified using conventional algorithms such as support vector machines (SVM), back propagation (BP) neural networks, or ELM. In this study, these same features were used as input to SVM, BP, and ELM classifiers, and their recognition results are summarized in <xref ref-type="fig" rid="F9">Figure 9</xref>.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>Histogram of recognition accuracy rates of five comparative algorithms.</p>
</caption>
<graphic xlink:href="fenrg-14-1635112-g009.tif">
<alt-text content-type="machine-generated">Bar chart comparing recognition accuracy percentages for five methods: BP, SVM, ELM, CNN, and MSCNN across five instances. MSCNN consistently shows the highest accuracy, averaging 90.008 percent. Each method&#x2019;s average is displayed in the accompanying table.</alt-text>
</graphic>
</fig>
<p>As shown in <xref ref-type="fig" rid="F9">Figure 9</xref>, the average recognition accuracies of the traditional models&#x2014;SVM, BP, and ELM&#x2014;are 76.45%, 79.56%, and 81.80%, respectively. These values are consistently lower than those of the deep learning models, including CNN and MSCNN. Among all models, MSCNN demonstrates the highest recognition performance, clearly surpassing traditional methods. This highlights the advantage of deep architectures, which can automatically learn and extract discriminative features from acoustic signals, overcoming the limitations of manually engineered features.</p>
<p>Unlike traditional algorithms, deep learning models capture features across multiple levels and temporal scales, enhancing classification accuracy. Specifically, the MSCNN achieves an average recognition accuracy of 90.008%, representing a 6.7% improvement over the best performing traditional model and a 10.01% gain compared to the standard CNN. By using convolution kernels of varying sizes to extract acoustic features in parallel at different temporal scales, MSCNN achieves richer and more discriminative feature representations, which are particularly effective for complex, multi type defects.</p>
<p>The experimental results confirm that MSCNN provides more comprehensive and accurate multi-level acoustic feature extraction, leading to significantly improved recognition performance. The use of multi-scale convolution kernels allows MSCNN to better capture transient signal characteristics, further validating its superiority over both traditional machine learning and single-scale CNN approaches.</p>
</sec>
<sec id="s4-2-3">
<label>4.2.3</label>
<title>Ablation study of MSCNN architectural components</title>
<p>A series of ablation experiments were conducted to quantify the contribution of key MSCNN components to overall performance. By individually removing or altering elements such as the number of branches, convolution kernel sizes, input feature types, network depth and width, fusion methods, and regularization strategies, their impact on the 10-run average accuracy was systematically evaluated.</p>
<p>
<xref ref-type="table" rid="T6">Table 6</xref> summarizes the ablation results, averaged over ten runs. The baseline configuration, comprising three branches (k &#x2208; {7, 5, 3}), EZR input, GAP fusion, and dropout of 0.5, achieves an average accuracy of 89.39% and serves as the reference. Removing the multi-scale structure (A0) causes one of the largest performance drops (&#x2212;5.04%), underscoring the critical role of multi-scale feature extraction in capturing discriminative patterns. Replacing the EZR input with raw time-domain signals (F1) yields the largest decline (&#x2212;5.26%), confirming the robustness and discriminative power of EZR features. Reducing the number of branches from three to two (A3) lowers accuracy by 2.64%, indicating that additional branches enhance feature diversity. Likewise, decreasing network depth (B1) or width (C1) reduces accuracy by 2.79% and 4.07%, respectively; conversely, increasing them (B3, C3) produces only negligible changes (&#x2b;0.09% and &#x2212;0.25%), suggesting that the baseline already strikes a sound balance between complexity and generalization. Changing the fusion method from GAP to flattening (D2) reduces accuracy by 2.45%, and removing dropout (D4) decreases it by 2.89%, highlighting the importance of both fusion design and regularization. Adding an additional feature channel (EZR &#x2b; ZCR, F2) provides only a marginal gain (&#x2b;0.03%), indicating limited benefit relative to core components such as the multi-scale design and EZR input. Overall, these results demonstrate that multi-scale convolution, EZR input, balanced network depth and width, and GAP-based fusion are the most influential factors for achieving high recognition accuracy in crack defect identification.</p>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>Ablation study on key architectural components of MSCNN.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Id</th>
<th align="left">Variant</th>
<th align="left">Avg. Acc (%)</th>
<th align="left">&#x394;Acc</th>
<th align="left">Key change</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Ours</td>
<td align="left">Baseline (3-branch; k &#x3d; 7/5/3; EZR; GAP; dropout 0.5)</td>
<td align="left">89.39</td>
<td align="left">-</td>
<td align="left">-</td>
</tr>
<tr>
<td align="left">A0</td>
<td align="left">Single branch (k &#x3d; 3)</td>
<td align="left">84.35</td>
<td align="left">&#x2212;5.04</td>
<td align="left">Remove multi-scale</td>
</tr>
<tr>
<td align="left">A3</td>
<td align="left">Dual branch (k &#x3d; 7,5)</td>
<td align="left">86.75</td>
<td align="left">&#x2212;2.64</td>
<td align="left">Reduce one branch</td>
</tr>
<tr>
<td align="left">B1</td>
<td align="left">Depth &#x3d; 2 blocks</td>
<td align="left">86.60</td>
<td align="left">&#x2212;2.79</td>
<td align="left">Shallower network</td>
</tr>
<tr>
<td align="left">B3</td>
<td align="left">Depth &#x3d; 4 blocks</td>
<td align="left">89.48</td>
<td align="left">&#x2b;0.09</td>
<td align="left">Deeper network</td>
</tr>
<tr>
<td align="left">C1</td>
<td align="left">Channels &#x3d; 16&#x2192;32&#x2192;64</td>
<td align="left">85.32</td>
<td align="left">&#x2212;4.07</td>
<td align="left">Narrow network</td>
</tr>
<tr>
<td align="left">C3</td>
<td align="left">Channels &#x3d; 64&#x2192;128&#x2192;128</td>
<td align="left">89.14</td>
<td align="left">&#x2212;0.25</td>
<td align="left">Wider network</td>
</tr>
<tr>
<td align="left">D2</td>
<td align="left">Flatten instead of GAP</td>
<td align="left">86.94</td>
<td align="left">&#x2212;2.45</td>
<td align="left">Change fusion method</td>
</tr>
<tr>
<td align="left">D4</td>
<td align="left">No dropout</td>
<td align="left">86.50</td>
<td align="left">&#x2212;2.89</td>
<td align="left">Remove regularization</td>
</tr>
<tr>
<td align="left">F1</td>
<td align="left">Raw time domain input</td>
<td align="left">84.13</td>
<td align="left">&#x2212;5.26</td>
<td align="left">Remove EZR</td>
</tr>
<tr>
<td align="left">F2</td>
<td align="left">EZR &#x2b; ZCR (2-channel)</td>
<td align="left">89.42</td>
<td align="left">&#x2b;0.03</td>
<td align="left">Add extra feature</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s4-3">
<label>4.3</label>
<title>Effect of multi-scale feature extraction</title>
<p>The experimental results demonstrate that the crack feature maps extracted by the MSCNN model at different scales exhibit strong discriminative ability. Compared with single-scale convolution, the multi-scale convolution approach significantly enhances the accuracy of crack detection.</p>
<p>As shown in <xref ref-type="fig" rid="F10">Figure 10</xref>, the t-distributed Stochastic Neighbor Embedding (t-SNE) method is used to visualize the dimensionally reduced feature distributions obtained through different feature extraction methods applied to acoustic signals from the wind turbine main shaft (<xref ref-type="bibr" rid="B19">van der Maaten and Hinton, 2008</xref>). These include the raw input signal, features extracted by a CNN, and features extracted by the MSCNN model trained on either known or unknown defect types. <xref ref-type="fig" rid="F10">Figure 10a</xref> shows that the raw signal lacks discernible structure, with feature points for various defects and inherent structural components appearing scattered and overlapping. This confirms that without feature extraction, differentiating between defect types is difficult, underscoring the necessity for advanced feature extraction methods. <xref ref-type="fig" rid="F10">Figure 10b</xref> presents the feature distribution obtained by the CNN model. Compared with the raw signal, the CNN extracted features exhibit preliminary clustering for inherent structure, minor defects, moderate defects, and major defects. However, overlap remains&#x2014;particularly between minor and moderate defects&#x2014;indicating that while CNN improves separability, it still struggles with complex defect distinctions. In <xref ref-type="fig" rid="F10">Figure 10c</xref>, the MSCNN model trained on known defect types yields distinctly separated clusters for each class. Feature points corresponding to inherent structure, minor, moderate, and major defects are tightly grouped, with minimal overlap. Notably, the boundary between minor and moderate defects is significantly clearer, demonstrating the superior feature extraction and classification performance of MSCNN when guided by known defect labels. 
<xref ref-type="fig" rid="F10">Figure 10d</xref> shows the feature distribution generated by the MSCNN model for defect states that were not seen during training. While separability remains superior to that achieved by CNN, it is less distinct than in the known-defect scenario. Some overlap between defect classes persists, particularly between minor and moderate defects. This suggests that although MSCNN can extract discriminative features for previously unseen defect states, the guidance of known labels markedly improves classification performance.</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Feature maps after dimensionality reduction by t-SNE. <bold>(a)</bold> Input signal. <bold>(b)</bold> CNN with a single scale. <bold>(c)</bold> MSCNN (known defects). <bold>(d)</bold> MSCNN (unknown defects).</p>
</caption>
<graphic xlink:href="fenrg-14-1635112-g010.tif">
<alt-text content-type="machine-generated">Four scatter plot diagrams labeled (a) to (d), each depicting clusters of data points in various colors and shapes, representing defects and structures. Annotations indicate categories: major crack, minor crack, moderate crack, inherent structure, major defect, moderate defect, minor defect. Each plot shows different distributions and groupings, highlighting variations in defect severity and structural characteristics within the data.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<label>5</label>
<title>Conclusion</title>
<p>This study presents an intelligent crack detection method for wind turbine main shafts that leverages acoustic signature analysis and an MSCNN. The proposed EZR-based segmentation algorithm effectively isolates crack features and demonstrates robustness in identifying both single and composite cracks. Experimental results show that multi-scale feature learning outperforms single-scale methods, achieving an average recognition accuracy of 90%, representing a 6.73% improvement over traditional models such as ELM and a 3.36% gain over single-scale CNNs.</p>
<p>Despite these promising results, several practical challenges remain. Sensor installation during maintenance is time-consuming, requiring 4&#x2013;6 h per turbine. Computational efficiency for real-time applications also needs further optimization, with current inference times of 47 ms on standard hardware and 215 ms on edge devices. Cross-platform validation confirmed good adaptability, with accuracy ranging from 83.9% to 87.2% across different turbine models. However, environmental testing revealed performance degradation under extreme conditions, highlighting the need for compensation and adaptation techniques.</p>
<p>Future work will focus on improving computational efficiency for edge devices, integrating additional sensor modalities, and extending the system to predict crack propagation. Developing decision support algorithms for autonomous maintenance recommendations will also be a priority. Overall, the proposed method offers a practical and effective solution for wind turbine fault diagnosis, though further refinements are required for large scale industrial deployment.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>LZ: Investigation, Writing &#x2013; original draft, Software, Writing &#x2013; review and editing. FL: Conceptualization, Writing &#x2013; review and editing, Writing &#x2013; original draft. SZ: Conceptualization, Writing &#x2013; original draft, Investigation. XZ: Data curation, Project administration, Formal Analysis, Methodology, Writing &#x2013; review and editing, Conceptualization, Writing &#x2013; original draft. GH: Investigation, Supervision, Project administration, Writing &#x2013; original draft, Methodology.</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>Authors LZ, FL, and SZ were employed by CGN New Energy Investment (Shenzhen) Co., Ltd., Yunnan Branch.</p>
<p>The remaining author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
<p>The author(s) declared that this work received funding from Industry&#x2013;University Collaborative Research Project (Contract No. HZ2024K0212A). The funder had the following involvement in the study: study design and data collection. The funder had no role in the data analysis or interpretation, manuscript preparation, or the decision to publish.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2529133/overview">Jesus Enrique Sierra Garcia</ext-link>, University of Burgos, Spain</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1707150/overview">Cheng Liu</ext-link>, Shanghai Jiao Tong University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2140949/overview">Petr Dole&#x17e;el</ext-link>, University of Pardubice, Czechia</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2017</year>). <article-title>Wavelet transform-based feature extraction for fault diagnosis of wind turbine blades</article-title>. <source>Renew. Energy</source> <volume>102</volume>, <fpage>275</fpage>&#x2013;<lpage>287</lpage>.</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Bearing fault diagnosis based on multi-scale CNN and LSTM model</article-title>. <source>J. Intell. Manuf.</source> <volume>32</volume>, <fpage>971</fpage>&#x2013;<lpage>987</lpage>. <pub-id pub-id-type="doi">10.1007/s10845-020-01600-2</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Lou</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>A rolling bearing fault diagnosis method via 2D feature map of CSCoh after denoising and MSCNN under different conditions</article-title>. <source>J. Vib. Control</source> <volume>30</volume> (<issue>5&#x2013;6</issue>), <fpage>1241</fpage>&#x2013;<lpage>1253</lpage>. <pub-id pub-id-type="doi">10.1177/10775463231158739</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cheng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Lyu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xie</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Ultrasonic inspection of the surface crack for the main shaft of a wind turbine from the end face</article-title>. <source>NDT E Int.</source> <volume>114</volume>, <fpage>102283</fpage>. <pub-id pub-id-type="doi">10.1016/j.ndteint.2020.102283</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ding</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Jiao</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Multi-objective iterative optimization algorithm based optimal wavelet filter selection for multi-fault diagnosis of rolling element bearings</article-title>. <source>ISA Trans.</source> <volume>88</volume>, <fpage>199</fpage>&#x2013;<lpage>215</lpage>. <pub-id pub-id-type="doi">10.1016/j.isatra.2018.12.010</pub-id>
<pub-id pub-id-type="pmid">30578001</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Feng</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Ni</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zuo</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Specifying roller-bearing clearance for wind turbine gearboxes: an experimental and theoretical investigation</article-title>. <source>IEEE Access</source> <volume>7</volume>, <fpage>103911</fpage>&#x2013;<lpage>103922</lpage>.</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>MSCNN-AM: a multi-scale convolutional neural network with attention mechanisms for retinal vessel segmentation</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>163926</fpage>&#x2013;<lpage>163936</lpage>. <pub-id pub-id-type="doi">10.1109/access.2020.3022177</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gbashi</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Olatunji</surname>
<given-names>O. O.</given-names>
</name>
<name>
<surname>Adedeji</surname>
<given-names>P. A.</given-names>
</name>
<name>
<surname>Madushele</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>From academic to industrial research: a comparative review of advances in rolling element bearings for wind turbine main shaft</article-title>. <source>Eng. Fail. Anal.</source> <volume>163</volume>, <fpage>108510</fpage>. <pub-id pub-id-type="doi">10.1016/j.engfailanal.2024.108510</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhen</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Gu</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Ball</surname>
<given-names>A. D.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Fault feature extraction for rolling element bearing diagnosis based on a multi-stage noise reduction method</article-title>. <source>Measurement</source> <volume>139</volume>, <fpage>226</fpage>&#x2013;<lpage>235</lpage>. <pub-id pub-id-type="doi">10.1016/j.measurement.2019.02.072</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hassan</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Viktor</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Al-Musawi</surname>
<given-names>T. J.</given-names>
</name>
<name>
<surname>Mahmood Ali</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Algburi</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Alzoubi</surname>
<given-names>H. M.</given-names>
</name>
<etal/>
</person-group> (<year>2024</year>). <article-title>The renewable energy role in the global energy transformations</article-title>. <source>Renew. Energy Focus</source> <volume>48</volume>, <fpage>100545</fpage>. <pub-id pub-id-type="doi">10.1016/j.ref.2024.100545</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hinton</surname>
<given-names>G. E.</given-names>
</name>
<name>
<surname>Osindero</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Teh</surname>
<given-names>Y. W.</given-names>
</name>
</person-group> (<year>2006</year>). <article-title>A fast learning algorithm for deep belief nets</article-title>. <source>Neural Comput.</source> <volume>18</volume> (<issue>7</issue>), <fpage>1527</fpage>&#x2013;<lpage>1554</lpage>. <pub-id pub-id-type="doi">10.1162/neco.2006.18.7.1527</pub-id>
<pub-id pub-id-type="pmid">16764513</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2019</year>). &#x201c;<article-title>Multi-format speech perception hashing based on time-frequency parameter fusion of energy zero ratio and frequency band variance</article-title>,&#x201d; in <source>Proceedings of the 2019 3rd International Conference on Electronic Information Technology and Computer Engineering (EITCE)</source>, <conf-loc>Xiamen, China</conf-loc>, <conf-date>18&#x2013;20 October 2019</conf-date> (<publisher-name>IEEE</publisher-name>), <fpage>243</fpage>&#x2013;<lpage>251</lpage>. <pub-id pub-id-type="doi">10.1109/EITCE47263.2019.9094822</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kumar Dora</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Bath</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mitra</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ernst</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Halinka</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Zychma</surname>
<given-names>D.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>The global electricity grid: a comprehensive review</article-title>. <source>Energies</source> <volume>18</volume> (<issue>5</issue>), <fpage>1152</fpage>. <pub-id pub-id-type="doi">10.3390/en18051152</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nejad</surname>
<given-names>A. R.</given-names>
</name>
<name>
<surname>Keller</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sheng</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Polinder</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Watson</surname>
<given-names>S.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Wind turbine drivetrains: state-of-the-art technologies and future development trends</article-title>. <source>Wind Energy Sci.</source> <volume>7</volume> (<issue>1</issue>), <fpage>387</fpage>&#x2013;<lpage>411</lpage>. <pub-id pub-id-type="doi">10.5194/wes-7-387-2022</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Peng</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Gui</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Fault diagnosis method for rotating machinery based on MSCNN-MGAT</article-title>. <source>IEEE Trans. Instrum. Meas.</source> <volume>74</volume>, <fpage>2540511</fpage>. <pub-id pub-id-type="doi">10.1109/tim.2025.3587368</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Santelo</surname>
<given-names>T. N.</given-names>
</name>
<name>
<surname>de Oliveira</surname>
<given-names>C. M. R.</given-names>
</name>
<name>
<surname>Maciel</surname>
<given-names>C. D.</given-names>
</name>
<name>
<surname>de A. Monteiro</surname>
<given-names>J. R. B.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Wind turbine failures review and trends</article-title>. <source>J. Control Autom. Electr. Syst.</source> <volume>33</volume>, <fpage>1</fpage>&#x2013;<lpage>17</lpage>. <pub-id pub-id-type="doi">10.1007/s40313-021-00789-8</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Teng</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Ding</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Mu</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Fault diagnosis for a wind turbine planetary gearbox via novel method based on an extended cepstrum and MOMEDA</article-title>. <source>Appl. Sci.</source> <volume>9</volume>, <fpage>4355</fpage>.</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>van der Maaten</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Hinton</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Visualizing data using t-SNE</article-title>. <source>J. Mach. Learn. Res.</source> <volume>9</volume>, <fpage>2579</fpage>&#x2013;<lpage>2605</lpage>.</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Review of acoustic emission signal analysis for structural health monitoring</article-title>. <source>Sensors</source> <volume>23</volume> (<issue>1</issue>), <fpage>312</fpage>.</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Order spectrum analysis for planetary gearbox fault detection via joint amplitude and frequency demodulation</article-title>. <source>Shock Vib.</source> <volume>2019</volume>, <fpage>9086538</fpage>.</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Fault diagnosis of wind turbine gearbox using ELM and wavelet packet energy entropy</article-title>. <source>Measurement</source> <volume>165</volume>, <fpage>108076</fpage>.</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Q. Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Multi-format speech biohashing based on energy to zero ratio and improved LP-MMSE parameter fusion</article-title>. <source>Multimed. Tools Appl.</source> <volume>80</volume>, <fpage>10013</fpage>&#x2013;<lpage>10036</lpage>. <pub-id pub-id-type="doi">10.1007/s11042-020-09701-z</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xia</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Qiao</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A fault diagnosis approach for gears using improved spectral kurtosis, ensemble intrinsic time-scale decomposition and correlated feature selection</article-title>. <source>Appl. Sci.</source> <volume>10</volume>, <fpage>1879</fpage>.</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Multi-scale convolutional neural network combining BiLSTM and attention mechanism for bearing fault diagnosis under multiple working conditions</article-title>. <source>Sci. Rep.</source> <volume>15</volume>, <fpage>13035</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-025-96137-w</pub-id>
<pub-id pub-id-type="pmid">40234523</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Few-shot learning for intelligent fault diagnosis: a survey</article-title>. <source>IEEE Trans. Ind. Inf.</source> <volume>18</volume> (<issue>6</issue>), <fpage>3762</fpage>&#x2013;<lpage>3772</lpage>.</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>Wind turbine blade damage detection using wavelet packet decomposition and BP neural network</article-title>. <source>Mech. Syst. Signal Process.</source> <volume>70&#x2013;71</volume>, <fpage>103</fpage>&#x2013;<lpage>115</lpage>.
</mixed-citation>
</ref>
</ref-list>
</back>
</article>