<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Electron.</journal-id>
<journal-title>Frontiers in Electronics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Electron.</abbrev-journal-title>
<issn pub-type="epub">2673-5857</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1654344</article-id>
<article-id pub-id-type="doi">10.3389/felec.2025.1654344</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Electronics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>A hybrid LSTM&#x2013;transformer model for accurate remaining useful life prediction of lithium-ion batteries</article-title>
<alt-title alt-title-type="left-running-head">Zhao et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/felec.2025.1654344">10.3389/felec.2025.1654344</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Zhao</surname>
<given-names>Tianren</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Zhang</surname>
<given-names>Yanhui</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1087925/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Minghao</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Feng</surname>
<given-names>Wei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Cao</surname>
<given-names>Shengxian</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Gong</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences</institution>, <addr-line>Shenzhen</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>University of Macau</institution>, <addr-line>Taipa</addr-line>, <country>Macao SAR, China</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Northeast Electric Power University</institution>, <addr-line>Jilin</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1414047/overview">Xiaohu Yang</ext-link>, Xi&#x2019;an Jiaotong University, China</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1799340/overview">Bowen Zhou</ext-link>, Northeastern University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1204971/overview">Wenji Song</ext-link>, Chinese Academy of Sciences (CAS), China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3132840/overview">Yushuang Liu</ext-link>, Wuhan University, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Yanhui Zhang, <email>zhangyh@siat.ac.cn</email>
</corresp>
</author-notes>
<pub-date pub-type="epub">
<day>21</day>
<month>08</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="ecorrected">
<day>27</day>
<month>10</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>6</volume>
<elocation-id>1654344</elocation-id>
<history>
<date date-type="received">
<day>26</day>
<month>06</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>04</day>
<month>08</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2025 Zhao, Zhang, Wang, Feng, Cao and Wang.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Zhao, Zhang, Wang, Feng, Cao and Wang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>With the widespread application of lithium-ion batteries in electric vehicles and energy storage systems, health monitoring and remaining useful life (RUL) prediction have become critical components of battery management systems. To address the challenges posed by the high nonlinearity and long-term dependency in battery degradation modeling, this paper proposes a deep hybrid architecture that integrates Long Short-Term Memory (LSTM) networks with Transformer mechanisms, aiming to improve the accuracy and robustness of RUL prediction. Firstly, time-series samples are constructed from raw battery data, and physically consistent temperature-derived features&#x2014;including average temperature, temperature range, and temperature fluctuation&#x2014;are engineered. Data preprocessing is performed using standardization and Yeo-Johnson transformation. The model employs LSTM modules to capture local temporal patterns, while the Transformer modules extract global dependencies through multi-head self-attention mechanisms. These complementary features are fused to enable joint modeling of battery health states. The regression task is optimized using the Mean Squared Error loss function and trained with the Adam optimizer. Experimental results on the MIT battery dataset demonstrate that the proposed model achieves excellent performance in a 7-step multi-point prediction task, with a Root Mean Square Error of 0.0085, Mean Absolute Percentage Error of 0.0200, and a coefficient of determination of 0.9902. Compared with alternative models such as MC-LSTM and XGBoost-LSTM, the proposed model exhibits superior accuracy and stability. Residual analysis and visualization further confirm the model&#x2019;s unbiased and stable predictive capability. This study shows that the LSTM-Transformer hybrid architecture offers significant potential in modeling complex battery degradation processes and enhancing RUL prediction accuracy, providing effective technical support for the development of intelligent battery health management systems.</p>
</abstract>
<kwd-group>
<kwd>lithium-ion battery</kwd>
<kwd>remaining useful life</kwd>
<kwd>LSTM</kwd>
<kwd>transformer</kwd>
<kwd>time-series prediction</kwd>
</kwd-group>
<counts>
<page-count count="12"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Power Electronics</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>With the global transition toward cleaner energy and the rapid advancement of electrification technologies, lithium-ion batteries have emerged as essential energy storage components in electric vehicles, renewable energy storage systems, and portable electronic devices (<xref ref-type="bibr" rid="B19">Wang et al., 2023</xref>). Owing to their high energy density, long cycle life, and low self-discharge rate, lithium-ion batteries have become increasingly important. However, their performance inevitably degrades over time due to repeated charge-discharge cycles, leading to capacity fade and a shortened remaining useful life (RUL) (<xref ref-type="bibr" rid="B17">Wang et al., 2021</xref>). Accurate prediction of battery state of health (SOH) and RUL is crucial for optimizing battery management, extending service life, reducing maintenance costs, and ensuring system safety (<xref ref-type="bibr" rid="B4">Elmahallawy et al., 2022</xref>; <xref ref-type="bibr" rid="B22">Yao et al., 2021</xref>). In particular, battery failure in electric vehicles or large-scale energy storage systems can result in significant safety hazards and economic losses, making the development of high-accuracy RUL prediction methods a pressing research focus.</p>
<p>Precise RUL prediction plays a vital role in optimizing battery management systems. First, it provides a scientific basis for battery replacement and maintenance decisions, thereby lowering operational costs (<xref ref-type="bibr" rid="B16">Tong et al., 2021</xref>). Second, it enables early identification of potential failures, thus enhancing system safety (<xref ref-type="bibr" rid="B21">Yang et al., 2023</xref>). Moreover, accurate RUL estimates support battery recycling and second-life applications, contributing to sustainable resource utilization. Nevertheless, several challenges hinder effective RUL prediction. The degradation process is influenced by multiple factors such as temperature, charge/discharge rates, and usage scenarios, exhibiting high nonlinearity and complexity (<xref ref-type="bibr" rid="B15">Sharma and Bora, 2022</xref>). Real-world operational data often contain noise and missing values, increasing the difficulty of modeling (<xref ref-type="bibr" rid="B9">Li et al., 2023</xref>). Furthermore, long-term prediction requires models that can simultaneously capture short-term fluctuations and long-term trends, which is difficult for single-model architectures to achieve.</p>
<p>Traditional RUL prediction approaches can be categorized into physics-based and data-driven methods. Physics-based models rely on the electrochemical mechanisms of batteries (<xref ref-type="bibr" rid="B11">Liu et al., 2022</xref>), using complex mathematical formulations to describe the degradation process. However, these methods require detailed knowledge of material properties and operating conditions, involve high computational complexity, and often lack generalizability across different battery types. In contrast, data-driven approaches have gained popularity by learning patterns directly from operational data (<xref ref-type="bibr" rid="B7">Ji et al., 2024</xref>). With advances in sensor technology and data acquisition capabilities, these methods can effectively model battery behavior using features such as voltage, current, and temperature, showing improved adaptability and predictive accuracy.</p>
<p>Data-driven RUL prediction techniques generally fall into three categories: statistical models (<xref ref-type="bibr" rid="B3">Crawford et al., 2021</xref>), machine learning methods (<xref ref-type="bibr" rid="B23">Zhang L. et al., 2022</xref>), and deep learning models (<xref ref-type="bibr" rid="B24">Zhang D. et al., 2022</xref>). Statistical approaches, such as Kalman filtering and particle filtering, model battery degradation probabilistically. For example, <xref ref-type="bibr" rid="B12">Nunes et al. (2023)</xref> proposed an online RUL estimation method for second-life lithium-ion batteries based on an unscented Kalman filter and degradation curve modeling, validated on six different second-life battery datasets. Despite some success&#x2014;achieving a worst-case mean absolute percentage error (MAPE) of 5.279% and an R<sup>2</sup> score of 0.726&#x2014;statistical models often struggle with nonlinear or complex degradation behaviors. Machine learning approaches such as support vector machines, random forests, and XGBoost have demonstrated promising results in RUL prediction through hand-crafted features. <xref ref-type="bibr" rid="B6">Jafari and Byun (2022)</xref> introduced a hybrid RUL prediction method based on particle filtering and Kalman filtering, where XGBoost was used as the observation model due to its strong nonlinear fitting capabilities. Despite high predictive accuracy based on full-cycle test data, such methods are heavily dependent on the quality of feature engineering and may suffer from overfitting or inefficiency when applied to high-dimensional time-series data.</p>
<p>The emergence of deep learning has opened new avenues for RUL prediction. Long Short-Term Memory (LSTM) networks, known for their capability in modeling temporal dependencies, have been widely adopted for battery degradation modeling. LSTM networks utilize gating mechanisms to effectively capture long-term dependencies, making them well-suited for modeling the nonlinear degradation process of batteries. <xref ref-type="bibr" rid="B14">Reza et al. (2024)</xref> proposed an improved method combining LSTM with the Gravitational Search Algorithm (GSA), using data cleaning to remove noise, replacing anomalies with highly correlated data, and applying normalization. GSA was employed to optimize the LSTM hyperparameters to address key challenges in battery life prediction. However, LSTM models may still face limitations such as vanishing gradients and computational inefficiencies when dealing with long sequences. Recently, Transformer models have demonstrated excellent performance in natural language processing and time-series analysis tasks due to their strong capability in extracting global features (<xref ref-type="bibr" rid="B2">Chen et al., 2022</xref>; <xref ref-type="bibr" rid="B5">Han et al., 2023</xref>). The Transformer architecture processes sequences in parallel through multi-head attention mechanisms, effectively capturing long-range dependencies. Nevertheless, its application in battery RUL prediction remains relatively unexplored.</p>
<p>Given the above challenges, accurate RUL prediction remains difficult due to the following key factors:<list list-type="simple">
<list-item>
<p>1. The degradation process is highly nonlinear and influenced by external factors such as temperature, cycling rate, and usage scenarios;</p>
</list-item>
<list-item>
<p>2. Real-world data are often noisy and incomplete, complicating the modeling process;</p>
</list-item>
<list-item>
<p>3. Long-term prediction tasks require models to simultaneously capture short-term variations and long-term trends, which is difficult for single models to handle effectively.</p>
</list-item>
</list>
</p>
<p>To address these challenges, this study develops a hybrid deep learning model that captures both local temporal dependencies and global contextual features. As illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>, we propose a novel LSTM-Transformer hybrid model based on the MIT battery dataset to enhance prediction accuracy and robustness. The LSTM module extracts local temporal dynamics from time-series inputs, while the Transformer module, with its attention mechanism, captures global feature dependencies. The integration of both allows the model to effectively represent complex degradation patterns.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Schematic diagram of the LSTM-Transformer hybrid model architecture.</p>
</caption>
<graphic xlink:href="felec-06-1654344-g001.tif">
<alt-text content-type="machine-generated">Flowchart illustrating the LSTM-Transformer hybrid architecture for battery data processing. It includes steps: raw data collection (voltage, SOC, temperature, current), data sanitization, feature design, Z-score standardization, Yeo-Johnson transformation, and sample construction. The prepared input goes through an attention mechanism, multi-head attention, and feed-forward layers, concluding with output generation.</alt-text>
</graphic>
</fig>
<p>The main contributions of this work are as follows:<list list-type="simple">
<list-item>
<p>1. A novel hybrid deep learning architecture combining local temporal modeling and global attention mechanisms is proposed for lithium-ion battery RUL prediction.</p>
</list-item>
<list-item>
<p>2. Temperature-based features are engineered based on battery physical mechanisms to enhance input feature expressiveness.</p>
</list-item>
<list-item>
<p>3. The proposed model is validated on the MIT battery dataset, demonstrating superior prediction accuracy and robustness compared to existing methods, with strong potential for real-world applications.</p>
</list-item>
</list>
</p>
<p>The remainder of this paper is organized as follows: <xref ref-type="sec" rid="s2">Section 2</xref> introduces the data preprocessing and feature engineering methods; <xref ref-type="sec" rid="s3">Section 3</xref> details the architecture and training process of the LSTM-Transformer hybrid model; <xref ref-type="sec" rid="s4">Section 4</xref> presents experimental results and performance evaluation; <xref ref-type="sec" rid="s5">Section 5</xref> concludes the paper with a summary of findings and future work directions. Through this research, we aim to provide an efficient and accurate solution for lithium-ion battery RUL prediction, offering theoretical and technical support for optimized battery management systems.</p>
</sec>
<sec id="s2">
<title>2 Data preprocessing and feature engineering</title>
<p>Accurate prediction of the remaining useful life of lithium-ion batteries critically depends on high-quality data preprocessing and feature engineering. To construct a time-series modeling&#x2013;ready prediction dataset, this study systematically performs data cleaning, temperature feature extraction, normalization, target variable transformation, and sample construction based on a sliding window. By incrementally sliding a fixed-length window over the time-series data to extract local segments, representative features or sequential samples are generated. This approach facilitates the capture of local temporal dependencies and dynamic variations (<xref ref-type="bibr" rid="B10">Lin et al., 2024</xref>), thereby ensuring that the input data fed into the model possesses strong representativeness and consistency.</p>
<p>The original dataset used in this work is the MIT Battery Dataset, which includes operational data from multiple batteries under various charging and discharging conditions. Key variables include voltage, current, and temperature. Let the raw feature matrix be denoted as <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, where <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the number of samples and <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> indicates the number of feature dimensions. The target variable is denoted as <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mi>n</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>To explore the linear relationships between different features, we construct a Pearson correlation-based heatmap, as illustrated in <xref ref-type="fig" rid="F2">Figure 2a</xref>. In this heatmap, red indicates strong positive correlations, while blue denotes strong negative correlations. The heatmap reveals significant correlations among several voltage-related, capacity-related, and temperature-related features. Based on this analysis, redundant features are removed to improve the generalization ability of the model and to mitigate issues related to multicollinearity.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>
<bold>(a)</bold> Feature correlation heatmap based on Pearson coefficients. <bold>(b)</bold> Feature space distribution of battery life using Principal Component Analysis.</p>
</caption>
<graphic xlink:href="felec-06-1654344-g002.tif">
<alt-text content-type="machine-generated">(a) Heatmap showing feature correlations, with values ranging from negative (blue) to positive (red), displaying interrelationships among features labeled T1-0 to T3-3 and Lifetime. (b) Scatter plot of PCA projection, illustrating distribution along PCA components 1 and 2, with data points colored by battery lifetime from 500 to 2000.</alt-text>
</graphic>
</fig>
<p>To comprehensively evaluate the structure of the feature space, Principal Component Analysis was applied to the high-dimensional feature data after redundancy removal, as shown in <xref ref-type="fig" rid="F2">Figure 2b</xref>. The first two principal components were visualized, with a color gradient representing the corresponding battery RUL values. This visualization enables the observation of degradation trends within the feature space. The results indicate that samples with different RUL levels exhibit clear clustering patterns in the two-dimensional PCA space, demonstrating the discriminative capability of the extracted features in characterizing battery degradation states.</p>
<p>Temperature, as a critical factor influencing lithium-ion battery aging and performance degradation, plays a key role in modeling degradation behavior. Analysis reveals that the raw temperature features (T1-0 to T3-3) exhibit significant dynamic fluctuations during battery operation and are highly correlated with changes in the RUL curve.</p>
<p>To this end, three types of temperature-derived features are designed: temperature mean, temperature range, and temperature fluctuation. The temperature mean reflects the overall thermal load level during battery operation. Elevated operating temperatures accelerate electrolyte decomposition, solid electrolyte interphase (SEI) layer growth, and structural degradation of electrode materials, which are key contributors to capacity fade and internal resistance increase. The temperature range measures the amplitude of temperature variation within each time window, indicating the degree of thermal stress fluctuation. Frequent and intense thermal stress cycles may induce mechanical fatigue or even cracking of electrode particles, exacerbating material degradation and performance decline. Temperature fluctuation, quantified by the standard deviation, characterizes the local instability of temperature over time, typically associated with abnormal conditions such as high-current charge/discharge events and cooling system failures. These factors are prone to cause localized hotspots and accelerate undesirable electrochemical side reactions, ultimately shortening battery life.</p>
<p>For each temperature sensor group, the temperature mean is defined as shown in <xref ref-type="disp-formula" rid="e1">Equation 1</xref>,<disp-formula id="e1">
<mml:math id="m5">
<mml:mrow>
<mml:msubsup>
<mml:mi>T</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<p>In this context, <inline-formula id="inf5">
<mml:math id="m6">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the <inline-formula id="inf6">
<mml:math id="m7">
<mml:mrow>
<mml:msup>
<mml:mi>j</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> temperature channel within the <inline-formula id="inf7">
<mml:math id="m8">
<mml:mrow>
<mml:msup>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> group, where <inline-formula id="inf8">
<mml:math id="m9">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>4</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> represents the number of channels in each group. The corresponding temperature difference range is defined as shown in <xref ref-type="disp-formula" rid="e2">Equation 2</xref>,<disp-formula id="e2">
<mml:math id="m10">
<mml:mrow>
<mml:msubsup>
<mml:mi>T</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>g</mml:mi>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:munder>
<mml:mi>max</mml:mi>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:munder>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:munder>
<mml:mi>min</mml:mi>
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:munder>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
</p>
<p>The above two features respectively characterize the central tendency and extreme dispersion of each temperature group, which can reflect phenomena such as localized overheating or abnormal heat dissipation. In addition, to capture the overall fluctuation level of the thermal behavior, the standard deviation across all temperature channels is calculated as shown in <xref ref-type="disp-formula" rid="e3">Equation 3</xref> and used as an indicator of temperature volatility.<disp-formula id="e3">
<mml:math id="m11">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>p</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>K</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>K</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
</p>
<p>Let <inline-formula id="inf9">
<mml:math id="m12">
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>12</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> denote the total number of temperature channels, <inline-formula id="inf10">
<mml:math id="m13">
<mml:mrow>
<mml:msub>
<mml:mi>T</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represent the value of the <inline-formula id="inf11">
<mml:math id="m14">
<mml:mrow>
<mml:msup>
<mml:mi>k</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> temperature channel, and <inline-formula id="inf12">
<mml:math id="m15">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>T</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> be the mean temperature across all channels. These three temperature-derived features not only enhance the semantic expressiveness of the data but also provide physically consistent inputs aligned with the underlying battery aging mechanisms. To eliminate dimensional discrepancies among features and to improve training stability, as shown in <xref ref-type="disp-formula" rid="e4">Equation 4</xref>, Z-score normalization is applied to the feature matrix, ensuring that each feature has zero mean and unit variance before being fed into the model.<disp-formula id="e4">
<mml:math id="m16">
<mml:mrow>
<mml:msup>
<mml:mi>x</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>x</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
</p>
<p>Let <inline-formula id="inf13">
<mml:math id="m17">
<mml:mrow>
<mml:mi>&#x3bc;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf14">
<mml:math id="m18">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denote the mean and standard deviation of each feature column, respectively, and <inline-formula id="inf15">
<mml:math id="m19">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represent the normalized feature matrix. This normalization ensures that all features follow an approximately zero-mean and unit-variance distribution, which facilitates faster convergence of gradient descent during model training and enhances generalization performance. Meanwhile, the Yeo-Johnson transformation (<xref ref-type="bibr" rid="B1">Bao-Hua et al., 2024</xref>) is applied to the target variable for nonlinear processing. This parameterized transformation adjusts the data distribution to approximate a normal distribution, thereby enhancing the stability and accuracy of subsequent model training.</p>
<p>The transformation is defined as shown in <xref ref-type="disp-formula" rid="e5">Equation 5</xref>,<disp-formula id="e5">
<mml:math id="m20">
<mml:mrow>
<mml:msup>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="" separators="|">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>ln</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>y</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>ln</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>y</mml:mi>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>&#x3bb;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
</p>
<p>Here, <inline-formula id="inf16">
<mml:math id="m21">
<mml:mrow>
<mml:mi>&#x3bb;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the transformation parameter, which is automatically estimated using the maximum likelihood method. The transformed variable <inline-formula id="inf17">
<mml:math id="m22">
<mml:mrow>
<mml:msup>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> exhibits a more symmetric distribution, which is beneficial for subsequent model convergence and stable error control.</p>
<p>To accommodate the requirements of deep learning-based time series modeling, the dataset is reconstructed into a sliding window format. As illustrated in <xref ref-type="fig" rid="F3">Figure 3</xref>, let the input window length be <inline-formula id="inf18">
<mml:math id="m23">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>30</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and the prediction horizon be <inline-formula id="inf19">
<mml:math id="m24">
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>7</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. For the normalized feature matrix <inline-formula id="inf20">
<mml:math id="m25">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and the transformed target variable <inline-formula id="inf21">
<mml:math id="m26">
<mml:mrow>
<mml:msup>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mi>n</mml:mi>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, as shown in <xref ref-type="disp-formula" rid="e6">Equation 6</xref>, each training sample is constructed from the following subsequences:<disp-formula id="e6">
<mml:math id="m27">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mi>y</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>:</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:msup>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Illustration of the sliding window input features.</p>
</caption>
<graphic xlink:href="felec-06-1654344-g003.tif">
<alt-text content-type="machine-generated">Line chart titled &#x22;Sliding Window Input Features Example&#x22; displays the standardized feature values against time steps. Three lines, T1_mean (blue), T2_mean (orange), and T3_mean (green), show variations with noticeable peaks around time steps 11 and 24. The y-axis ranges from negative two to five.</alt-text>
</graphic>
</fig>
<p>The total number of samples that can be constructed from the dataset is given by <xref ref-type="disp-formula" rid="e7">Equation 7</xref>,<disp-formula id="e7">
<mml:math id="m28">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
</p>
<p>The final dataset was split into training and testing sets at a ratio of 8:2, with the training set randomly shuffled to enhance sample diversity and training robustness. The data preprocessing pipeline significantly improved the semantic representation and structural compatibility of the input data. The design of temperature-derived features was closely aligned with the underlying battery physical mechanisms. Standardization and nonlinear transformation ensured numerical stability during model training, while the sliding window data construction effectively captured the dynamic evolution of battery degradation. Together, these steps laid a solid foundation for subsequent battery life prediction modeling based on the LSTM-Transformer architecture.</p>
</sec>
<sec id="s3">
<title>3 Model architecture and training process</title>
<p>To achieve high-precision regression prediction of the remaining useful life of lithium-ion batteries, this study designs a deep neural network model that integrates Long Short-Term Memory networks with the Transformer architecture. The model leverages the strength of LSTM in capturing local temporal dynamics in time series data and the powerful capability of Transformer in modeling global dependencies, thereby enhancing the ability to characterize the evolving performance trends of batteries. Time series features are fed into two parallel subnetworks for separate encoding, followed by feature-level fusion to ultimately predict the battery life over multiple future time steps.</p>
<sec id="s3-1">
<title>3.1 LSTM model</title>
<p>In lithium-ion battery life prediction, operational features such as voltage, temperature, and current exhibit pronounced temporal correlations. Single-step or short-range modeling approaches often fail to capture the complex evolutionary processes. The Long Short-Term Memory network employs gating mechanisms to propagate information along the temporal dimension, enabling the model to capture nonlinear dynamic changes with long-term dependencies. <xref ref-type="fig" rid="F4">Figure 4</xref> illustrates the neural network architecture of the LSTM.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Computational process of the LSTM neural network.</p>
</caption>
<graphic xlink:href="felec-06-1654344-g004.tif">
<alt-text content-type="machine-generated">Diagram of a Long Short-Term Memory (LSTM) cell. It shows three main gates: input, forget, and output, represented by sigma and tanh functions. Inputs include previous cell state \( C_{t-1} \), previous hidden state \( h_{t-1} \), and current input \( X_t \). The operations involve element-wise multiplication and addition, controlling information flow, and updating the current cell state \( C_t \) and hidden state \( h_t \).</alt-text>
</graphic>
</fig>
<p>The fundamental computational process of the LSTM is as follows. The forget gate determines whether to retain the cell state from the previous time step <inline-formula id="inf22">
<mml:math id="m29">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> at the current time step, and its formulation is given by <xref ref-type="disp-formula" rid="e8">Equation 8</xref>,<disp-formula id="e8">
<mml:math id="m30">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>
</p>
<p>Here, <inline-formula id="inf23">
<mml:math id="m31">
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula> represents the concatenation of the previous hidden state <inline-formula id="inf24">
<mml:math id="m32">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the current input features <inline-formula id="inf25">
<mml:math id="m33">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>; <inline-formula id="inf26">
<mml:math id="m34">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf27">
<mml:math id="m35">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>f</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denote learnable parameters; <inline-formula id="inf28">
<mml:math id="m36">
<mml:mrow>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is the sigmoid activation function, whose output ranges from 0 to 1, indicating the retention proportion.</p>
<p>The input gate controls how much of the current input information is written into the cell state, consisting of <xref ref-type="disp-formula" rid="e9">Equations 9</xref>, <xref ref-type="disp-formula" rid="e10">10</xref>,<disp-formula id="e9">
<mml:math id="m37">
<mml:mrow>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
<disp-formula id="e10">
<mml:math id="m38">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>c</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>tanh</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>
</p>
<p>Here, <inline-formula id="inf29">
<mml:math id="m39">
<mml:mrow>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the input gate weights, <inline-formula id="inf30">
<mml:math id="m40">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>c</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the candidate cell state, and the hyperbolic tangent function <inline-formula id="inf31">
<mml:math id="m41">
<mml:mrow>
<mml:mi>tanh</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> ensures that the output range is within [&#x2212;1,1], thereby enhancing the model&#x2019;s nonlinear fitting capability.</p>
<p>The cell state is updated by combining the weights of the forget gate and the input gate to revise the memory from the previous time step, as shown in <xref ref-type="disp-formula" rid="e11">Equation 11</xref>,<disp-formula id="e11">
<mml:math id="m42">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>c</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>
</p>
<p>In this equation, <inline-formula id="inf32">
<mml:math id="m43">
<mml:mrow>
<mml:mo>&#x2299;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> denotes element-wise multiplication, and the final current cell state <inline-formula id="inf33">
<mml:math id="m44">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is obtained, enabling long-range retention of critical historical information.</p>
<p>The output gate determines the amount of information output as the current hidden state, expressed as shown in <xref ref-type="disp-formula" rid="e12">Equations 12</xref>, <xref ref-type="disp-formula" rid="e13">13</xref>,<disp-formula id="e12">
<mml:math id="m45">
<mml:mrow>
<mml:msub>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3c3;</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>o</mml:mi>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mi>o</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>
<disp-formula id="e13">
<mml:math id="m46">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2299;</mml:mo>
<mml:mi>tanh</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>c</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>
</p>
<p>Here, <inline-formula id="inf34">
<mml:math id="m47">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the hidden state at the current time step, serving as the response to the current sequential input and facilitating information flow to subsequent layers. Through the aforementioned gating mechanisms, the LSTM can effectively learn the stage-wise patterns and long-term dependencies in time series data, enabling accurate modeling of the performance evolution process in prediction tasks.</p>
</sec>
<sec id="s3-2">
<title>3.2 Transformer model</title>
<p>Although LSTM performs well in sequence modeling, it suffers from gradient decay and low training efficiency when handling long-term dependencies. To address these issues, the Transformer architecture is introduced, which establishes direct connections between any positions within the sequence through a multi-head self-attention mechanism, thereby enhancing the model&#x2019;s ability to capture global dynamics. The core computation in the Transformer is the scaled dot-product attention, defined as shown in <xref ref-type="disp-formula" rid="e14">Equation 14</xref>,<disp-formula id="e14">
<mml:math id="m48">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>Q</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>K</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mtext>softmax</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>Q</mml:mi>
<mml:msup>
<mml:mi>K</mml:mi>
<mml:mi>T</mml:mi>
</mml:msup>
</mml:mrow>
<mml:msqrt>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:msqrt>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>
</p>
<p>Here, Q, K, and V represent the Query, Key, and Value matrices, respectively, and <inline-formula id="inf35">
<mml:math id="m49">
<mml:mrow>
<mml:msqrt>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:msqrt>
</mml:mrow>
</mml:math>
</inline-formula> is a scaling factor used to prevent numerical instability. Attention weights are obtained via a softmax operation, enabling a weighted fusion of information across all time steps.</p>
<p>Since the Transformer lacks an explicit sequential structure, positional encoding is introduced to preserve temporal order. The fixed sinusoidal positional encoding scheme is employed as shown in <xref ref-type="disp-formula" rid="e15">Equation 15</xref>,<disp-formula id="e15">
<mml:math id="m50">
<mml:mrow>
<mml:mtable columnalign="left">
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>sin</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:msup>
<mml:mn>10000</mml:mn>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>i</mml:mi>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mtext>model</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mrow>
<mml:mi>P</mml:mi>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>i</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>cos</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:msup>
<mml:mn>10000</mml:mn>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mi>i</mml:mi>
<mml:mo>/</mml:mo>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mtext>model</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:msup>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>
</p>
<p>Here, <inline-formula id="inf36">
<mml:math id="m51">
<mml:mrow>
<mml:mi>p</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the position index of the current time step, <inline-formula id="inf37">
<mml:math id="m52">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> represents the dimension index, and <inline-formula id="inf38">
<mml:math id="m53">
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mtext>model</mml:mtext>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the embedding dimension. This encoding scheme enables the model to perceive the sequential order, thereby allowing it to capture temporal patterns such as periodicity and trends during modeling.</p>
<p>In the proposed model, the Transformer consists of two stacked encoder layers, each comprising a multi-head attention sublayer and a feed-forward neural network. The output is a sequence-level global feature representation, which is subsequently aggregated via average pooling to obtain a fixed-length vector <inline-formula id="inf39">
<mml:math id="m54">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</sec>
<sec id="s3-3">
<title>3.3 Feature fusion and prediction output</title>
<p>Considering the complementary strengths of LSTM and Transformer in modeling different aspects of sequential data, a feature-level fusion strategy is employed. The hidden representations generated by each sub-network are concatenated to form a unified feature vector for downstream prediction. The fusion process is formulated as shown in <xref ref-type="disp-formula" rid="e16">Equation 16</xref>,<disp-formula id="e16">
<mml:math id="m55">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>C</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>c</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>
</p>
<p>Where <inline-formula id="inf40">
<mml:math id="m56">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>L</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>T</mml:mi>
<mml:mi>M</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mn>64</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> denotes the final hidden state output from the LSTM branch, and <inline-formula id="inf41">
<mml:math id="m57">
<mml:mrow>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mn>64</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represents the global feature vector obtained by average pooling from the Transformer module. These two components are concatenated to form a 128-dimensional fused vector. This fused representation is then passed through fully connected layers to perform multi-step regression prediction, as defined by <xref ref-type="disp-formula" rid="e17">Equation 17</xref>,<disp-formula id="e17">
<mml:math id="m58">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>Y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mi>h</mml:mi>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(17)</label>
</disp-formula>
</p>
<p>Where <inline-formula id="inf42">
<mml:math id="m59">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>Y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi mathvariant="double-struck">R</mml:mi>
<mml:mn>7</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> denotes the predicted remaining useful life percentages for the next seven time steps, <inline-formula id="inf43">
<mml:math id="m60">
<mml:mrow>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf44">
<mml:math id="m61">
<mml:mrow>
<mml:msub>
<mml:mi>b</mml:mi>
<mml:mrow>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> are the parameters of the linear projection. This structure enables the model to perform multi-step forecasting of long-term degradation trends, thereby supporting early warning and precise management of battery life. To achieve multi-step prediction of battery remaining useful life, a parallel forecasting strategy is adopted. Given a fixed-length historical input window, the model performs a single forward pass to directly output the target value sequence over the entire prediction horizon. This approach effectively avoids error accumulation during the prediction process and improves both prediction stability and computational efficiency.</p>
</sec>
<sec id="s3-4">
<title>3.4 Loss function and optimization strategy</title>
<p>The prediction of the remaining useful life of lithium-ion batteries is essentially a regression task, where the target variable is a continuous percentage value. Therefore, the mean squared error (MSE) is adopted as the loss function for optimization. It is defined as shown in <xref ref-type="disp-formula" rid="e18">Equation 18</xref>,<disp-formula id="e18">
<mml:math id="m62">
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>E</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="|">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>Y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mi>Y</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(18)</label>
</disp-formula>
</p>
<p>The variable <inline-formula id="inf45">
<mml:math id="m63">
<mml:mrow>
<mml:msup>
<mml:mover accent="true">
<mml:mi>Y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> denotes the predicted RUL value of the <inline-formula id="inf46">
<mml:math id="m64">
<mml:mrow>
<mml:msup>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> sample generated by the model, <inline-formula id="inf47">
<mml:math id="m65">
<mml:mrow>
<mml:msup>
<mml:mi>Y</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> represents the corresponding ground truth label, and <inline-formula id="inf48">
<mml:math id="m66">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the total number of samples. As a widely used regression loss function, MSE penalizes the squared prediction error, weighting large deviations more heavily and thereby driving the model to suppress them, which improves the robustness of the model predictions.</p>
<p>During training, the Adam optimizer is employed for parameter updates. This optimization algorithm integrates the advantages of momentum and adaptive learning rate adjustment, offering fast convergence and flexible parameter tuning. The initial learning rate is set to <inline-formula id="inf49">
<mml:math id="m67">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msup>
<mml:mn>10</mml:mn>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, and the total number of training epochs is set to 200. Furthermore, mini-batch gradient descent with a batch size of 32 is used to enhance both the training stability and computational efficiency.</p>
</sec>
</sec>
<sec id="s4">
<title>4 Experimental results and performance analysis</title>
<p>A comprehensive evaluation was conducted to assess the predictive performance of the proposed LSTM-Transformer hybrid model on the MIT battery dataset, demonstrating its effectiveness and superiority in the task of remaining useful life prediction for lithium-ion batteries. Model training and testing were performed on the publicly available MIT battery degradation dataset. After undergoing data preprocessing and temporal windowing, the dataset was restructured into time series samples with an input sequence length of 30 and an output prediction horizon of 7 steps. The objective was to forecast the battery capacity degradation trend over the next 7 cycles. During training, the Adam optimizer was employed with an initial learning rate set to 0.001, and an Early Stopping mechanism was integrated to prevent overfitting. Architecturally, the LSTM layer captures local temporal dynamics, while the Transformer module exploits its global attention mechanism to model long-term dependencies. The synergistic integration of both enhances the model&#x2019;s capacity to capture the complex degradation behaviors of batteries.</p>
<sec id="s4-1">
<title>4.1 Model training process</title>
<p>As shown in <xref ref-type="fig" rid="F5">Figure 5</xref>, the loss curves of both the training and testing sets over 200 epochs illustrate the model&#x2019;s convergence behavior. It can be observed that the loss values decrease steadily with increasing training epochs, particularly during the initial 50 epochs where a rapid drop is evident&#x2014;indicating efficient convergence. At epoch 50, the training loss decreased to 0.0021 and the testing loss reached 0.0054. Although the training loss continued to decrease afterward, the testing loss plateaued and remained low, reflecting the model&#x2019;s strong ability to avoid overfitting. By the end of training at epoch 200, the training loss converged to 0.0008, and the testing loss stabilized around 0.0038, suggesting strong generalization on unseen data. These training dynamics and test loss results demonstrate the model&#x2019;s stability and robust convergence characteristics.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Training and testing loss curves over epochs.</p>
</caption>
<graphic xlink:href="felec-06-1654344-g005.tif">
<alt-text content-type="machine-generated">Line graph titled &#x22;Training and Test Loss&#x22; showing loss over two hundred epochs. Training loss (blue line) and test loss (orange line) decrease sharply to near zero around epoch twenty and stabilize.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s4-2">
<title>4.2 Performance evaluation metrics for model prediction</title>
<p>To comprehensively and objectively evaluate the performance of the proposed model in battery life prediction tasks, three commonly used regression evaluation metrics are employed: Root Mean Square Error (RMSE), Mean Absolute Percentage Error (MAPE), and the Coefficient of Determination (R<sup>2</sup>). These metrics assess the prediction performance from three perspectives: absolute error, relative percentage error, and the model&#x2019;s ability to explain variance in the data. Collectively, they offer a robust evaluation framework for assessing the accuracy, stability, and generalization capability of data-driven predictive models. These metrics are also widely adopted in current state-of-the-art regression-based forecasting studies.</p>
<p>RMSE, which quantifies the standard deviation of the prediction errors between the predicted and actual values, is defined as <xref ref-type="disp-formula" rid="e19">Equation 19</xref>,<disp-formula id="e19">
<mml:math id="m68">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>M</mml:mi>
<mml:mi>S</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
</mml:math>
<label>(19)</label>
</disp-formula>where <inline-formula id="inf50">
<mml:math id="m69">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the actual value of the <inline-formula id="inf51">
<mml:math id="m70">
<mml:mrow>
<mml:msup>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> sample, <inline-formula id="inf52">
<mml:math id="m71">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> represents the predicted value of the <inline-formula id="inf53">
<mml:math id="m72">
<mml:mrow>
<mml:msup>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>h</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> sample, and <inline-formula id="inf54">
<mml:math id="m73">
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the total number of samples. A lower RMSE indicates smaller deviations between predicted and true values, reflecting higher model accuracy.</p>
<p>MAPE, or Mean Absolute Percentage Error, measures the relative percentage deviation between predicted and actual values. It is defined as <xref ref-type="disp-formula" rid="e20">Equation 20</xref>,<disp-formula id="e20">
<mml:math id="m74">
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>A</mml:mi>
<mml:mi>P</mml:mi>
<mml:mi>E</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="|" close="|" separators="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#xd7;</mml:mo>
<mml:mn>100</mml:mn>
<mml:mo>%</mml:mo>
</mml:mrow>
</mml:math>
<label>(20)</label>
</disp-formula>
</p>
<p>This metric intuitively reflects the percentage error of the predicted values relative to the true values, making it suitable for comparing prediction accuracy across different scales.</p>
<p>The coefficient of determination <italic>R</italic><sup>2</sup> is used to measure the goodness of fit of the model and is defined as <xref ref-type="disp-formula" rid="e21">Equation 21</xref>,<disp-formula id="e21">
<mml:math id="m75">
<mml:mrow>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msubsup>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="|">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(21)</label>
</disp-formula>
</p>
<p>Here, <inline-formula id="inf55">
<mml:math id="m76">
<mml:mrow>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
</mml:mrow>
</mml:math>
</inline-formula> denotes the mean of all true values. The coefficient of determination R<sup>2</sup> typically takes values in <inline-formula id="inf56">
<mml:math id="m77">
<mml:mrow>
<mml:mfenced open="[" close="]" separators="|">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
</inline-formula>, where values closer to 1 indicate a higher degree of model fit and greater explained variance.</p>
</sec>
<sec id="s4-3">
<title>4.3 Model performance analysis</title>
<p>The final evaluation results of the model&#x2019;s predictive performance are as follows: RMSE of 0.0085, MAPE of 0.0200, and an R<sup>2</sup> of 0.9902. These results demonstrate that the model exhibits excellent capability in fitting accuracy, error control, and capturing the variation trends of the target variable.</p>
<p>To more intuitively illustrate the model&#x2019;s predictive effectiveness, <xref ref-type="fig" rid="F6">Figure 6a</xref> presents a scatter plot comparing the predicted values with the true values. It can be observed that the majority of scatter points are densely clustered around the reference line, indicating a high consistency between the model&#x2019;s predictions and the actual battery life across different samples. This tightly concentrated scatter distribution not only validates the high R<sup>2</sup> value but also indirectly suggests that the model does not suffer from significant underfitting or overfitting, thereby demonstrating strong generalization ability.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>
<bold>(a)</bold> Scatter plot of predicted values versus true values. <bold>(b)</bold> Comparison of predicted and true trajectories during the training process.</p>
</caption>
<graphic xlink:href="felec-06-1654344-g006.tif">
<alt-text content-type="machine-generated">Graph (a) shows a scatter plot comparing predicted versus true life percentage, with a red dashed trend line indicating a positive correlation. Graph (b) displays a time series of life percentage, illustrating predicted versus true values using orange lines and blue dots, highlighting prediction accuracy over time steps.</alt-text>
</graphic>
</fig>
<p>
<xref ref-type="fig" rid="F6">Figure 6b</xref> shows the comparison between the predicted and true trajectories on the test set. By forecasting future states over consecutive time steps, it is evident that the proposed LSTM-Transformer hybrid model can effectively fit the target trend, with predictions closely matching the real values and no notable deviations. This result indicates that the model possesses reliable short-term predictive capability, effectively adapting to the nonlinear gradual degradation characteristics of the battery state sequence, thus meeting the engineering requirements for remaining useful life prediction.</p>
<p>For further analysis of prediction errors, <xref ref-type="fig" rid="F7">Figure 7a</xref> presents the frequency distribution histogram of the model&#x2019;s prediction residuals, aiming to reveal whether there exist systematic biases or outliers in the errors. As observed, the residuals roughly exhibit a symmetric bell-shaped distribution, with most errors concentrated near zero, indicating that the overall prediction errors are small and unbiased. This statistical characteristic of the error distribution suggests that the prediction deviations mainly arise from minor perturbations inherent in the data rather than from systematic errors caused by the model structure. Additionally, the absence of long tails or skewness in the residual distribution further confirms the stability and consistency of the model&#x2019;s predictions.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>
<bold>(a)</bold> Histogram of model residual frequency distribution. <bold>(b)</bold> Temporal variation of prediction errors.</p>
</caption>
<graphic xlink:href="felec-06-1654344-g007.tif">
<alt-text content-type="machine-generated">Chart (a) is a histogram showing the distribution of prediction errors, with most errors clustered around zero. Chart (b) is a line graph depicting prediction error over time, fluctuating around zero on the sample index for a test set.</alt-text>
</graphic>
</fig>
<p>
<xref ref-type="fig" rid="F7">Figure 7b</xref> depicts the temporal variation of prediction errors for all samples in the test set during the prediction process. Overall, the prediction errors fluctuate slightly without persistent systematic bias, demonstrating that no significant underfitting or overfitting occurred during training.</p>
<p>To more clearly present the comparative effects of the evaluation metrics, <xref ref-type="fig" rid="F8">Figure 8a</xref> visualizes the three core performance indicators RMSE, MAPE, and R<sup>2</sup> using a bar chart. It can be intuitively observed that all metrics fall within excellent ranges: RMSE approaches zero, MAPE is well below the commonly accepted 5% tolerance threshold for predictive models, and R<sup>2</sup> significantly exceeds the benchmark of 0.9 for strong model fit. This visualization not only facilitates a comprehensive and balanced demonstration of the model&#x2019;s performance but also enables straightforward comparison with traditional models, providing important references for subsequent optimization studies.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>
<bold>(a)</bold> Bar chart of evaluation metrics for the LSTM-Transformer model. <bold>(b)</bold> Comparison of RMSE performance among different models.</p>
</caption>
<graphic xlink:href="felec-06-1654344-g008.tif">
<alt-text content-type="machine-generated">(a) Bar chart showing LSTM-Transformer model performance metrics: RMSE at 0.0085, MAPE at 0.0200, and R2 at 0.9902, with R2 notably higher. (b) Violin plot comparing RMSE distribution across models: AUKF_GASVR, MC-LSTM, Bi-LSTM-AM, FBA-XGBoost-LSTM, and LSTM-Transformer, indicating variance in performance.</alt-text>
</graphic>
</fig>
<p>In summary, both quantitative metrics and visual analyses demonstrate that the proposed LSTM-Transformer hybrid model exhibits high accuracy, robustness, and interpretability in the battery remaining useful life prediction task. The model achieves satisfactory results not only in individual metric performance but also in fitting overall degradation trends and controlling prediction errors, providing strong empirical support for multimodal fusion approaches targeting complex time-series forecasting problems. Moreover, these outcomes lay a solid foundation for the model&#x2019;s future application in practical engineering scenarios.</p>
<p>To further validate the performance advantages of the proposed LSTM-Transformer model in lithium-ion battery RUL prediction, several representative benchmark models were selected for comparative experiments, as shown in <xref ref-type="fig" rid="F8">Figure 8b</xref>. Their prediction accuracies on the same dataset, expressed by RMSE values, are summarized in <xref ref-type="table" rid="T1">Table 1</xref>. The AUKF_GASVR model, which integrates adaptive unscented Kalman filtering with genetic-algorithm-optimized support vector regression, and the deep learning-based MC-LSTM model achieved RMSEs of 0.0134 and 0.0168, respectively, yet both still suffered from relatively large fitting errors.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Performance comparison of different models.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Model</th>
<th align="center">RMSE</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">AUKF_GASVR (<xref ref-type="bibr" rid="B20">Xue et al., 2020</xref>)</td>
<td align="center">0.0134</td>
</tr>
<tr>
<td align="center">MC-LSTM (<xref ref-type="bibr" rid="B13">Park et al., 2020</xref>)</td>
<td align="center">0.0168</td>
</tr>
<tr>
<td align="center">Bi-LSTM-AM (<xref ref-type="bibr" rid="B18">Wang et al., 2022</xref>)</td>
<td align="center">0.0106</td>
</tr>
<tr>
<td align="center">FBA-XGBoost-LSTM (<xref ref-type="bibr" rid="B8">Jin et al., 2025</xref>)</td>
<td align="center">0.01003</td>
</tr>
<tr>
<td align="center">LSTM-Transformer</td>
<td align="center">0.0085</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>With the introduction of attention mechanisms and ensemble learning strategies, model performance further improved. For instance, the Bi-LSTM-AM model, combining bidirectional sequence modeling with attention mechanisms, reduced the RMSE to 0.0106; the FBA-XGBoost-LSTM model, leveraging feature enhancement and deep network integration, compressed the error to 0.01003, demonstrating strong learning capability. However, among all compared models, the proposed LSTM-Transformer model achieved the best overall performance with an optimal RMSE of 0.0085, indicating a significant accuracy advantage.</p>
<p>These results fully demonstrate that the LSTM-Transformer hybrid model effectively integrates LSTM&#x2019;s strength in capturing local temporal dynamics with Transformer&#x2019;s ability to extract global dependency features in time-series modeling. This synergy enables a more comprehensive learning of the complex mechanisms underlying battery life evolution, yielding higher accuracy and robustness, making it a highly efficient modeling solution for current RUL prediction tasks.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<title>5 Conclusion</title>
<p>With the widespread adoption of electrification and intelligent systems in transportation, energy storage, and industrial control, health management and remaining useful life prediction of lithium-ion batteries have become critical tasks to ensure system safety and operational efficiency. Addressing key challenges such as the difficulty of RUL prediction under nonlinear and complex degradation mechanisms during battery operation, this work constructs a hybrid deep learning model that integrates Long Short-Term Memory networks with Transformer architecture based on the publicly available MIT battery dataset. The model aims to enhance prediction accuracy, stability, and generalization capability.</p>
<p>This study centers on the theme of &#x201c;high-dimensional sequence modeling and deep fusion prediction.&#x201d; First, in data preprocessing and feature engineering, raw sensor data including battery temperature and multi-channel voltages were systematically processed. Feature dimensionality reduction, principal component analysis, and physics-informed temperature-derived feature design were conducted to construct interpretable input variables such as temperature mean, temperature difference range, and temperature fluctuation. Meanwhile, unified data normalization techniques, including Z-score standardization and Yeo-Johnson transformation, were applied to improve the model&#x2019;s capability to handle multi-scale and heterogeneous distributions. Furthermore, to align with sequence prediction tasks, a sliding window method was employed to reconstruct time series samples, effectively embedding local temporal dynamics.</p>
<p>Second, at the model design level, this work proposes a fusion modeling approach combining LSTM and Transformer structures. The LSTM module leverages gating mechanisms to accurately capture short-term fluctuations and long-term dependencies, which is well-suited for modeling dynamic sequences exhibiting continuity and staged features during battery degradation. The Transformer module employs multi-head attention and positional encoding to model global dependencies across the entire input sequence, enhancing expressiveness under long-sequence conditions. By concatenating and fusing the feature vectors from both modules, a multimodal prediction model capable of simultaneously capturing local temporal dynamics and global structural variations was constructed.</p>
<p>In terms of training optimization and performance evaluation, a training framework based on mean squared error loss and the Adam optimizer was established, achieving stable convergence after 200 epochs. Evaluation on the test set demonstrated that the proposed LSTM-Transformer model attained an RMSE of 0.0085, MAPE of 0.0200, and R<sup>2</sup> of 0.9902, significantly outperforming conventional single deep learning models. Residual distribution analysis and visualization of prediction results further validated the model&#x2019;s strong ability to capture battery degradation trends, robust performance, and lack of systematic bias, indicating substantial potential for engineering applications.</p>
<p>However, the proposed method still faces certain limitations in practical deployment, such as its reliance on high-quality sensor data and insufficient transferability across different usage scenarios. Future research will focus on enhancing the model&#x2019;s adaptability to multi-source data and exploring strategies that integrate online learning with few-shot learning to improve its practicality and robustness.</p>
<p>In summary, the proposed LSTM-Transformer fusion prediction model exhibits high accuracy and stability in battery RUL forecasting. It provides effective technical support for the development of next-generation intelligent battery management systems, with promising prospects for practical engineering deployment and broader adoption.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/supplementary material; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>TZ: Writing &#x2013; review and editing, Investigation, Writing &#x2013; original draft, Formal Analysis, Visualization, Data curation. YZ: Methodology, Conceptualization, Supervision, Writing &#x2013; review and editing, Writing &#x2013; original draft. MW: Formal Analysis, Methodology, Writing &#x2013; review and editing, Investigation. WF: Software, Formal Analysis, Methodology, Project administration, Writing &#x2013; review and editing. SC: Formal Analysis, Validation, Writing &#x2013; review and editing, Investigation. GW: Writing &#x2013; review and editing, Methodology, Supervision, Investigation.</p>
</sec>
<sec sec-type="funding-information" id="s8">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. This work is supported by the Guangdong Basic and Applied Basic Research Foundation (No. 2023A1515240014) and the Key Project of the Shenzhen Science and Technology Plan (No. JCYJ20220818103416035).</p>
</sec>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s10">
<title>Correction note</title>
<p>A correction has been made to this article. Details can be found at: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/felec.2025.1693752">10.3389/felec.2025.1693752</ext-link>.</p>
</sec>
<sec sec-type="ai-statement" id="s11">
<title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s12">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bao-Hua</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Long-Wen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yi-Qiang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Dual power transformation and Yeo&#x2013;Johnson techniques for static and dynamic reliability assessments</article-title>. <source>Buildings</source> <volume>14</volume> (<issue>11</issue>), <fpage>3625</fpage>. <pub-id pub-id-type="doi">10.3390/buildings14113625</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Hong</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Transformer network for remaining useful life prediction of lithium-ion batteries</article-title>. <source>IEEE Access</source> <volume>10</volume>, <fpage>19621</fpage>&#x2013;<lpage>19628</lpage>. <pub-id pub-id-type="doi">10.1109/access.2022.3151975</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Crawford</surname>
<given-names>A. J.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Balducci</surname>
<given-names>P. J.</given-names>
</name>
<name>
<surname>Subramanian</surname>
<given-names>V. R.</given-names>
</name>
<name>
<surname>Viswanathan</surname>
<given-names>V. V.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Lithium-ion battery physics and statistics-based state of health model</article-title>. <source>J. Power Sources</source> <volume>501</volume>, <fpage>230032</fpage>. <pub-id pub-id-type="doi">10.1016/j.jpowsour.2021.230032</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Elmahallawy</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Elfouly</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Alouani</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Massoud</surname>
<given-names>A. M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A comprehensive review of lithium-ion batteries modeling, and state of health and remaining useful lifetime prediction</article-title>. <source>IEEE Access</source> <volume>10</volume>, <fpage>119040</fpage>&#x2013;<lpage>119070</lpage>. <pub-id pub-id-type="doi">10.1109/access.2022.3221137</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Han</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zheng</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Lei</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Remaining useful life prediction of lithium-ion batteries by using a denoising transformer-based neural network</article-title>. <source>Energies</source> <volume>16</volume> (<issue>17</issue>), <fpage>6328</fpage>. <pub-id pub-id-type="doi">10.3390/en16176328</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jafari</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Byun</surname>
<given-names>Y. C.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>XGBoost-based remaining useful life estimation model with extended Kalman particle filter for lithium-ion batteries</article-title>. <source>Sensors</source> <volume>22</volume> (<issue>23</issue>), <fpage>9522</fpage>. <pub-id pub-id-type="doi">10.3390/s22239522</pub-id>
<pub-id pub-id-type="pmid">36502223</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ji</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>dos Reis</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Data-driven battery characterization and prognosis: recent progress, challenges, and prospects</article-title>. <source>Small Methods</source> <volume>8</volume> (<issue>7</issue>), <fpage>2301021</fpage>. <pub-id pub-id-type="doi">10.1002/smtd.202301021</pub-id>
<pub-id pub-id-type="pmid">38213008</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jin</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Qiu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Kong</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>A data-driven framework for lithium-ion battery RUL using LSTM and XGBoost with feature selection via binary firefly algorithm</article-title>. <source>Energy</source> <volume>314</volume>, <fpage>134229</fpage>. <pub-id pub-id-type="doi">10.1016/j.energy.2024.134229</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Byg</surname>
<given-names>V. S.</given-names>
</name>
<name>
<surname>Daniel Ioan</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>The development of machine learning-based remaining useful life prediction for lithium-ion batteries</article-title>. <source>J. Energy Chem.</source> <volume>82</volume>, <fpage>103</fpage>&#x2013;<lpage>121</lpage>. <pub-id pub-id-type="doi">10.1016/j.jechem.2023.03.026</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Meng</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Battery health prognosis based on sliding window sampling of charging curves and independently recurrent neural network</article-title>. <source>IEEE Trans. Instrum. Meas.</source> <volume>73</volume>, <fpage>1</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1109/tim.2023.3348894</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Fei</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Electrochemical modeling and parameterization towards control-oriented management of lithium-ion batteries</article-title>. <source>Control Eng. Pract.</source> <volume>124</volume>, <fpage>105176</fpage>. <pub-id pub-id-type="doi">10.1016/j.conengprac.2022.105176</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Nunes</surname>
<given-names>T. S. N.</given-names>
</name>
<name>
<surname>Moura</surname>
<given-names>J. J. P.</given-names>
</name>
<name>
<surname>Prado</surname>
<given-names>O. G.</given-names>
</name>
<name>
<surname>Camboim</surname>
<given-names>M. M.</given-names>
</name>
<name>
<surname>de Fatima N. Rosolem</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Beck</surname>
<given-names>R. F.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>An online unscented Kalman filter remaining useful life prediction method applied to second-life lithium-ion batteries</article-title>. <source>Electr. Eng.</source> <volume>105</volume> (<issue>6</issue>), <fpage>3481</fpage>&#x2013;<lpage>3492</lpage>. <pub-id pub-id-type="doi">10.1007/s00202-023-01910-7</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Park</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>W. J.</given-names>
</name>
<name>
<surname>Ryu</surname>
<given-names>H. Y.</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>LSTM-based battery remaining useful life prediction with multi-channel charging profiles</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>20786</fpage>&#x2013;<lpage>20798</lpage>. <pub-id pub-id-type="doi">10.1109/access.2020.2968939</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reza</surname>
<given-names>M. S.</given-names>
</name>
<name>
<surname>Hannan</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Mansor</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ker</surname>
<given-names>P. J.</given-names>
</name>
<name>
<surname>Tiong</surname>
<given-names>S. K.</given-names>
</name>
<name>
<surname>Hossain</surname>
<given-names>M. J.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Gravitational search algorithm based LSTM deep neural network for battery capacity and remaining useful life prediction with uncertainty</article-title>. <source>IEEE Trans. Industry Appl.</source> <volume>60</volume>, <fpage>9171</fpage>&#x2013;<lpage>9183</lpage>. <pub-id pub-id-type="doi">10.1109/tia.2024.3429452</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sharma</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Bora</surname>
<given-names>B. J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A review of modern machine learning techniques in the prediction of remaining useful life of lithium-ion batteries</article-title>. <source>Batteries</source> <volume>9</volume> (<issue>1</issue>), <fpage>13</fpage>. <pub-id pub-id-type="doi">10.3390/batteries9010013</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tong</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Miao</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Tong</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Early prediction of remaining useful life for lithium-ion batteries based on a hybrid machine learning method</article-title>. <source>J. Clean. Prod.</source> <volume>317</volume>, <fpage>128265</fpage>. <pub-id pub-id-type="doi">10.1016/j.jclepro.2021.128265</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bai</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Shi</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Fernandez</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>A critical review of improved deep learning methods for the remaining useful life prediction of lithium-ion batteries</article-title>. <source>Energy Rep.</source> <volume>7</volume>, <fpage>5562</fpage>&#x2013;<lpage>5574</lpage>. <pub-id pub-id-type="doi">10.1016/j.egyr.2021.08.182</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>F. K.</given-names>
</name>
<name>
<surname>Amogne</surname>
<given-names>Z. E.</given-names>
</name>
<name>
<surname>Chou</surname>
<given-names>J. H.</given-names>
</name>
<name>
<surname>Tseng</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Online remaining useful life prediction of lithium-ion batteries using bidirectional long short-term memory with attention mechanism</article-title>. <source>Energy</source> <volume>254</volume>, <fpage>124344</fpage>. <pub-id pub-id-type="doi">10.1016/j.energy.2022.124344</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Perspectives and challenges for future lithium-ion battery control and management</article-title>. <source>eTransportation</source> <volume>18</volume>, <fpage>100260</fpage>. <pub-id pub-id-type="doi">10.1016/j.etran.2023.100260</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xue</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Remaining useful life prediction of lithium-ion batteries with adaptive unscented Kalman filter and optimized support vector regression</article-title>. <source>Neurocomputing</source> <volume>376</volume>, <fpage>95</fpage>&#x2013;<lpage>102</lpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2019.09.074</pub-id>
</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Hong</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Machine learning-based state of health prediction for battery systems in real-world electric vehicles</article-title>. <source>J. Energy Storage</source> <volume>66</volume>, <fpage>107426</fpage>. <pub-id pub-id-type="doi">10.1016/j.est.2023.107426</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Hou</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>Y.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>A review of lithium-ion battery state of health estimation and prediction methods</article-title>. <source>World Electr. Veh. J.</source> <volume>12</volume> (<issue>3</issue>), <fpage>113</fpage>. <pub-id pub-id-type="doi">10.3390/wevj12030113</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Sajadi</surname>
<given-names>S. M.</given-names>
</name>
<name>
<surname>Prabuwono</surname>
<given-names>A. S.</given-names>
</name>
<name>
<surname>Mahmoud</surname>
<given-names>M. Z.</given-names>
</name>
<name>
<surname>Cheraghian</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2022a</year>). <article-title>The machine learning in lithium-ion batteries: a review</article-title>. <source>Eng. Analysis Bound. Elem.</source> <volume>141</volume>, <fpage>1</fpage>&#x2013;<lpage>16</lpage>. <pub-id pub-id-type="doi">10.1016/j.enganabound.2022.04.035</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Tian</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2022b</year>). <article-title>Deep learning in the state of charge estimation for Li-ion batteries of electric vehicles: a review</article-title>. <source>Machines</source> <volume>10</volume> (<issue>10</issue>), <fpage>912</fpage>. <pub-id pub-id-type="doi">10.3390/machines10100912</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>