<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" dtd-version="1.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Energy Res.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Energy Research</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Energy Res.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2296-598X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1778611</article-id>
<article-id pub-id-type="doi">10.3389/fenrg.2026.1778611</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>A federated collaborative short-term industrial load forecasting method integrating meta-learning and differentiated privacy allocation</article-title>
<alt-title alt-title-type="left-running-head">Ding et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fenrg.2026.1778611">10.3389/fenrg.2026.1778611</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Ding</surname>
<given-names>Maomao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3334192"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Cai</surname>
<given-names>Zhongwei</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Boyang</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal Analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing - original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Han</surname>
<given-names>Junjie</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/Writing - review &#x26; editing/">Writing - review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
</contrib>
</contrib-group>
<aff id="aff1">
<label>1</label>
<institution>Customer Service Center, State Grid Corporation of China</institution>, <city>Tianjin</city>, <country country="CN">China</country>
</aff>
<aff id="aff2">
<label>2</label>
<institution>Research Center, Beijing Tsingsoft Technology Co., Ltd</institution>, <city>Beijing</city>, <country country="CN">China</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Maomao Ding, <email xlink:href="mailto:mmding8909_bjqr@163.com">mmding8909_bjqr@163.com</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-18">
<day>18</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>14</volume>
<elocation-id>1778611</elocation-id>
<history>
<date date-type="received">
<day>31</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>19</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>27</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Ding, Cai, Chen and Han.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Ding, Cai, Chen and Han</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-18">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Short-term load forecasting for industrial users is often limited by scarce local data and heterogeneous load patterns, while cross-factory collaboration raises serious privacy concerns due to production-sensitive information. This paper proposes a privacy-preserving federated forecasting framework for industrial short-term load prediction that integrates meta-learning and differentiated differential privacy allocation. The framework is built on federated learning, where factories perform local training and upload encrypted model updates for secure aggregation, ensuring data availability without data visibility. To address negative transfer and poor personalization caused by load heterogeneity, a meta-learning strategy is introduced to enable rapid adaptation to new factories or operating conditions using only a small amount of local data. In addition, a differentiated privacy-weight mechanism is designed to dynamically allocate privacy budgets based on data contribution and sensitivity, achieving a better trade-off between privacy protection and forecasting accuracy. Experiments on real-world industrial load datasets show that the proposed method reduces MAPE by 1.99 percentage points compared with independent training, and by 2.63 percentage points in new operating-condition scenarios, demonstrating its effectiveness for secure and collaborative industrial load forecasting.</p>
</abstract>
<kwd-group>
<kwd>federated learning</kwd>
<kwd>industrial load forecasting</kwd>
<kwd>meta-learning</kwd>
<kwd>privacy preserving</kwd>
<kwd>short-term load forecasting</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement>
</funding-group>
<counts>
<fig-count count="7"/>
<table-count count="9"/>
<equation-count count="18"/>
<ref-count count="31"/>
<page-count count="00"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Energy Efficiency</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>Industrial load forecasting is a fundamental basis for the secure and stable operation and refined management of power systems, playing a critical role in power grid dispatching, demand response, energy efficiency assessment, and renewable energy integration (<xref ref-type="bibr" rid="B4">Duan et al., 2026</xref>; <xref ref-type="bibr" rid="B6">Hasan et al., 2025</xref>). Compared with residential and commercial users, industrial electricity consumption is highly coupled with production schedules, process flows, and equipment operating conditions, resulting in load variations that are often abrupt and highly stochastic, thereby imposing more stringent requirements on short-term load forecasting (STLF) (<xref ref-type="bibr" rid="B15">Liu et al., 2023</xref>). However, due to frequent switching of production conditions and the diversity and complexity of industrial processes, industrial loads exhibit pronounced non-stationary and strongly nonlinear characteristics, making it difficult for traditional statistical models and single data-driven approaches to accurately capture their dynamic evolution patterns (<xref ref-type="bibr" rid="B8">Hu and Man, 2023</xref>; <xref ref-type="bibr" rid="B7">He et al., 2025</xref>). Moreover, constrained by the operating cycles of individual factories and the level of data accumulation, the historical load data available for modeling are often limited in scale, resulting in insufficient model training and restricted generalization capability, with prediction performance deteriorating significantly under new operating conditions or abnormal scenarios. Fully exploiting common patterns across multiple entities and scenarios while ensuring industrial data security has thus become a key issue in improving the accuracy of industrial short-term load forecasting. Cross-factory collaborative modeling, by integrating multi-source industrial load features, is expected to alleviate the performance bottlenecks caused by data scarcity and to provide new research perspectives for building more robust and high-accuracy industrial load forecasting models.</p>
<p>For industrial short-term load forecasting tasks, existing methods can generally be categorized into two types according to the number of participating entities: independent forecasting (<xref ref-type="bibr" rid="B13">Li et al., 2025</xref>) and cross-user centralized collaborative forecasting (<xref ref-type="bibr" rid="B10">Huang et al., 2025</xref>). Independent forecasting is typically based on data from a single factory and employs statistical models such as ARIMA and regression models, or machine learning and deep learning models such as LSTM and Transformer (<xref ref-type="bibr" rid="B2">Biswal et al., 2024</xref>; <xref ref-type="bibr" rid="B26">Wazirali et al., 2023</xref>). <xref ref-type="bibr" rid="B23">Walther et al. (2019)</xref> proposed a GBRT-based short-term load forecasting method that integrates feature selection, feature engineering, and hyperparameter tuning, achieving high-accuracy 15-min industrial load forecasting and effectively addressing the difficulties of model construction and optimization under high-dimensional multi-source features. <xref ref-type="bibr" rid="B31">Zhu et al. (2023)</xref> developed an industrial load forecasting approach that combines load change rate features with a firefly-algorithm-optimized extreme learning machine and AdaBoost ensemble learning, enabling effective characterization of the nonlinear properties of industrial loads and significantly reducing forecasting errors while overcoming the limitations of traditional models in feature representation and parameter optimization. <xref ref-type="bibr" rid="B30">Zhang et al. (2025)</xref> addressed the strong nonlinearity and severe fluctuations of industrial building loads by proposing a short-term load forecasting method that integrates multiple signal decomposition techniques with deep hybrid networks; by combining VMD and CEEMDAN decomposition with sample entropy&#x2013;based reconstruction, the method effectively reduces noise interference and extracts multi-scale load features. <xref ref-type="bibr" rid="B22">Walser and Sauer (2021)</xref> proposed a hybrid machine learning model for factory-level load forecasting, which combines typical load profile analysis with convolutional neural networks and leverages near-real-time high-resolution multidimensional data to achieve robust and high-accuracy short-term load forecasting, effectively overcoming the limitations of traditional models in terms of real-time performance, multi-source sub-meter data fusion, and cross-factory generalization. <xref ref-type="bibr" rid="B17">Majeske et al. (2025)</xref> introduced an industrial energy forecasting framework based on dynamic knowledge graphs and attention-based RNNs, achieving high-accuracy and interpretable energy consumption prediction, significantly outperforming conventional RNN models while maintaining suitability for edge deployment. However, statistical models are constrained by assumptions of linearity and stationarity, and although machine learning and deep learning models can capture nonlinear relationships, they rely heavily on sufficient high-quality historical data and are prone to overfitting with limited generalization capability in scenarios characterized by frequent industrial load pattern switching, high noise, and data scarcity. Overall, independent forecasting is easy to implement and deploy because it only requires single-factory data and avoids cross-organization data sharing. 
It can be tailored to a specific factory&#x2019;s operational schedule and equipment characteristics, thus achieving high accuracy when sufficient high-quality historical data are available. However, its performance is often constrained by data scarcity and non-stationary load behaviors in industrial scenarios. In particular, frequent production-mode switching, strong noise, and abrupt changes may lead to limited generalization, high sensitivity to feature drift, and overfitting, especially for complex deep models trained on small datasets.</p>
<p>To overcome the limitations of single-site data, an alternative line of research explores collaborative learning across multiple factories to exploit shared patterns and enlarge the training set. To alleviate the performance bottlenecks caused by data scarcity, cross-user centralized collaborative forecasting trains a unified model by aggregating load data from multiple factories, thereby expanding the sample size, extracting common patterns, and improving forecasting accuracy and robustness (<xref ref-type="bibr" rid="B18">Manzoor et al., 2024</xref>; <xref ref-type="bibr" rid="B5">Fan et al., 2024</xref>). <xref ref-type="bibr" rid="B16">Liu et al. (2025)</xref> proposed a self-allocated Kolmogorov&#x2013;Arnold Network (KAN) method for multi-energy load forecasting in integrated energy systems, which employs multi-decoder Informer encoding combined with random sample self-composition and correlation feature self-allocation modules to achieve multivariate fusion and co-decoding, thus enhancing predictive performance under coupled and uncertain scenarios. <xref ref-type="bibr" rid="B14">Liao et al. (2024)</xref> developed a bi-level multi-task learning (BiMTL) approach for multi-energy load forecasting in integrated energy systems, which enhances local feature representation through an improved wavelet packet decomposition and performs reconstruction and error correction of prediction results in the second-level shared learning stage. Experimental results demonstrate that the constructed BiMTL-LSTM model significantly outperforms existing methods. Cross-user centralized collaborative forecasting can significantly improve robustness by aggregating multi-factory data, enabling the model to learn common load dynamics and reducing variance under data scarcity. Nevertheless, it faces practical challenges: (i) centralized data collection may violate privacy constraints and data governance policies; (ii) industrial loads are highly heterogeneous across factories, so a unified model may suffer from negative transfer or biased learning toward dominant participants; and (iii) communication, storage, and continuous updating of large-scale multi-factory datasets increase engineering complexity and cost. These limitations motivate the need for collaborative forecasting paradigms that can leverage cross-user knowledge while addressing privacy and heterogeneity.</p>
<p>To address the privacy and compliance challenges inherent in centralized collaborative forecasting, research efforts have gradually shifted toward collaborative modeling under privacy-preserving conditions (<xref ref-type="bibr" rid="B11">Kaur et al., 2025</xref>; <xref ref-type="bibr" rid="B21">Shen et al., 2024</xref>). Achieving cross-entity information collaboration without directly sharing raw load data has thus become a key research direction. Privacy-preserving collaborative learning frameworks represented by federated learning effectively reduce the risk of industrial data leakage through local training and model parameter exchange, providing a feasible pathway for multi-factory joint modeling. <xref ref-type="bibr" rid="B12">Li et al. (2024)</xref> proposed a privacy-preserving federated learning approach for building energy load forecasting by incorporating differential privacy (DP) and a POWER-SELECTION protocol, which significantly reduces communication overhead while safeguarding user electricity consumption privacy; experimental results show that the method achieves an effective balance between improved forecasting accuracy, bandwidth efficiency, and provable privacy protection. <xref ref-type="bibr" rid="B25">Wang and Li (2022)</xref> introduced a cloud&#x2013;edge collaborative short-term load forecasting method for smart grids, which constructs a pool of pre-trained models in the cloud and performs lightweight retraining and optimal model selection at the edge, thereby improving forecasting accuracy while reducing edge-side computational burden; the effectiveness of the method was validated through extensive experiments, while also addressing data privacy and personalization through pre-training transfer and hierarchical edge training. <xref ref-type="bibr" rid="B3">Chen et al. (2024)</xref> proposed an improved Transformer model based on federated learning for estimating behind-the-meter photovoltaic power generation at the community level without centralized sharing of raw data, effectively mitigating privacy risks associated with centralized training; by combining locally retained parameters with exchanged parameters in a dual-layer structure, the method enables secure collaborative modeling of common features across multiple nodes while protecting user-side data privacy. <xref ref-type="bibr" rid="B9">Huang et al. (2024)</xref> developed a federated learning&#x2013;based load forecasting method for energy aggregation service providers, which achieves accurate prediction through weighted collaborative training and parameter sharing among energy entities without exchanging raw data. Nevertheless, due to the pronounced differences in factory operating modes and the high heterogeneity of industrial load characteristics (<xref ref-type="bibr" rid="B28">Yu et al., 2016</xref>), existing methods still suffer from limitations in model generalization capability, adaptability to new operating conditions, and the trade-off between privacy protection strength and forecasting performance, indicating the need for further research and improvement.</p>
<p>Compared with existing studies, the proposed method provides a unified solution to three key challenges in industrial STLF, namely, data scarcity, cross-factory heterogeneity, and privacy&#x2013;accuracy trade-off. Unlike independent forecasting that is often constrained by limited local data and weak generalization under operating-condition shifts, our framework enables cross-factory knowledge sharing while preserving personalization. In contrast to centralized collaborative forecasting that requires collecting raw industrial data and may violate privacy and compliance requirements, the proposed approach follows a federated learning paradigm so that raw load data remain on-site and only protected updates are exchanged. Moreover, compared with conventional federated forecasting that mainly relies on parameter averaging and is prone to negative transfer under heterogeneous factories, we incorporate meta-learning for rapid adaptation and an adaptive DP allocation mechanism to balance privacy protection and forecasting performance. The main contributions of this paper are summarized as follows:<list list-type="roman-lower">
<list-item>
<p>To mitigate negative transfer and insufficient personalization caused by heterogeneous industrial load patterns, a meta-learning algorithm for rapid cross-factory adaptation is proposed, enabling the global model to quickly adapt to new operating conditions or new factories with only a small amount of local data, thereby enhancing personalized forecasting performance and generalization capability.</p>
</list-item>
<list-item>
<p>To alleviate the conflict between privacy protection strength and model performance, an improved federated learning algorithm incorporating DP weights is designed. By jointly quantifying the data contribution and privacy sensitivity of each factory, the proposed method dynamically allocates DP budgets and performs weighted aggregation, achieving an adaptive trade-off between privacy protection and forecasting accuracy.</p>
</list-item>
<list-item>
<p>Experiments on real-world industrial load datasets show that the proposed method reduces MAPE by 1.99 percentage points compared with independent training, and by 2.63 percentage points in new operating-condition scenarios, demonstrating its effectiveness for secure and collaborative industrial load forecasting.</p>
</list-item>
</list>
</p>
</sec>
<sec id="s2">
<label>2</label>
<title>Methodologies</title>
<sec id="s2-1">
<label>2.1</label>
<title>Overall framework of the model</title>
<p>As illustrated in <xref ref-type="fig" rid="F1">Figure 1</xref>, the proposed privacy-preserving cross-scenario collaborative industrial short-term load forecasting framework contains three tightly coupled components under the federated learning paradigm: (i) a local meta-learning module deployed at each factory (client), (ii) an adaptive privacy allocation module executed on the client side to protect the shared updates, and (iii) a federated aggregation module on the server side. These modules operate collaboratively to enable the sharing and transfer of load forecasting capabilities across factories while ensuring data privacy and security.<list list-type="order">
<list-item>
<p>Local Meta-learning module. On the local side, each industrial factory acts as an independent client and performs meta-learning training based on its own historical load data <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Specifically, to improve generalization across different operating conditions within the same factory, <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is organized into a set of scenario-driven learning tasks. For each task, the data are split into a support set and a query set: the support set is used for inner-loop adaptation, while the query set is used for outer-loop evaluation to compute the meta-objective and obtain a task-level meta-gradient. After completing task-level training over multiple tasks, the client produces a single update that summarizes how the meta-model should be adjusted to adapt quickly across local scenarios. This update is the meta-gradient <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, which encodes high-information-density transferable knowledge about how the forecasting model should adapt across scenarios. Although this procedure enables effective feature extraction from limited local data, gradient-level sharing may still introduce potential privacy leakage risks; therefore, raw time-series data never leave the factory, and only meta-gradient information is prepared for communication.</p>
</list-item>
<list-item>
<p>Adaptive Privacy Allocation module. Subsequently, the locally generated meta-gradient <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is fed into the adaptive privacy allocation module for privacy-preserving processing before transmission. This module quantitatively evaluates two key factors: (i) the data contribution (utility) of the client update to global modeling and (ii) the corresponding privacy sensitivity (risk) associated with sharing gradient information. Based on these two assessments, an adaptive &#x201c;privacy&#x2013;performance&#x201d; regulation mechanism assigns a client-specific DP budget. The meta-gradient is then clipped and perturbed with calibrated noise to obtain a privacy-protected update. Intuitively, factories with higher privacy sensitivity are allocated stricter privacy budgets, whereas updates with greater contribution are allowed to preserve more effective features under controllable privacy constraints, thereby enabling the extraction of shareable information. The protected update is further encrypted for secure transmission to the server.</p>
</list-item>
<list-item>
<p>Federated Aggregation module. At the server side, the federated aggregation module receives encrypted updates from all clients and performs secure aggregation to prevent the exposure of individual client updates. The server then conducts privacy-aware weighted fusion to iteratively update the global meta-model parameters. By integrating heterogeneous client updates in a differentiated manner, the server balances forecasting performance improvement and privacy protection. The updated global model is subsequently redistributed to all factories for the next round of local meta-learning, forming a closed-loop collaborative optimization process of &#x201c;local adaptation&#x2013;privacy control&#x2013;global aggregation.&#x201d;</p>
</list-item>
</list>
</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Overall structure of the model.</p>
</caption>
<graphic xlink:href="fenrg-14-1778611-g001.tif">
<alt-text content-type="machine-generated">Diagram illustrating a federated meta-learning process with three sections: local meta-learning using multi-factory client data for model updates; adaptive privacy allocation balancing privacy and performance through evaluation; federated aggregation module updating encrypted meta gradients for secure sharing.</alt-text>
</graphic>
</fig>
<p>Overall, the proposed framework achieves &#x201c;data remaining on-site with collaborative model learning&#x201d; at the architectural level, enhances the model&#x2019;s rapid cross-scenario adaptation capability through meta-learning, and alleviates the conflict between privacy protection and forecasting performance via DP allocation, thereby providing a systematic solution for secure and efficient collaborative modeling in industrial load forecasting scenarios.</p>
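<p>To make this closed loop concrete, the following Python sketch outlines a single communication round under simplified assumptions: the meta-model is a plain parameter vector, the factories&#x2019; meta-gradients are taken as given, and the privacy module is reduced to a fixed-noise stub (Section 2.3 replaces it with clipping and budget-calibrated noise). All function names, weights, and data are illustrative rather than the authors&#x2019; implementation.</p>
<code language="python" xml:space="preserve">
import numpy as np

rng = np.random.default_rng(0)

def privatize(update, sigma=0.05):
    # Stand-in for the adaptive privacy allocation module (Section 2.3),
    # where the noise scale is calibrated per client from its DP budget.
    return update + rng.normal(0.0, sigma, size=update.shape)

def federated_round(theta, client_meta_grads, weights, meta_lr=0.1):
    """One round of 'local adaptation - privacy control - global aggregation':
    each factory's meta-gradient is protected on the client side, the server
    fuses the protected updates with privacy-aware weights, and the updated
    global meta-model is redistributed for the next round."""
    protected = [privatize(g) for g in client_meta_grads]
    aggregated = sum(w * g for w, g in zip(weights, protected))
    return theta - meta_lr * aggregated

# Toy usage: three factories sharing a 4-parameter meta-model.
theta = np.zeros(4)
grads = [rng.normal(size=4) for _ in range(3)]  # outputs of local meta-learning
theta = federated_round(theta, grads, weights=[0.5, 0.2, 0.3])
</code>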
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>Meta-learning model based on autoformer</title>
<p>To combine Autoformer with meta-learning, this paper uses Autoformer as the backbone forecasting network <inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> in the meta-learning framework, and treats each factory as an independent task. Specifically, the series decomposition and auto-correlation mechanism of Autoformer are retained to extract trend&#x2013;seasonal representations and capture long-term periodic dependencies, while the model parameters <inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> of the entire Autoformer are optimized in a bi-level meta-learning manner. In each meta-training round, Autoformer first performs a few inner-loop updates on the support set of a factory to obtain task-adapted parameters <inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, and then computes the meta-gradient using the query set to evaluate post-adaptation performance. The server (or meta-learner) aggregates meta-gradients across factories to update the shared initialization <inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, so that the learned Autoformer initialization can rapidly adapt to unseen factories or new operating conditions with only limited local data and a small number of gradient steps.</p>
<sec id="s2-2-1">
<label>2.2.1</label>
<title>Autoformer</title>
<p>To effectively capture the significant trend and cyclical characteristics in short-term industrial load sequences and enhance the model&#x2019;s adaptability in small-sample and new operating condition scenarios, this paper selects Autoformer as the base model in the meta-learning framework. By employing series decomposition and an Auto-Correlation Mechanism, Autoformer achieves efficient modeling of long-term dependencies (<xref ref-type="bibr" rid="B19">Ouyang et al., 2025</xref>; <xref ref-type="bibr" rid="B1">Ban et al., 2024</xref>), demonstrating stronger stability and generalization capabilities compared to traditional Transformer models in time series forecasting tasks. The structure of the Autoformer model is shown in <xref ref-type="fig" rid="F2">Figure 2</xref>.<list list-type="order">
<list-item>
<p>Series Decomposition Mechanism: Industrial load time series typically contain both long-term variation trends and cyclical fluctuations induced by production rhythms, shift schedules, and other factors, exhibiting pronounced non-stationary characteristics. Directly modeling the raw series requires the model to simultaneously learn patterns across different temporal scales, which not only increases modeling difficulty but also tends to lead to unstable training. To address this, Autoformer introduces a series decomposition mechanism during the encoding stage, explicitly separating the original load series into trend and seasonal components, thereby achieving decoupled modeling of the complex temporal structure. Specifically, the trend component captures the slowly evolving long-term characteristics of the load over time, such as adjustments in production scale or changes in equipment operating states, while the seasonal component reflects short-term fluctuation patterns caused by periodic production behaviors. By employing moving average operations to extract the trend component, high-frequency noise and short-term disturbances can be effectively filtered out, allowing the remaining seasonal component to concentrate more on expressing the cyclical structure. This decomposition process not only reduces the non-stationarity of the series but also enables the model to separately model long-term trends and short-term dynamics at different levels, thereby enhancing the stability and generalization capability of predictions. Given an industrial load time series <inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mfenced open="{" close="}" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>.</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>T</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>, Autoformer first decomposes it into trend and seasonal components, as shown in <xref ref-type="disp-formula" rid="e1">Equation 1</xref>. The trend component <inline-formula id="inf10">
<mml:math id="m10">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is obtained through a moving average operation, as presented in <xref ref-type="disp-formula" rid="e2">Equation 2</xref>. The seasonal component <inline-formula id="inf11">
<mml:math id="m11">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is calculated according to <xref ref-type="disp-formula" rid="e3">Equation 3</xref>. This decomposition approach helps reduce non-stationarity and enables the model to focus more on the cyclical structure and dynamic variation patterns.</p>
</list-item>
<list-item>
<p>Auto-Correlation Attention Mechanism: In industrial load forecasting tasks, current load values often exhibit significant correlations with loads at certain historical moments, and such correlations typically present periodic repetition characteristics, such as daily or weekly cycles. The self-attention mechanism of the traditional Transformer models these relationships through pairwise similarities among all time points, resulting in high computational complexity and difficulty in explicitly capturing periodic dependencies. To address this issue, Autoformer adopts an auto-correlation attention mechanism centered on delay correlations, which directly models the periodic structures in time series. This mechanism first computes the correlation degree of the seasonal component under different time delays, measuring the similarity of the sequence across varying time offsets. By selecting several delay terms with the highest correlation, the model can automatically identify the most significant cycle lengths in the sequence and incorporate corresponding historical information into the representation of the current moment through weighted aggregation. This delay-based aggregation approach avoids pairwise computations across all time points, not only reducing computational complexity but also better aligning with the inherent &#x201c;periodic repetition&#x201d; pattern of time series. During the encoding stage, Autoformer replaces the conventional self-attention with the auto-correlation mechanism, enabling explicit modeling of periodic dependencies in time series. Its core idea is to leverage delay correlations for capturing long-term dependencies in the sequence, as shown in <xref ref-type="disp-formula" rid="e4">Equation 4</xref>. Features are aggregated via weighted summation using the <italic>K</italic> time delays <inline-formula id="inf12">
<mml:math id="m12">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> with the highest correlations, as shown in <xref ref-type="disp-formula" rid="e5">Equation 5</xref>.</p>
</list-item>
</list>
<disp-formula id="e1">
<mml:math id="m13">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2b;</mml:mo>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
<disp-formula id="e2">
<mml:math id="m14">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>t</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mo>&#x230a;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>/</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#x230b;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mo>&#x230a;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>/</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#x230b;</mml:mo>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
<disp-formula id="e3">
<mml:math id="m15">
<mml:mrow>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
<disp-formula id="e4">
<mml:math id="m16">
<mml:mrow>
<mml:mi mathvariant="script">R</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mi>t</mml:mi>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#xb7;</mml:mo>
<mml:msubsup>
<mml:mi>X</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
<disp-formula id="e5">
<mml:math id="m17">
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>C</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>r</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>K</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:mi>R</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mspace width="0.17em"/>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>X</mml:mi>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>where <inline-formula id="inf13">
<mml:math id="m18">
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the input industrial load time series with length <italic>T</italic>, and <inline-formula id="inf14">
<mml:math id="m19">
<mml:mrow>
<mml:msub>
<mml:mi>x</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the load value at time index <italic>t</italic>. <inline-formula id="inf15">
<mml:math id="m20">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mtext>trend</mml:mtext>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf16">
<mml:math id="m21">
<mml:mrow>
<mml:msup>
<mml:mi>X</mml:mi>
<mml:mtext>season</mml:mtext>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> denote the trend and seasonal components, respectively. <italic>k</italic> denotes the moving-average window size, and <inline-formula id="inf17">
<mml:math id="m22">
<mml:mrow>
<mml:mo>&#x230a;</mml:mo>
<mml:mo>&#xb7;</mml:mo>
<mml:mo>&#x230b;</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> is the floor operator. <inline-formula id="inf18">
<mml:math id="m23">
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the time delay, and <inline-formula id="inf19">
<mml:math id="m24">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>&#x3c4;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes the delay correlation score computed on the seasonal component. <italic>K</italic> denotes the number of selected delays, and <inline-formula id="inf20">
<mml:math id="m25">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the <italic>k</italic>-th delay among the top-<italic>K</italic> delays with the highest correlation. <inline-formula id="inf21">
<mml:math id="m26">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b1;</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the normalized correlation weight corresponding to <inline-formula id="inf22">
<mml:math id="m27">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf23">
<mml:math id="m28">
<mml:mrow>
<mml:mi>R</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>l</mml:mi>
<mml:mi>l</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the sequence shift operation along the time dimension by <inline-formula id="inf24">
<mml:math id="m29">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c4;</mml:mi>
<mml:mi>k</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> steps. <inline-formula id="inf25">
<mml:math id="m30">
<mml:mrow>
<mml:mtext>AutoCorr</mml:mtext>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>X</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> denotes the aggregated representation obtained via the weighted summation of the shifted sequences.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>The structure of autoformer.</p>
</caption>
<graphic xlink:href="fenrg-14-1778611-g002.tif">
<alt-text content-type="machine-generated">Block diagram illustrating the Autoformer model for time series forecasting, showing encoder and decoder blocks with auto-correlation, series decomposition, feed forward layers, and initialization of seasonal and trend-cyclical components leading to prediction.</alt-text>
</graphic>
</fig>
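<p>As a minimal numerical illustration of Equations 1&#x2013;5, the following Python sketch performs the moving-average decomposition and a simplified delay-correlation aggregation on a single toy series. The window size, the softmax weighting of the top-<italic>K</italic> delays, and the synthetic data are illustrative assumptions; the actual Autoformer block computes the correlation scores efficiently via FFT and operates on learned multi-channel representations.</p>
<code language="python" xml:space="preserve">
import numpy as np

def series_decomp(x, k=25):
    # Eqs. 1-3: moving-average trend, with the seasonal part as the residual.
    pad = k // 2
    x_pad = np.pad(x, (pad, pad), mode="edge")
    trend = np.convolve(x_pad, np.ones(k) / k, mode="valid")[: len(x)]
    return trend, x - trend  # X^trend, X^season

def auto_correlation(season, K=3):
    # Eq. 4: delay-correlation score R(tau) on the seasonal component.
    T = len(season)
    scores = np.array([np.sum(season[: T - tau] * season[tau:])
                       for tau in range(1, T // 2)])
    # Eq. 5: keep the top-K delays tau_k and aggregate the rolled series
    # with softmax-normalized weights alpha_k.
    top = np.argsort(scores)[-K:] + 1
    s = scores[top - 1]
    alpha = np.exp(s - s.max())
    alpha = alpha / alpha.sum()
    return sum(a * np.roll(season, tau) for a, tau in zip(alpha, top))

# Toy usage: hourly load with a daily cycle over one week.
t = np.arange(168.0)
rng = np.random.default_rng(2)
x = 0.01 * t + np.sin(2 * np.pi * t / 24) + 0.1 * rng.normal(size=168)
trend, season = series_decomp(x)
aggregated = auto_correlation(season)  # dominant delays lie near multiples of 24
</code>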
</sec>
<sec id="s2-2-2">
<label>2.2.2</label>
<title>Meta learning model</title>
<p>In cross-factory industrial short-term load forecasting, the load distributions of different factories exhibit significant heterogeneity due to factors such as production scale, process flow, and operational regimes. A single unified global model therefore often fails to accommodate the distinct characteristics of each factory and may even cause negative transfer. To address this, this paper introduces a meta-learning mechanism that enables the model to learn shared priors across multiple factory tasks and to adapt rapidly to new operating conditions or new factories. The core of meta-learning lies in learning a well-transferable parameter initialization, allowing the model to quickly adapt to new tasks with only a small amount of local data and a few update steps (<xref ref-type="bibr" rid="B29">Yuan et al., 2026</xref>; <xref ref-type="bibr" rid="B27">Xiao et al., 2022</xref>).</p>
<p>Within the meta-learning framework, each factory is defined as a task <inline-formula id="inf26">
<mml:math id="m31">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, with its local dataset <inline-formula id="inf27">
<mml:math id="m32">
<mml:mrow>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> divided into a support set <inline-formula id="inf28">
<mml:math id="m33">
<mml:mrow>
<mml:msubsup>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>sup</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and a query set <inline-formula id="inf29">
<mml:math id="m34">
<mml:mrow>
<mml:msubsup>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>. The model parameters <inline-formula id="inf30">
<mml:math id="m35">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> no longer aim solely for optimal fitting on all data but serve as an initialization point with good adaptability to task variations. Specifically, the model first undergoes rapid intra-task updates on the support set, as shown in <xref ref-type="disp-formula" rid="e6">Equation 6</xref>. This process simulates the model&#x2019;s quick adaptation behavior after receiving a small amount of new data. Subsequently, the updated model parameters <inline-formula id="inf31">
<mml:math id="m36">
<mml:mrow>
<mml:msup>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> are evaluated on the query set, and the meta-gradient relative to the initial parameters <inline-formula id="inf32">
<mml:math id="m37">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is computed, as shown in <xref ref-type="disp-formula" rid="e7">Equation 7</xref>. This meta-gradient captures the sensitivity of the model&#x2019;s generalization performance to changes in the initial parameters after intra-task updates, representing key information shared across tasks. After repeating the above process across multiple factory tasks, the global parameters are updated by aggregating the meta-gradients from each task, as shown in <xref ref-type="disp-formula" rid="e8">Equation 8</xref>. Through this update strategy, the model progressively learns a parameter initialization that enables it to achieve superior predictive performance with only a few gradient updates when faced with different factories or new operating conditions. Combining Autoformer&#x2019;s strengths in long-term dependency modeling and series decomposition, the meta-learning mechanism further enhances the model&#x2019;s generalization capability and adaptation efficiency in small-sample and highly heterogeneous industrial scenarios.<disp-formula id="e6">
<mml:math id="m38">
<mml:mrow>
<mml:msup>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:msub>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>sup</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
<disp-formula id="e7">
<mml:math id="m39">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:msup>
<mml:msub>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
<mml:mrow>
<mml:mi>q</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>y</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
<disp-formula id="e8">
<mml:math id="m40">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:msub>
<mml:mi>&#x3c9;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(8)</label>
</disp-formula>where <inline-formula id="inf33">
<mml:math id="m41">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the <italic>i</italic>-th factory task, <inline-formula id="inf34">
<mml:math id="m42">
<mml:mrow>
<mml:msubsup>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>sup</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf35">
<mml:math id="m43">
<mml:mrow>
<mml:msubsup>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
<mml:mtext>qry</mml:mtext>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> denote the corresponding support set and query set, respectively. <inline-formula id="inf36">
<mml:math id="m44">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the global meta-parameters (initialization) before adaptation, <inline-formula id="inf37">
<mml:math id="m45">
<mml:mrow>
<mml:msub>
<mml:mi>f</mml:mi>
<mml:mi>&#x3b8;</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is the forecasting model parameterized by <inline-formula id="inf38">
<mml:math id="m46">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf39">
<mml:math id="m47">
<mml:mrow>
<mml:msubsup>
<mml:mi>&#x3b8;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> denotes the task-adapted parameters after the inner-loop update on <inline-formula id="inf40">
<mml:math id="m48">
<mml:mrow>
<mml:msubsup>
<mml:mi>D</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>sup</mml:mi>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf41">
<mml:math id="m49">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="script">L</mml:mi>
<mml:msub>
<mml:mi mathvariant="script">T</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> represents the task-specific prediction loss function, <inline-formula id="inf42">
<mml:math id="m50">
<mml:mrow>
<mml:msub>
<mml:mo>&#x2207;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the gradient operator with respect to <inline-formula id="inf43">
<mml:math id="m51">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf44">
<mml:math id="m52">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the meta-gradient computed from the query loss after adaptation, <inline-formula id="inf45">
<mml:math id="m53">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the intra-task learning rate, <italic>S</italic> denotes the set (mini-batch) of sampled tasks in each meta-update, <inline-formula id="inf46">
<mml:math id="m54">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denotes the meta-learning rate, and <inline-formula id="inf47">
<mml:math id="m55">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c9;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the task weight coefficient for aggregating the meta-gradients from different tasks.</p>
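<p>The bi-level update of Equations 6&#x2013;8 can be sketched as follows, substituting a toy linear forecaster for Autoformer and using the common first-order approximation of the meta-gradient (the exact meta-gradient in Equation 7 also carries a second-order term through the inner-loop update). Learning rates, uniform task weights, and data are illustrative assumptions.</p>
<code language="python" xml:space="preserve">
import numpy as np

def mse_grad(theta, X, y):
    # Gradient of the mean-squared prediction loss for a linear model.
    return 2.0 * X.T @ (X @ theta - y) / len(y)

def meta_step(theta, tasks, inner_lr=0.05, meta_lr=0.1):
    """One meta-update over a batch of factory tasks:
    Eq. 6: theta_i' = theta - alpha * grad L(f_theta, D_i^sup)
    Eq. 7: Phi_i    = grad L(f_theta_i', D_i^qry)  (first-order approx.)
    Eq. 8: theta   := theta - beta * sum_i omega_i * Phi_i"""
    w = 1.0 / len(tasks)  # uniform task weights omega_i, for illustration
    meta_grads = []
    for (X_sup, y_sup), (X_qry, y_qry) in tasks:
        theta_i = theta - inner_lr * mse_grad(theta, X_sup, y_sup)  # Eq. 6
        meta_grads.append(mse_grad(theta_i, X_qry, y_qry))          # Eq. 7
    return theta - meta_lr * w * np.sum(meta_grads, axis=0)         # Eq. 8

# Toy usage: two factory tasks, each with a support/query split.
rng = np.random.default_rng(1)
def make_task():
    return ((rng.normal(size=(8, 3)), rng.normal(size=8)),
            (rng.normal(size=(8, 3)), rng.normal(size=8)))
theta = np.zeros(3)
theta = meta_step(theta, [make_task(), make_task()])
</code>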
</sec>
</sec>
<sec id="s2-3">
<label>2.3</label>
<title>Federated learning model integrating differential privacy algorithm</title>
<p>During the federated collaborative forecasting process, although the original industrial load data remain locally stored, model gradients or parameter updates may still risk leaking sensitive production information of the factories. To further enhance the privacy and security of the system, this paper introduces a DP mechanism into the federated learning framework (<xref ref-type="bibr" rid="B20">Sarantinopoulos et al., 2025</xref>) and designs an adaptive privacy allocation strategy, aiming to ensure data security while minimizing the impact of privacy noise on model performance. The model structure is illustrated in <xref ref-type="fig" rid="F3">Figure 3</xref>, which depicts a standard client&#x2013;server federated learning architecture augmented with client-side DP and server-side secure aggregation. Each factory trains the local forecasting model using its own on-site industrial load data. Before any update leaves the factory, the DP module processes the locally computed meta-gradient/parameter update: (i) it bounds the sensitivity of the update by clipping, ensuring that a single sample (or a small subset of samples) cannot dominate the update; (ii) it injects calibrated random noise into the clipped update, so that the shared information becomes statistically indistinguishable with respect to the presence/absence of any individual record, thereby reducing the risk of inferring sensitive production characteristics from communicated gradients. The resulting DP-protected update is then encrypted and transmitted to the aggregation server (Step 1), meaning that the server never receives raw data or unprotected gradients. At the server side, the aggregation server performs secure aggregation (Step 3), which ensures that only the aggregated update over multiple factories can be recovered, while individual client updates remain hidden. The server then computes the global model update based on the aggregated (DP-protected) information and sends the updated global parameters back to each factory (Step 2). Finally, each factory updates its local model with the received global parameters and continues the next local training round (Step 4). Therefore, the DP algorithm plays a critical role by providing a privacy-preserving &#x201c;filter&#x201d; on the client side that transforms potentially sensitive gradients into protected shareable signals, while secure aggregation prevents exposure of single-client updates at the server side. Together, they enable collaborative model training with &#x201c;data staying on-site&#x201d; and privacy-aware information exchange.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Structure of federated learning model integrating differential privacy algorithm.</p>
</caption>
<graphic xlink:href="fenrg-14-1778611-g003.tif">
<alt-text content-type="machine-generated">Diagram showing a federated learning process where multiple factories each apply differential privacy to local neural networks, send encrypted gradients to a central aggregation server for secure aggregation, and receive model updates, with steps labeled for encrypted gradients, model updates, secure aggregation, and updating models.</alt-text>
</graphic>
</fig>
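<p>To make the round structure concrete, the following minimal Python sketch (an illustration only, not the authors&#x2019; implementation; the client updates, learning rate, and noise parameters are assumed placeholders) shows where clipping, noise injection, and aggregation enter one communication round.</p>
<code language="python">
import numpy as np

rng = np.random.default_rng(0)

def dp_protect(update, clip_norm=1.0, noise_multiplier=0.6):
    """Client-side DP filter: bound the update's L2 norm by clipping,
    then add Gaussian noise calibrated to the clipping threshold."""
    clipped = update / max(1.0, np.linalg.norm(update) / clip_norm)
    return clipped + rng.normal(0.0, noise_multiplier * clip_norm, update.shape)

def federated_round(theta, client_updates, beta=1e-4):
    """One round: clients upload DP-protected updates (Step 1); the server
    aggregates them so only the mean is revealed (Step 3), updates the
    global model, and broadcasts it back (Step 2); clients resume local
    training from the new parameters (Step 4)."""
    protected = [dp_protect(u) for u in client_updates]  # leaves each factory
    return theta - beta * np.mean(protected, axis=0)

# toy usage: six factories, a four-parameter "model"
theta = np.zeros(4)
theta = federated_round(theta, [rng.normal(size=4) for _ in range(6)])
</code>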
<p>DP protects data by injecting random noise during the model update process, making it difficult for an attacker to determine whether a specific sample participated in the training (<xref ref-type="bibr" rid="B24">Wang and Kang, 2026</xref>). For any adjacent datasets <inline-formula id="inf48">
<mml:math id="m56">
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf49">
<mml:math id="m57">
<mml:mrow>
<mml:msup>
<mml:mi>D</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>, if a randomized mechanism <inline-formula id="inf50">
<mml:math id="m58">
<mml:mrow>
<mml:mi mathvariant="script">M</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mo>&#xb7;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> satisfies <xref ref-type="disp-formula" rid="e9">Equation 9</xref>, then the mechanism is said to satisfy &#x3b5;-DP. In this paper, the DP mechanism is applied to the meta-gradients <inline-formula id="inf51">
<mml:math id="m59">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> uploaded by each factory to prevent the inference of local load data characteristics from gradients. To limit the impact of individual factory model updates on privacy leakage risk, the meta-gradients are first clipped by their <inline-formula id="inf52">
<mml:math id="m60">
<mml:mrow>
<mml:msub>
<mml:mi>L</mml:mi>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> norm, as shown in <xref ref-type="disp-formula" rid="e10">Equation 10</xref>. Subsequently, Gaussian noise is injected into the clipped gradients, as shown in <xref ref-type="disp-formula" rid="e11">Equation 11</xref>.</p>
<p>There are significant differences in data volume, data quality, and privacy sensitivity among different factories in federated learning. To avoid the issues of &#x201c;over-protection&#x201d; or &#x201c;privacy deficiency&#x201d; caused by a unified privacy budget, this paper proposes an adaptive privacy allocation strategy, assigning differentiated privacy budgets to each factory. First, a data contribution metric for factory <italic>i</italic> is defined, as shown in <xref ref-type="disp-formula" rid="e12">Equation 12</xref>. This metric reflects the importance of the factory&#x2019;s updates to the optimization of the global model. Simultaneously, a privacy sensitivity coefficient <inline-formula id="inf53">
<mml:math id="m61">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is introduced to characterize the factory&#x2019;s demand intensity for data privacy protection. Considering both contribution and privacy sensitivity, the privacy budget for factory <italic>i</italic> is allocated as shown in <xref ref-type="disp-formula" rid="e13">Equation 13</xref>. This design allows factories with higher contributions and lower privacy sensitivity to receive relatively relaxed privacy constraints, thereby reducing noise interference, while factories with higher privacy sensitivity automatically obtain stronger protection. Finally, on the server side, the differentially private meta-gradients are weighted and aggregated to update the global model parameters, as shown in <xref ref-type="disp-formula" rid="e14">Equation 14</xref>, where the update weight <inline-formula id="inf54">
<mml:math id="m62">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c9;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> is related to the privacy budget, as shown in <xref ref-type="disp-formula" rid="e15">Equation 15</xref>. This mechanism further mitigates the adverse impact of high-noise updates on the global model, achieving a dynamic balance between privacy protection and predictive performance.<disp-formula id="e9">
<mml:math id="m63">
<mml:mrow>
<mml:mi>Pr</mml:mi>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="script">M</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2264;</mml:mo>
<mml:msup>
<mml:mi>e</mml:mi>
<mml:mi>&#x3b5;</mml:mi>
</mml:msup>
<mml:mo>&#x2061;</mml:mo>
<mml:mi>Pr</mml:mi>
<mml:mrow>
<mml:mfenced open="[" close="]" separators="&#x7c;">
<mml:mrow>
<mml:mi mathvariant="script">M</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msup>
<mml:mi>D</mml:mi>
<mml:mo>&#x2032;</mml:mo>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(9)</label>
</disp-formula>
<disp-formula id="e10">
<mml:math id="m64">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mi>max</mml:mi>
<mml:mspace width="0.17em"/>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(10)</label>
</disp-formula>
<disp-formula id="e11">
<mml:math id="m65">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi mathvariant="script">N</mml:mi>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msubsup>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>i</mml:mi>
<mml:mn>2</mml:mn>
</mml:msubsup>
<mml:msup>
<mml:mi>C</mml:mi>
<mml:mn>2</mml:mn>
</mml:msup>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(11)</label>
</disp-formula>
<disp-formula id="e12">
<mml:math id="m66">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b7;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:msub>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
<mml:mrow>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(12)</label>
</disp-formula>
<disp-formula id="e13">
<mml:math id="m67">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mi>&#x3b5;</mml:mi>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#xb7;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b7;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c1;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(13)</label>
</disp-formula>
<disp-formula id="e14">
<mml:math id="m68">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:msub>
<mml:mi>&#x3c9;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mover accent="true">
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mo>&#x223c;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(14)</label>
</disp-formula>
<disp-formula id="e15">
<mml:math id="m69">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c9;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:msub>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">S</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mstyle>
<mml:msub>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(15)</label>
</disp-formula>where <italic>&#x3b5;</italic> is the privacy budget, with smaller values indicating higher privacy protection strength. <italic>C</italic> is the clipping threshold, and <inline-formula id="inf55">
<mml:math id="m70">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3c3;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> denotes the noise intensity, which is inversely proportional to the privacy budget <inline-formula id="inf56">
<mml:math id="m71">
<mml:mrow>
<mml:msub>
<mml:mi>&#x3b5;</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> of factory <italic>i</italic>. <bold>
<italic>I</italic>
</bold> represents the identity matrix, whose dimensionality matches that of the meta-gradient vector <inline-formula id="inf57">
<mml:math id="m72">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi mathvariant="normal">&#x3a6;</mml:mi>
<mml:mo>&#xaf;</mml:mo>
</mml:mover>
<mml:mi>i</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
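<p>For concreteness, the allocation pipeline of <xref ref-type="disp-formula" rid="e10">Equations 10</xref>&#x2013;<xref ref-type="disp-formula" rid="e15">15</xref> can be sketched in a few lines of NumPy. This is a schematic illustration only: the meta-gradients, total budget, and sensitivity coefficients below are invented toy values, and the mapping from budget to noise scale is shown simply as a noise intensity inversely proportional to the allocated budget.</p>
<code language="python">
import numpy as np

rng = np.random.default_rng(42)
C = 1.0                                         # clipping threshold (Table 2)
eps_total = 6.0                                 # total privacy budget (assumed)
phi = [rng.normal(size=8) for _ in range(3)]    # meta-gradients Phi_i (toy)
rho = np.array([0.5, 0.3, 0.2])                 # privacy sensitivities rho_i

# Eq. 10: clip each meta-gradient to L2 norm at most C
phi_clip = [p / max(1.0, np.linalg.norm(p) / C) for p in phi]

# Eq. 12: data contribution metric eta_i
norms = np.array([np.linalg.norm(p) for p in phi])
eta = norms / norms.sum()

# Eq. 13: differentiated privacy budgets eps_i = eps_total * eta_i / rho_i
eps = eps_total * eta / rho

# Eq. 11: Gaussian noise, with sigma_i inversely proportional to eps_i
sigma = 1.0 / eps
phi_dp = [pc + rng.normal(0.0, s * C, pc.shape)
          for pc, s in zip(phi_clip, sigma)]

# Eq. 15: aggregation weights; Eq. 14: global meta-update
omega = eps / eps.sum()
beta = 1e-4
theta = np.zeros(8)
theta = theta - beta * sum(w * p for w, p in zip(omega, phi_dp))
</code>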
</sec>
</sec>
<sec id="s3">
<label>3</label>
<title>Case study</title>
<sec id="s3-1">
<label>3.1</label>
<title>Data description and model configuration</title>
<p>This study considers six industrial users connected to a regional power grid in northern China, including five conventional industrial users and one newly connected industrial user. The newly connected user has a limited amount of historical load data and is mainly used to evaluate the rapid adaptation capability of the proposed federated meta-learning model under small-sample and new operating-condition scenarios. The load data of the five conventional users span 1.5&#x2013;3 years, whereas the newly connected user has only 1 month of history; installed capacities range from 128 MW to 381 MW, as detailed in <xref ref-type="table" rid="T1">Table 1</xref>. In this study, the privacy protection level (High/Medium/Low) reflects the sensitivity of each factory&#x2019;s load data with respect to production confidentiality, and is implemented by allocating differentiated differential-privacy budgets via privacy-weight coefficients (High/Medium/Low: 0.5/0.3/0.2) under a fixed gradient clipping threshold of 1.0 and a noise multiplier of 0.6.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Overview of industrial user data.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">User ID</th>
<th align="center">User type</th>
<th align="left">Data time span</th>
<th align="left">Installed capacity (MW)</th>
<th align="left">Privacy level</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">F1</td>
<td align="left">Conventional user</td>
<td align="left">3 years</td>
<td align="left">310</td>
<td align="left">High</td>
</tr>
<tr>
<td align="left">F2</td>
<td align="left">Conventional user</td>
<td align="left">2.5 years</td>
<td align="left">128</td>
<td align="left">Medium</td>
</tr>
<tr>
<td align="left">F3</td>
<td align="left">Conventional user</td>
<td align="left">2 years</td>
<td align="left">258</td>
<td align="left">Medium</td>
</tr>
<tr>
<td align="left">F4</td>
<td align="left">Conventional user</td>
<td align="left">2 years</td>
<td align="left">219</td>
<td align="left">Low</td>
</tr>
<tr>
<td align="left">F5</td>
<td align="left">Conventional user</td>
<td align="left">1.5 years</td>
<td align="left">381</td>
<td align="left">Low</td>
</tr>
<tr>
<td align="left">F6</td>
<td align="left">Newly connected user</td>
<td align="left">1 month</td>
<td align="left">292</td>
<td align="left">High</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To capture the temporal evolution characteristics of industrial loads, a short-term load forecasting task is constructed. The forecasting resolution is 15 min, and the prediction target is the active power load sequence over the next 24 h (96 steps, consistent with the forecast horizon in <xref ref-type="table" rid="T2">Table 2</xref>). The model input features include historical load sequences, meteorological variables (e.g., ambient temperature and humidity), and timestamp features (such as time of day and day of week), which enhance the model&#x2019;s ability to learn periodic patterns and respond to external disturbances.</p>
<p>Considering the heterogeneity among industrial users in terms of data sensitivity and privacy requirements, users are categorized into three privacy protection levels&#x2014;high, medium, and low&#x2014;according to the confidentiality of their production-related load data. Correspondingly, differentiated privacy-preserving strategies are adopted in the federated learning process. For each user, the dataset is split into training, validation, and test sets with a ratio of 70%/15%/15%. During data preprocessing, outliers are removed, missing values are imputed, and all input features are normalized using Min&#x2013;Max scaling to ensure a consistent numerical range.</p>
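<p>The preprocessing and task-construction steps above can be sketched as follows. This is a hedged illustration, assuming an all-numeric per-user pandas DataFrame whose target column is named load; the outlier-capping and imputation rules are simple stand-ins for the (unspecified) procedures used in this study.</p>
<code language="python">
import numpy as np
import pandas as pd

def preprocess(df, feature_cols, target_col="load", lookback=96, horizon=96):
    """Cap outliers, impute gaps, Min-Max scale, build 96-step windows,
    and split one user's samples 70%/15%/15% in time order."""
    df = df.clip(df.quantile(0.001), df.quantile(0.999), axis=1)  # outlier capping
    df = df.interpolate(limit_direction="both")                   # impute missing values
    lo, hi = df[feature_cols].min(), df[feature_cols].max()
    df[feature_cols] = (df[feature_cols] - lo) / (hi - lo)        # Min-Max scaling
    X, y = [], []
    for t in range(lookback, len(df) - horizon):
        X.append(df[feature_cols].iloc[t - lookback:t].to_numpy())
        y.append(df[target_col].iloc[t:t + horizon].to_numpy())
    X, y = np.asarray(X), np.asarray(y)
    n = len(X)
    i1, i2 = int(0.70 * n), int(0.85 * n)
    return (X[:i1], y[:i1]), (X[i1:i2], y[i1:i2]), (X[i2:], y[i2:])
</code>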
<p>Model training is conducted in a unified computational environment, on an NVIDIA GPU with Python-based deep learning frameworks. The forecasting model adopts Autoformer as the base time-series prediction network, upon which the collaborative framework integrating federated learning and meta-learning is constructed. The hyperparameters of Autoformer, as well as those of the federated learning and meta-learning components, follow the default configurations validated in the original publications, ensuring fair and reproducible model comparisons. The detailed parameter settings are summarized in <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Model architecture and training parameters.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Module</th>
<th align="left">Parameter</th>
<th align="left">Value</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="9" align="left">Autoformer</td>
<td align="left">Input sequence length</td>
<td align="left">96 (24 h with 15-min resolution)</td>
</tr>
<tr>
<td align="left">Forecast horizon</td>
<td align="left">96 (24 h with 15-min resolution)</td>
</tr>
<tr>
<td align="left">Encoder layers</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">Decoder layers</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left">Model dimension</td>
<td align="left">512</td>
</tr>
<tr>
<td align="left">Number of heads</td>
<td align="left">8</td>
</tr>
<tr>
<td align="left">Feed-forward dimension</td>
<td align="left">2048</td>
</tr>
<tr>
<td align="left">Dropout</td>
<td align="left">0.1</td>
</tr>
<tr>
<td align="left">Activation function</td>
<td align="left">GELU</td>
</tr>
<tr>
<td rowspan="5" align="left">Training</td>
<td align="left">Optimizer</td>
<td align="left">Adam</td>
</tr>
<tr>
<td align="left">Initial learning rate</td>
<td align="left">0.0001</td>
</tr>
<tr>
<td align="left">Batch size</td>
<td align="left">32</td>
</tr>
<tr>
<td align="left">Epochs</td>
<td align="left">20</td>
</tr>
<tr>
<td align="left">Learning rate scheduler</td>
<td align="left">Cosine annealing</td>
</tr>
<tr>
<td rowspan="5" align="left">Federated learning</td>
<td align="left">Number of clients</td>
<td align="left">6</td>
</tr>
<tr>
<td align="left">Aggregation algorithm</td>
<td align="left">FedAvg</td>
</tr>
<tr>
<td align="left">Communication rounds</td>
<td align="left">50</td>
</tr>
<tr>
<td align="left">Local update epochs</td>
<td align="left">1 (per round)</td>
</tr>
<tr>
<td align="left">Client participation ratio</td>
<td align="left">1.0 (full participation)</td>
</tr>
<tr>
<td rowspan="4" align="left">Meta-learning</td>
<td align="left">Meta-learning algorithm</td>
<td align="left">Reptile</td>
</tr>
<tr>
<td align="left">Inner-loop steps</td>
<td align="left">5</td>
</tr>
<tr>
<td align="left">Inner-loop learning rate</td>
<td align="left">0.001</td>
</tr>
<tr>
<td align="left">Outer-loop learning rate</td>
<td align="left">0.0001</td>
</tr>
<tr>
<td rowspan="3" align="left">Differential privacy</td>
<td align="left">Gradient clipping threshold</td>
<td align="left">1.0</td>
</tr>
<tr>
<td align="left">Noise multiplier</td>
<td align="left">0.6</td>
</tr>
<tr>
<td align="left">Privacy budget weights</td>
<td align="left">High/Medium/Low: 0.5/0.3/0.2</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Evaluation metrics</title>
<p>To enable a fair and comparable evaluation of model performance in privacy-preserving cross-factory collaborative forecasting scenarios, three error metrics are adopted: mean absolute percentage error (MAPE), normalized root mean square error (NRMSE), and normalized mean absolute error (NMAE). Given the heterogeneity among factories in terms of load scale, production schedules, and data distributions, normalized and dimensionless metrics are employed to mitigate the dominant influence of capacity differences on evaluation results. Specifically, NRMSE and NMAE are normalized by the installed capacity of each industrial user, while MAPE is inherently scale-free, ensuring consistency and comparability across factories with different load scales. This allows a more objective assessment of the prediction accuracy improvements achieved by the federated meta-learning framework under the constraint of &#x201c;data available but not visible&#x201d;.</p>
<p>In practical evaluation, each factory computes the performance metrics locally on its own test set, and only the scalar metric values are uploaded for statistical aggregation. This design avoids the disclosure of raw load sequences or fine-grained information that could potentially be reverse-engineered, thereby ensuring privacy preservation throughout the evaluation process.</p>
<p>Let <inline-formula id="inf58">
<mml:math id="m73">
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> denote the number of samples in the test set, <inline-formula id="inf59">
<mml:math id="m74">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> the ground-truth load at time <inline-formula id="inf60">
<mml:math id="m75">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, <inline-formula id="inf61">
<mml:math id="m76">
<mml:mrow>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> the corresponding predicted load, and <inline-formula id="inf62">
<mml:math id="m77">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> the installed capacity of the user. The three metrics are defined as <xref ref-type="disp-formula" rid="e16">Equations 16</xref>&#x2013;<xref ref-type="disp-formula" rid="e18">18</xref>.<disp-formula id="e16">
<mml:math id="m78">
<mml:mrow>
<mml:mtext>MAPE</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>100</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:munderover>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="&#x7c;" close="&#x7c;" separators="|">
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mfrac>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
</mml:math>
<label>(16)</label>
</disp-formula>
<disp-formula id="e17">
<mml:math id="m79">
<mml:mrow>
<mml:mtext>NRMSE</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msqrt>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")" separators="&#x7c;">
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mn>2</mml:mn>
</mml:msup>
</mml:mrow>
</mml:msqrt>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(17)</label>
</disp-formula>
<disp-formula id="e18">
<mml:math id="m80">
<mml:mrow>
<mml:mtext>NMAE</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:msubsup>
<mml:mo>&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>N</mml:mi>
</mml:msubsup>
</mml:mstyle>
<mml:mrow>
<mml:mfenced open="&#x7c;" close="&#x7c;" separators="|">
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>t</mml:mi>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mover accent="true">
<mml:mi>y</mml:mi>
<mml:mo>&#x5e;</mml:mo>
</mml:mover>
<mml:mi>t</mml:mi>
</mml:msub>
</mml:mfenced>
</mml:mrow>
</mml:mrow>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>c</mml:mi>
</mml:msub>
</mml:mfrac>
</mml:mrow>
</mml:math>
<label>(18)</label>
</disp-formula>
</p>
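<p>A direct NumPy transcription of <xref ref-type="disp-formula" rid="e16">Equations 16</xref>&#x2013;<xref ref-type="disp-formula" rid="e18">18</xref> is given below (a minimal sketch; the array and variable names are assumptions). Note that Equation 16 already contains the factor of 100, whereas Equations 17, 18 are dimensionless and are reported in the result tables as percentages.</p>
<code language="python">
import numpy as np

def mape(y, y_hat):
    """Eq. 16: mean absolute percentage error (already in %)."""
    return 100.0 / len(y) * np.sum(np.abs((y - y_hat) / y))

def nrmse(y, y_hat, y_c):
    """Eq. 17: RMSE normalized by installed capacity y_c (x100 for %)."""
    return np.sqrt(np.mean((y - y_hat) ** 2)) / y_c

def nmae(y, y_hat, y_c):
    """Eq. 18: MAE normalized by installed capacity y_c (x100 for %)."""
    return np.mean(np.abs(y - y_hat)) / y_c
</code>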
</sec>
<sec id="s3-3">
<label>3.3</label>
<title>Multi-user forecasting results and accuracy analysis</title>
<p>
<xref ref-type="table" rid="T3">Tables 3</xref>&#x2013;<xref ref-type="table" rid="T5">5</xref> report the comparative results of the proposed federated learning &#x2b; meta-learning collaborative framework and traditional independent forecasting on five industrial users (F1&#x2013;F5) under three error metrics: MAPE, NRMSE, and NMAE. As observed, the proposed framework consistently outperforms independent training across all three backbone models, with stable and significant accuracy gains.</p>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>MAPE comparison between the proposed method and independent forecasting (%).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Method</th>
<th align="left">Model</th>
<th align="left">F1</th>
<th align="left">F2</th>
<th align="left">F3</th>
<th align="left">F4</th>
<th align="left">F5</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="left">FL &#x2b; ML</td>
<td align="left">Autoformer</td>
<td align="left">3.61</td>
<td align="left">3.74</td>
<td align="left">3.69</td>
<td align="left">3.93</td>
<td align="left">3.85</td>
</tr>
<tr>
<td align="left">Transformer</td>
<td align="left">3.78</td>
<td align="left">3.89</td>
<td align="left">3.84</td>
<td align="left">4.03</td>
<td align="left">3.89</td>
</tr>
<tr>
<td align="left">TimeMixer</td>
<td align="left">3.98</td>
<td align="left">4.19</td>
<td align="left">3.98</td>
<td align="left">4.08</td>
<td align="left">3.88</td>
</tr>
<tr>
<td rowspan="3" align="left">Independent</td>
<td align="left">Autoformer</td>
<td align="left">5.78</td>
<td align="left">5.66</td>
<td align="left">5.80</td>
<td align="left">5.56</td>
<td align="left">5.97</td>
</tr>
<tr>
<td align="left">Transformer</td>
<td align="left">4.87</td>
<td align="left">5.54</td>
<td align="left">5.21</td>
<td align="left">5.34</td>
<td align="left">5.19</td>
</tr>
<tr>
<td align="left">TimeMixer</td>
<td align="left">6.07</td>
<td align="left">6.01</td>
<td align="left">6.13</td>
<td align="left">6.09</td>
<td align="left">5.82</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>NRMSE comparison between the proposed method and independent forecasting (%).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Method</th>
<th align="left">Model</th>
<th align="left">F1</th>
<th align="left">F2</th>
<th align="left">F3</th>
<th align="left">F4</th>
<th align="left">F5</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="left">FL &#x2b; ML</td>
<td align="left">Autoformer</td>
<td align="left">8.50</td>
<td align="left">11.70</td>
<td align="left">8.36</td>
<td align="left">14.63</td>
<td align="left">17.66</td>
</tr>
<tr>
<td align="left">Transformer</td>
<td align="left">9.01</td>
<td align="left">12.42</td>
<td align="left">8.52</td>
<td align="left">14.88</td>
<td align="left">16.18</td>
</tr>
<tr>
<td align="left">TimeMixer</td>
<td align="left">9.05</td>
<td align="left">13.57</td>
<td align="left">9.21</td>
<td align="left">14.37</td>
<td align="left">15.80</td>
</tr>
<tr>
<td rowspan="3" align="left">Independent</td>
<td align="left">Autoformer</td>
<td align="left">12.68</td>
<td align="left">16.74</td>
<td align="left">13.36</td>
<td align="left">18.61</td>
<td align="left">22.65</td>
</tr>
<tr>
<td align="left">Transformer</td>
<td align="left">10.88</td>
<td align="left">16.37</td>
<td align="left">11.57</td>
<td align="left">17.64</td>
<td align="left">19.94</td>
</tr>
<tr>
<td align="left">TimeMixer</td>
<td align="left">13.56</td>
<td align="left">17.39</td>
<td align="left">13.11</td>
<td align="left">19.52</td>
<td align="left">22.27</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>NMAE comparison between the proposed method and independent forecasting (%).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Method</th>
<th align="left">Model</th>
<th align="left">F1</th>
<th align="left">F2</th>
<th align="left">F3</th>
<th align="left">F4</th>
<th align="left">F5</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="left">FL &#x2b; ML</td>
<td align="left">Autoformer</td>
<td align="left">6.30</td>
<td align="left">8.56</td>
<td align="left">6.20</td>
<td align="left">9.38</td>
<td align="left">11.25</td>
</tr>
<tr>
<td align="left">Transformer</td>
<td align="left">6.61</td>
<td align="left">8.88</td>
<td align="left">6.44</td>
<td align="left">9.63</td>
<td align="left">11.39</td>
</tr>
<tr>
<td align="left">TimeMixer</td>
<td align="left">7.00</td>
<td align="left">9.60</td>
<td align="left">6.73</td>
<td align="left">9.75</td>
<td align="left">11.35</td>
</tr>
<tr>
<td rowspan="3" align="left">Independent</td>
<td align="left">Autoformer</td>
<td align="left">10.14</td>
<td align="left">13.02</td>
<td align="left">9.73</td>
<td align="left">13.38</td>
<td align="left">17.57</td>
</tr>
<tr>
<td align="left">Transformer</td>
<td align="left">8.55</td>
<td align="left">12.66</td>
<td align="left">8.78</td>
<td align="left">12.78</td>
<td align="left">15.19</td>
</tr>
<tr>
<td align="left">TimeMixer</td>
<td align="left">10.67</td>
<td align="left">13.78</td>
<td align="left">10.28</td>
<td align="left">14.54</td>
<td align="left">17.02</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Taking Autoformer (which yields the lowest errors under the proposed FL &#x2b; ML setting) as an example, the average MAPE is reduced from 5.75% under independent forecasting to 3.76%, corresponding to an absolute reduction of 1.99 percentage points (with per-user reductions ranging from 1.63 to 2.17 percentage points). Similarly, NRMSE decreases from 16.81% to 12.17%, yielding an improvement of 4.64 percentage points (3.98&#x2013;5.04 percentage points across users), while NMAE is reduced from 12.77% to 8.34%, i.e., a decrease of 4.43 percentage points (3.53&#x2013;6.32 percentage points). Consistent gains are also observed for Transformer and TimeMixer when incorporating the proposed framework: relative to their independent counterparts, the average MAPE/NRMSE/NMAE decrease by 1.34/3.08/3.00 percentage points for Transformer and 2.00/4.77/4.37 percentage points for TimeMixer, respectively. These results demonstrate that under limited data availability and heterogeneous load patterns, federated collaboration improves global representation learning, while meta-learning further strengthens rapid cross-factory adaptation, leading to stable accuracy gains without sharing raw load data.</p>
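<p>These averages follow directly from the Autoformer rows of <xref ref-type="table" rid="T3">Table 3</xref>, as the short check below illustrates.</p>
<code language="python">
fl_ml = [3.61, 3.74, 3.69, 3.93, 3.85]   # Table 3, FL + ML, Autoformer
indep = [5.78, 5.66, 5.80, 5.56, 5.97]   # Table 3, Independent, Autoformer

avg = lambda xs: sum(xs) / len(xs)
print(round(avg(indep), 3), round(avg(fl_ml), 3))   # 5.754 3.764
print(round(avg(indep) - avg(fl_ml), 2))            # 1.99 percentage points
</code>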
<p>The multi-user forecasting curves shown in <xref ref-type="fig" rid="F4">Figure 4</xref> provide a direct visual comparison. For all five industrial users (F1&#x2013;F5), the proposed method (Proposed, red curves) exhibits the closest alignment with the ground truth (True, black curves). In critical transition periods&#x2014;such as the morning ramp-up from off-peak valleys to daytime loads and the subsequent decline&#x2014;the proposed method more accurately tracks trend changes, effectively mitigating the amplitude bias and phase lag commonly observed in independent forecasting models at peak and valley regions. Moreover, during periods of pronounced load volatility (e.g., around peak loads of F1, F3, and F5), the proposed method produces smoother trajectories while retaining responsiveness to sharp variations, avoiding the local overshoot or underestimation seen in independently trained Autoformer, Transformer, and TimeMixer models. This behavior reflects enhanced robustness.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Load forecasting curves for various industrial users.</p>
</caption>
<graphic xlink:href="fenrg-14-1778611-g004.tif">
<alt-text content-type="machine-generated">Grouped line charts compare predicted and actual electric load (MW) across five regions labeled F1 to F5, with time on the x-axis and multiple forecasting models shown in various colors for each subplot.</alt-text>
</graphic>
</fig>
<p>Further comparisons with hybrid baselines (e.g., ML &#x2b; FL &#x2b; Transformer and ML &#x2b; FL &#x2b; TimeMixer), shown in <xref ref-type="fig" rid="F5">Figure 5</xref>, indicate that the proposed framework maintains a consistent advantage across users, confirming that federated knowledge sharing and meta-learning&#x2013;based rapid adaptation jointly improve the modeling of heterogeneous industrial load patterns and lead to more reliable short-term forecasting under privacy constraints.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Radar chart showing the performance of different methods across different users.</p>
</caption>
<graphic xlink:href="fenrg-14-1778611-g005.tif">
<alt-text content-type="machine-generated">Three radar charts compare six forecasting models&#x2014;Proposed, ML&#x2b;FL&#x2b;Timemixer, Transformer, ML&#x2b;FL&#x2b;Transformer, Autoformer, and Timemixer&#x2014;across metrics MAPE, NRMSE, and NMAE for features F1 to F5, with Timemixer consistently covering a larger area.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3-4">
<label>3.4</label>
<title>Validation of rapid adaptation capability for newly connected users based on meta-learning</title>
<p>In the small-sample, new-scenario test for the newly connected user F6, <xref ref-type="table" rid="T6">Table 6</xref> compares the prediction errors of three approaches under different backbone models: the proposed federated learning &#x2b; meta-learning method (Proposed, ML &#x2b; FL), conventional federated learning (FL), and independent local modeling based solely on the small local sample. The results show that the proposed method achieves the best and most stable performance across all three metrics.</p>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>Load forecasting error table for F6 (%).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Method</th>
<th align="center">Model</th>
<th align="left">MAPE</th>
<th align="left">NRMSE</th>
<th align="left">NMAE</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="center">Proposed (ML &#x2b; FL)</td>
<td align="left">Autoformer</td>
<td align="left">3.85</td>
<td align="left">9.36</td>
<td align="left">7.22</td>
</tr>
<tr>
<td align="left">Transformer</td>
<td align="left">3.80</td>
<td align="left">9.27</td>
<td align="left">7.20</td>
</tr>
<tr>
<td align="left">Timemixer</td>
<td align="left">3.92</td>
<td align="left">9.90</td>
<td align="left">7.42</td>
</tr>
<tr>
<td rowspan="3" align="center">Federated</td>
<td align="left">Autoformer</td>
<td align="left">5.93</td>
<td align="left">14.27</td>
<td align="left">11.21</td>
</tr>
<tr>
<td align="left">Transformer</td>
<td align="left">5.28</td>
<td align="left">13.13</td>
<td align="left">10.04</td>
</tr>
<tr>
<td align="left">Timemixer</td>
<td align="left">6.16</td>
<td align="left">14.94</td>
<td align="left">11.69</td>
</tr>
<tr>
<td rowspan="3" align="center">Independent (few samples)</td>
<td align="left">Autoformer</td>
<td align="left">6.48</td>
<td align="left">15.92</td>
<td align="left">12.37</td>
</tr>
<tr>
<td align="left">Transformer</td>
<td align="left">5.78</td>
<td align="left">13.69</td>
<td align="left">10.92</td>
</tr>
<tr>
<td align="left">Timemixer</td>
<td align="left">6.96</td>
<td align="left">16.64</td>
<td align="left">13.21</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Taking Autoformer as an example, the Proposed method achieves MAPE/NRMSE/NMAE values of 3.85%/9.36%/7.22%, respectively. Compared with conventional FL (5.93%/14.27%/11.21%), the errors are reduced by 2.08, 4.91, and 3.99 percentage points, respectively. Compared with local small-sample modeling (6.48%/15.92%/12.37%), the errors are further reduced by 2.63, 6.56, and 5.15 percentage points. Consistent conclusions are observed for Transformer and TimeMixer. Relative to conventional FL, MAPE is reduced by 1.48&#x2013;2.24 percentage points, NRMSE by 3.86&#x2013;5.04 percentage points, and NMAE by 2.84&#x2013;4.27 percentage points. Relative to independent few-sample modeling, the Proposed method reduces MAPE by 1.98&#x2013;3.04 percentage points, NRMSE by 4.42&#x2013;6.74 percentage points, and NMAE by 3.72&#x2013;5.79 percentage points. These results indicate that, after introducing meta-learning, the global model can leverage cross-factory collaborative knowledge to form a more transferable initialization, enabling rapid personalized adaptation under extremely limited data conditions for F6.</p>
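<p>The adaptation step for a newly connected user is lightweight. A minimal PyTorch-style sketch is shown below, assuming a generic forecasting model and data loader; the function name and loader are illustrative, and only the inner-loop settings mirror <xref ref-type="table" rid="T2">Table 2</xref>.</p>
<code language="python">
import copy
from itertools import cycle

import torch

def adapt_to_new_user(global_model, support_loader, steps=5, lr=1e-3):
    """Few-shot personalization: copy the meta-learned global
    initialization, then take a handful of gradient steps on the new
    user's small local dataset (inner-loop settings as in Table 2)."""
    model = copy.deepcopy(global_model)   # leave the global model intact
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = torch.nn.MSELoss()
    model.train()
    for _, (x, y) in zip(range(steps), cycle(support_loader)):
        opt.zero_grad()
        loss = loss_fn(model(x), y)
        loss.backward()
        opt.step()
    return model
</code>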
<p>The load curve comparisons in <xref ref-type="fig" rid="F6">Figure 6</xref> further corroborate these findings. During multiple peak&#x2013;valley transitions and ramp-up/ramp-down phases of F6, the proposed method exhibits closer alignment with the ground-truth load curve. It not only tracks overall trend variations more accurately, but also effectively suppresses common issues observed in conventional FL and small-sample modeling, such as peak underestimation, valley shifts, and amplified local fluctuations. In particular, during periods of rapid load increase or sudden drop, the proposed method responds more promptly with reduced phase lag, demonstrating stronger short-term dynamic modeling capability. Combined with the quantitative results in <xref ref-type="table" rid="T6">Table 6</xref>, it can be concluded that under privacy constraints where raw load data are not shared, the proposed federated collaboration &#x2b; meta-learning&#x2013;based rapid adaptation framework effectively mitigates the performance degradation caused by cold-start issues in newly connected users, achieving efficient transfer learning and robust forecasting in new access scenarios.</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Load forecasting result curves of various prediction methods.</p>
</caption>
<graphic xlink:href="fenrg-14-1778611-g006.tif">
<alt-text content-type="machine-generated">Line chart compares predicted and true values of electricity load in megawatts across multiple forecasting models over time in fifteen-minute intervals. The proposed model closely follows the true load values.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3-5">
<label>3.5</label>
<title>Federated learning&#x2013;based forecasting under differential privacy constraints</title>
<p>To validate the effectiveness of the proposed differentiated privacy protection weight (DP) strategy under differential privacy constraints, it is compared with a maximum privacy protection weight (MP) scheme in which the strictest noise level is imposed on all users. The results demonstrate that DP achieves superior accuracy across all users (F1&#x2013;F5) and backbone models, as shown in <xref ref-type="table" rid="T7">Tables 7</xref>&#x2013;<xref ref-type="table" rid="T9">9</xref>.</p>
<table-wrap id="T7" position="float">
<label>TABLE 7</label>
<caption>
<p>MAPE of different prediction methods for each user (%).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Method</th>
<th align="center">Model</th>
<th align="center">F1</th>
<th align="center">F2</th>
<th align="center">F3</th>
<th align="center">F4</th>
<th align="center">F5</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="center">Proposed (DP)</td>
<td align="center">Autoformer</td>
<td align="center">3.61</td>
<td align="center">3.74</td>
<td align="center">3.69</td>
<td align="center">3.93</td>
<td align="center">3.85</td>
</tr>
<tr>
<td align="center">Transformer</td>
<td align="center">3.78</td>
<td align="center">3.89</td>
<td align="center">3.84</td>
<td align="center">4.03</td>
<td align="center">3.89</td>
</tr>
<tr>
<td align="center">Timemixer</td>
<td align="center">3.98</td>
<td align="center">4.19</td>
<td align="center">3.98</td>
<td align="center">4.08</td>
<td align="center">3.88</td>
</tr>
<tr>
<td rowspan="3" align="center">MP</td>
<td align="center">Autoformer</td>
<td align="center">5.02</td>
<td align="center">4.95</td>
<td align="center">5.08</td>
<td align="center">5.06</td>
<td align="center">4.77</td>
</tr>
<tr>
<td align="center">Transformer</td>
<td align="center">4.44</td>
<td align="center">4.66</td>
<td align="center">4.76</td>
<td align="center">4.83</td>
<td align="center">4.70</td>
</tr>
<tr>
<td align="center">Timemixer</td>
<td align="center">4.46</td>
<td align="center">4.73</td>
<td align="center">4.48</td>
<td align="center">4.61</td>
<td align="center">4.38</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T8" position="float">
<label>TABLE 8</label>
<caption>
<p>NRMSE of different prediction methods for each user (%).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Method</th>
<th align="center">Model</th>
<th align="center">F1</th>
<th align="center">F2</th>
<th align="center">F3</th>
<th align="center">F4</th>
<th align="center">F5</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="center">Proposed (DP)</td>
<td align="center">Autoformer</td>
<td align="center">8.50</td>
<td align="center">11.70</td>
<td align="center">8.36</td>
<td align="center">14.63</td>
<td align="center">17.66</td>
</tr>
<tr>
<td align="center">Transformer</td>
<td align="center">9.01</td>
<td align="center">12.42</td>
<td align="center">8.52</td>
<td align="center">14.88</td>
<td align="center">16.18</td>
</tr>
<tr>
<td align="center">Timemixer</td>
<td align="center">9.05</td>
<td align="center">13.57</td>
<td align="center">9.21</td>
<td align="center">14.37</td>
<td align="center">15.80</td>
</tr>
<tr>
<td rowspan="3" align="center">MP</td>
<td align="center">Autoformer</td>
<td align="center">11.12</td>
<td align="center">14.52</td>
<td align="center">10.90</td>
<td align="center">17.28</td>
<td align="center">19.52</td>
</tr>
<tr>
<td align="center">Transformer</td>
<td align="center">10.33</td>
<td align="center">14.40</td>
<td align="center">10.95</td>
<td align="center">17.31</td>
<td align="center">18.46</td>
</tr>
<tr>
<td align="center">Timemixer</td>
<td align="center">10.13</td>
<td align="center">15.20</td>
<td align="center">10.41</td>
<td align="center">15.92</td>
<td align="center">17.73</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T9" position="float">
<label>TABLE 9</label>
<caption>
<p>NMAE of different prediction methods for each user (%).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Method</th>
<th align="center">Model</th>
<th align="center">F1</th>
<th align="center">F2</th>
<th align="center">F3</th>
<th align="center">F4</th>
<th align="center">F5</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="3" align="center">Proposed (DP)</td>
<td align="center">Autoformer</td>
<td align="center">6.30</td>
<td align="center">8.56</td>
<td align="center">6.20</td>
<td align="center">9.38</td>
<td align="center">11.25</td>
</tr>
<tr>
<td align="center">Transformer</td>
<td align="center">6.61</td>
<td align="center">8.88</td>
<td align="center">6.44</td>
<td align="center">9.63</td>
<td align="center">11.39</td>
</tr>
<tr>
<td align="center">Timemixer</td>
<td align="center">7.00</td>
<td align="center">9.60</td>
<td align="center">6.73</td>
<td align="center">9.75</td>
<td align="center">11.35</td>
</tr>
<tr>
<td rowspan="3" align="center">MP</td>
<td align="center">Autoformer</td>
<td align="center">8.82</td>
<td align="center">11.29</td>
<td align="center">8.55</td>
<td align="center">12.06</td>
<td align="center">13.89</td>
</tr>
<tr>
<td align="center">Transformer</td>
<td align="center">7.82</td>
<td align="center">10.66</td>
<td align="center">7.99</td>
<td align="center">11.52</td>
<td align="center">13.76</td>
</tr>
<tr>
<td align="center">Timemixer</td>
<td align="center">7.85</td>
<td align="center">10.84</td>
<td align="center">7.56</td>
<td align="center">11.01</td>
<td align="center">12.81</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Taking Autoformer as an example, the average MAPE under DP decreases from 4.976% (MP) to 3.764%, corresponding to a reduction of 1.21 percentage points (with per-user reductions ranging from 0.92 to 1.41 percentage points). The average NRMSE decreases from 14.668% to 12.170%, yielding an improvement of 2.50 percentage points, while the average NMAE decreases from 10.922% to 8.338%, corresponding to a reduction of 2.58 percentage points. Consistent trends are also observed for Transformer and TimeMixer: compared with MP, DP reduces MAPE by approximately 0.79 and 0.51 percentage points, NRMSE by about 2.09 and 1.48 percentage points, and NMAE by about 1.76 and 1.13 percentage points, respectively. These results indicate that differentiated noise allocation can effectively alleviate the accuracy degradation caused by uniformly strong noise, without compromising privacy protection requirements.</p>
<p>The error boxplots in <xref ref-type="fig" rid="F7">Figure 7</xref> further confirm these findings from a distributional perspective. Compared with the MP strategy, which exhibits higher median errors and wider interquartile ranges (indicating larger dispersion) for multiple users, the DP strategy produces boxplots that are overall closer to the low-error region, with smaller interquartile ranges and fewer extreme values, reflecting improved stability and robustness. This is mainly because MP homogenizes all users&#x2019; privacy requirements to the highest level, forcing most users to inject excessive noise and thereby diluting the effective information during federated aggregation. In contrast, DP configures noise intensity according to users&#x2019; privacy levels, providing stronger protection for highly sensitive users while avoiding unnecessary perturbation for less sensitive ones. As a result, a more favorable trade-off between privacy strength and prediction accuracy is achieved during global aggregation, leading to consistent error reduction and variance convergence across users.</p>
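<p>The contrast can be made concrete with a schematic comparison of the per-user noise scales implied by <xref ref-type="disp-formula" rid="e11">Equations 11</xref>, <xref ref-type="disp-formula" rid="e13">13</xref>; the contribution values below are illustrative, and noise scales are shown only up to a common proportionality constant.</p>
<code language="python">
import numpy as np

eta = np.array([0.22, 0.18, 0.20, 0.19, 0.21])  # contributions eta_i (toy)
rho = np.array([0.5, 0.3, 0.3, 0.2, 0.2])       # sensitivities F1-F5 (Table 1)

sigma_dp = rho / eta                    # sigma_i ~ rho_i / eta_i (Eqs. 11, 13)
sigma_mp = np.full(5, sigma_dp.max())   # MP: strictest noise imposed on all

print(sigma_dp.round(2))   # [2.27 1.67 1.5  1.05 0.95] -- only F1 needs the max
print(sigma_mp.round(2))   # [2.27 2.27 2.27 2.27 2.27] -- F2-F5 are over-noised
</code>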
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Box plots of prediction errors for different methods across various users.</p>
</caption>
<graphic xlink:href="fenrg-14-1778611-g007.tif">
<alt-text content-type="machine-generated">Boxplot comparison of seven forecasting models across five datasets (F1 to F5) using three metrics: MAPE percentage, NRMSE percentage, and NMAE percentage. Models include Proposed, DP plus Transformer, DP plus Timemixer, MP plus Autoformer, MP plus Transformer, and MP plus Timemixer, each distinguished by color in the legend. Lower error metric values are visually favored by the Proposed model across all datasets and metrics.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec sec-type="conclusion" id="s4">
<label>4</label>
<title>Conclusion</title>
<p>In short-term load forecasting for industrial users, prediction accuracy is often constrained by limited single-user data and complex load patterns, while cross-factory collaborative forecasting faces substantial privacy leakage risks due to the involvement of production-sensitive load data. To address these challenges, this paper proposes a privacy-preserving cross-scenario collaborative forecasting framework. Federated learning is employed to establish a secure collaborative training mechanism under the principle of &#x201c;data available but not visible.&#x201d; Meta-learning is further introduced to alleviate negative transfer caused by multi-factory heterogeneity and to enhance rapid adaptation capability. In addition, a differentiated noise-weight allocation strategy for DP budgets is designed to achieve an improved trade-off between prediction accuracy and privacy protection under diverse privacy requirements. Case studies demonstrate that the proposed framework exhibits consistent advantages across different users and backbone models.<list list-type="roman-lower">
<list-item>
<p>Compared with traditional independent training, the proposed federated learning &#x2b; meta-learning approach significantly improves forecasting accuracy. Taking Autoformer as an example, the average MAPE decreases from 5.754% to 3.764%, corresponding to a reduction of 1.99 percentage points.</p>
</list-item>
<list-item>
<p>The proposed method achieves rapid personalized adaptation with only a small amount of local data. For Autoformer, the MAPE for F6 is reduced to 3.85%, representing a decrease of 2.08 percentage points compared with conventional federated learning, and a decrease of 2.63 percentage points compared with local small-sample modeling.</p>
</list-item>
<list-item>
<p>The differentiated privacy noise-weight strategy effectively mitigates the accuracy degradation caused by uniformly strong noise when compared with the maximum privacy protection weight. For Autoformer, the average MAPE decreases from 4.976% to 3.764%, corresponding to a reduction of 1.21 percentage points.</p>
</list-item>
</list>
</p>
<p>In summary, the proposed approach provides a practical and deployable technical pathway for secure collaborative load forecasting for industrial users in power systems. By improving prediction accuracy and alleviating cold-start issues for newly connected users without exposing raw data, it offers more reliable load priors for day-ahead scheduling, load management and demand response, reserve capacity allocation, and operational risk control, thereby enhancing refined operation and coordinated control on the industrial load side.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The data analyzed in this study is subject to the following licenses/restrictions: The authors do not have permission to share data. Requests to access these datasets should be directed to Maomao Ding, <email>mmding8909_bjqr@163.com</email>.</p>
</sec>
<sec sec-type="author-contributions" id="s6">
<title>Author contributions</title>
<p>MD: Conceptualization, Writing &#x2013; review and editing, Writing &#x2013; original draft, Methodology. ZC: Investigation, Writing &#x2013; review and editing, Software, Writing &#x2013; original draft. BC: Formal Analysis, Writing &#x2013; original draft, Data curation. JH: Resources, Writing &#x2013; review and editing, Methodology, Investigation.</p>
</sec>
<sec sec-type="COI-statement" id="s8">
<title>Conflict of interest</title>
<p>Authors MD, ZC, and BC were employed by State Grid Corporation of China. Author JH was employed by Beijing Tsingsoft Technology Co., Ltd.</p>
</sec>
<sec sec-type="ai-statement" id="s9">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was used in the creation of this manuscript. During the preparation of this manuscript, the author(s) used ChatGPT-4o for the purpose of polishing the English writing and language. The authors have reviewed and edited the output and take full responsibility for the content of this publication.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ban</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Xiong</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhuo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>The univariate model for long-term wind speed forecasting based on wavelet soft threshold denoising and improved autoformer</article-title>. <source>Energy</source> <volume>290</volume>, <fpage>130225</fpage>. <pub-id pub-id-type="doi">10.1016/j.energy.2023.130225</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Biswal</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Deb</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Datta</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ustun</surname>
<given-names>T. S.</given-names>
</name>
<name>
<surname>Cali</surname>
<given-names>U.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Review on smart grid load forecasting for smart energy management using machine learning and deep learning techniques</article-title>. <source>Energy Rep.</source> <volume>12</volume>, <fpage>3654</fpage>&#x2013;<lpage>3670</lpage>. <pub-id pub-id-type="doi">10.1016/j.egyr.2024.09.056</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Wei</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>PV output estimation method of power distribution station area based on federated learning framework and improved transformer neural network</article-title>. <source>Front. Energy Res.</source> <volume>12</volume>, <fpage>1349995</fpage>. <pub-id pub-id-type="doi">10.3389/fenrg.2024.1349995</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Duan</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Xue</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>X.</given-names>
</name>
<etal/>
</person-group> (<year>2026</year>). <article-title>Multi-energy load forecasting incorporating AI algorithms: research status and trends in integrated energy systems</article-title>. <source>Renew. Sustain. Energy Rev.</source> <volume>229</volume>, <fpage>116611</fpage>. <pub-id pub-id-type="doi">10.1016/j.rser.2025.116611</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Nie</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Yi</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Short-term industrial load forecasting based on error correction and hybrid ensemble learning</article-title>. <source>Energy Build.</source> <volume>313</volume>, <fpage>114261</fpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2024.114261</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hasan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mifta</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Papiya</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>Roy</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Dey</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Salsabil</surname>
<given-names>N. A.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>A state-of-the-art comparative review of load forecasting methods: characteristics, perspectives, and applications</article-title>. <source>Energy Convers. Manag. X</source> <volume>26</volume>, <fpage>100922</fpage>. <pub-id pub-id-type="doi">10.1016/j.ecmx.2025.100922</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>He</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2025</year>). &#x201c;<article-title>Causal-oriented machine learning for industrial load forecasting in integrated energy systems</article-title>,&#x201d; in <source>2025 4th International Conference on New Energy System and Power Engineering (NESP)</source>, <fpage>525</fpage>&#x2013;<lpage>529</lpage>.</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Man</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Energy consumption and carbon emissions forecasting for industrial processes: status, challenges and perspectives</article-title>. <source>Renew. Sustain. Energy Rev.</source> <volume>182</volume>, <fpage>113405</fpage>. <pub-id pub-id-type="doi">10.1016/j.rser.2023.113405</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Jing</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Load forecasting using federated learning with considering electricity data privacy preservation of EASP</article-title>. <source>Ain Shams Eng. J.</source> <volume>15</volume>, <fpage>102724</fpage>. <pub-id pub-id-type="doi">10.1016/j.asej.2024.102724</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Huang</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Ren</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Cai</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Multi-task learning and single-task learning joint multi-energy load forecasting of integrated energy systems considering meteorological variations</article-title>. <source>Expert Syst. Appl.</source> <volume>288</volume>, <fpage>128269</fpage>. <pub-id pub-id-type="doi">10.1016/j.eswa.2025.128269</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kaur</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Bedi</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Aggarwal</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Privacy preserving and computationally efficient federated learning approach for household load estimation</article-title>. <source>Sustain. Energy, Grids Netw.</source> <volume>44</volume>, <fpage>102021</fpage>. <pub-id pub-id-type="doi">10.1016/j.segan.2025.102021</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Cao</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Liao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Lai</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Cui</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Decentralized asynchronous adaptive federated learning algorithm for securely prediction of distributed power data</article-title>. <source>Front. Energy Res.</source> <volume>11</volume>, <fpage>1340639</fpage>. <pub-id pub-id-type="doi">10.3389/fenrg.2023.1340639</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Pan</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhao</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>A Shapley value-based dynamic ensemble framework for short-term load forecasting of industrial consumers</article-title>. <source>Int. J. Electr. Power and Energy Syst.</source> <volume>172</volume>, <fpage>111102</fpage>. <pub-id pub-id-type="doi">10.1016/j.ijepes.2025.111102</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liao</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Tan</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Sequence signal prediction and reconstruction for multi-energy load forecasting in integrated energy systems: a bi-level multi-task learning method</article-title>. <source>Energy</source> <volume>313</volume>, <fpage>133960</fpage>. <pub-id pub-id-type="doi">10.1016/j.energy.2024.133960</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Fan</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Review of multiple load forecasting method for integrated energy system</article-title>. <source>Front. Energy Res.</source> <volume>11</volume>, <fpage>1296800</fpage>. <pub-id pub-id-type="doi">10.3389/fenrg.2023.1296800</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Guo</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>A novel correlation feature self-assigned Kolmogorov-Arnold Networks for multi-energy load forecasting in integrated energy systems</article-title>. <source>Energy Convers. Manag.</source> <volume>325</volume>, <fpage>119388</fpage>. <pub-id pub-id-type="doi">10.1016/j.enconman.2024.119388</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Majeske</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Vaidya</surname>
<given-names>S. S.</given-names>
</name>
<name>
<surname>Roy</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Rehman</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Sohrabpoor</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Miller</surname>
<given-names>T.</given-names>
</name>
<etal/>
</person-group> (<year>2025</year>). <article-title>Industrial energy forecasting using dynamic attention neural networks</article-title>. <source>Energy AI</source> <volume>20</volume>, <fpage>100504</fpage>. <pub-id pub-id-type="doi">10.1016/j.egyai.2025.100504</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Manzoor</surname>
<given-names>H. U.</given-names>
</name>
<name>
<surname>Hussain</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Flynn</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Zoha</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>Centralised vs. decentralised federated load forecasting in smart buildings: who holds the key to adversarial attack robustness?</article-title> <source>Energy Build.</source> <volume>324</volume>, <fpage>114871</fpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2024.114871</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ouyang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Zuo</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Duan</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Seasonal distribution analysis and short-term PV power prediction method based on decomposition optimization Deep-Autoformer</article-title>. <source>Renew. Energy</source> <volume>246</volume>, <fpage>122903</fpage>. <pub-id pub-id-type="doi">10.1016/j.renene.2025.122903</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sarantinopoulos</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Michalakopoulos</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Sarmas</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Marinakis</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Toderean</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Cioara</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Meta-heuristic federated learning aggregation methods for load forecasting</article-title>. <source>Energy AI</source> <volume>22</volume>, <fpage>100594</fpage>. <pub-id pub-id-type="doi">10.1016/j.egyai.2025.100594</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shen</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2024</year>). <article-title>An efficient and secure privacy-preserving federated learning framework based on multiplicative double privacy masking</article-title>. <source>Comput. Mater. Contin.</source> <volume>80</volume>, <fpage>4729</fpage>&#x2013;<lpage>4748</lpage>. <pub-id pub-id-type="doi">10.32604/cmc.2024.054434</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Walser</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Sauer</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Typical load profile-supported convolutional neural network for short-term load forecasting in the industrial sector</article-title>. <source>Energy AI</source> <volume>5</volume>, <fpage>100104</fpage>. <pub-id pub-id-type="doi">10.1016/j.egyai.2021.100104</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Walther</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Spanier</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Panten</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Abele</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Very short-term load forecasting on factory level &#x2013; a machine learning approach</article-title>. <source>Procedia CIRP</source> <volume>80</volume>, <fpage>705</fpage>&#x2013;<lpage>710</lpage>. <pub-id pub-id-type="doi">10.1016/j.procir.2019.01.060</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Kang</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2026</year>). <article-title>An enhanced method for hierarchical federated learning based on privacy metric and adaptive differential privacy</article-title>. <source>Neurocomputing</source> <volume>667</volume>, <fpage>132334</fpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2025.132334</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>A.-X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>J.-J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A novel cloud-edge collaboration based short-term load forecasting method for smart grid</article-title>. <source>Front. Energy Res.</source> <volume>10</volume>, <fpage>977026</fpage>. <pub-id pub-id-type="doi">10.3389/fenrg.2022.977026</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wazirali</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Yaghoubi</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Abujazar</surname>
<given-names>M. S. S.</given-names>
</name>
<name>
<surname>Ahmad</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Vakili</surname>
<given-names>A. H.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>State-of-the-art review on energy and load forecasting in microgrids using artificial neural networks, machine learning, and deep learning techniques</article-title>. <source>Electr. Power Syst. Res.</source> <volume>225</volume>, <fpage>109792</fpage>. <pub-id pub-id-type="doi">10.1016/j.epsr.2023.109792</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xiao</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Mo</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Shan</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Meta-ANN &#x2013; a dynamic artificial neural network refined by meta-learning for short-term load forecasting</article-title>. <source>Energy</source> <volume>246</volume>, <fpage>123418</fpage>. <pub-id pub-id-type="doi">10.1016/j.energy.2022.123418</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lu</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Hong</surname>
<given-names>S. H.</given-names>
</name>
</person-group> (<year>2016</year>). <article-title>A real-time decision model for industrial load management in a smart grid</article-title>. <source>Appl. Energy</source> <volume>183</volume>, <fpage>1488</fpage>&#x2013;<lpage>1497</lpage>. <pub-id pub-id-type="doi">10.1016/j.apenergy.2016.09.021</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yuan</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Tang</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>G.-Y.</given-names>
</name>
<name>
<surname>Gan</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2026</year>). <article-title>Adaptive load forecasting under regional distribution shifts: a meta-learning framework</article-title>. <source>Eng. Appl. Artif. Intell.</source> <volume>164</volume>, <fpage>113104</fpage>. <pub-id pub-id-type="doi">10.1016/j.engappai.2025.113104</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Kong</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Short-term power load forecasting for industrial buildings based on decomposition reconstruction and TCN-Informer-BiGRU</article-title>. <source>Energy Build.</source> <volume>347</volume>, <fpage>116317</fpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2025.116317</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>B.</given-names>
</name>
<etal/>
</person-group> (<year>2023</year>). <article-title>A day-ahead industrial load forecasting model using load change rate features and combining FA-ELM and the AdaBoost algorithm</article-title>. <source>Energy Rep.</source> <volume>9</volume>, <fpage>971</fpage>&#x2013;<lpage>981</lpage>. <pub-id pub-id-type="doi">10.1016/j.egyr.2022.12.044</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1475556/overview">Jiaqi Shi</ext-link>, Shenyang Institute of Engineering, China</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1765721/overview">Xiang Zhang</ext-link>, North China Electric Power University, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3245203/overview">Hongyang Jin</ext-link>, Shenyang Institute of Engineering, China</p>
</fn>
</fn-group>
</back>
</article>